- more project cleanups and small code changes, also added the psx emu interface again

- someone should check __xgetbv under linux (avx/fma detection)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4295 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-02-12 21:45:16 +00:00
parent a8afd254b6
commit 1e6f280021
57 changed files with 6186 additions and 689 deletions

View File

@ -30,6 +30,7 @@
#ifdef _MSC_VER
# include <intrin.h>
extern "C" unsigned __int64 __xgetbv(int);
#else
# include <intrin_x86.h>
#endif

View File

@ -836,6 +836,13 @@ static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], con
__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
// GCC counterpart of the MSVC __xgetbv helper: executes XGETBV with `index` in ECX
// and returns the EDX:EAX register pair combined into a single 64-bit value.
static __inline__ __attribute__((always_inline)) unsigned long long __xgetbv(unsigned int index)
{
	unsigned int lo, hi;

	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	unsigned long long value = hi; // EDX supplies the upper 32 bits

	value <<= 32;
	value |= lo; // EAX supplies the lower 32 bits

	return value;
}
static __inline__ __attribute__((always_inline)) unsigned long long __rdtsc(void)
{
unsigned long long retval;

View File

@ -87,6 +87,8 @@ public:
u32 hasSupplementalStreamingSIMD3Extensions :1;
u32 hasStreamingSIMD4Extensions :1;
u32 hasStreamingSIMD4Extensions2 :1;
u32 hasAVX :1;
u32 hasFMA :1;
// AMD-specific CPU Features
u32 hasMultimediaExtensionsExt :1;

View File

@ -259,6 +259,15 @@ void x86capabilities::Identify()
hasStreamingSIMD4Extensions = ( Flags2 >> 19 ) & 1; //sse4.1
hasStreamingSIMD4Extensions2 = ( Flags2 >> 20 ) & 1; //sse4.2
if((Flags2 >> 27) & 1) // OSXSAVE
{
if((__xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = 11b (XMM state and YMM state are enabled by OS).
{
hasAVX = ( Flags2 >> 28 ) & 1; //avx
hasFMA = ( Flags2 >> 12 ) & 1; //fma
}
}
// Ones only for AMDs:
hasMultimediaExtensionsExt = ( EFlags >> 22 ) & 1; //mmx2
hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu

View File

@ -245,6 +245,8 @@ void SysLogMachineCaps()
if( x86caps.hasSupplementalStreamingSIMD3Extensions ) features[0].Add( L"SSSE3" );
if( x86caps.hasStreamingSIMD4Extensions ) features[0].Add( L"SSE4.1" );
if( x86caps.hasStreamingSIMD4Extensions2 ) features[0].Add( L"SSE4.2" );
if( x86caps.hasAVX ) features[0].Add( L"AVX" );
if( x86caps.hasFMA) features[0].Add( L"FMA" );
if( x86caps.hasMultimediaExtensionsExt ) features[1].Add( L"MMX2 " );
if( x86caps.has3DNOWInstructionExtensions ) features[1].Add( L"3DNOW " );

View File

@ -40,11 +40,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU2-X", "plugins\spu2-x\sr
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "plugins\GSdx\GSdx_vs2008.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
ProjectSection(ProjectDependencies) = postProject
{067D7863-393B-494F-B296-4A8853EB3D1D} = {067D7863-393B-494F-B296-4A8853EB3D1D}
{26511268-2902-4997-8421-ECD7055F9E28} = {26511268-2902-4997-8421-ECD7055F9E28}
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86} = {7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SoundTouch", "3rdparty\SoundTouch\SoundTouch.vcproj", "{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}"
EndProject
@ -241,19 +236,19 @@ Global
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release SSSE3|x64.ActiveCfg = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug SSE2|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|Win32.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|Win32.Build.0 = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|x64.ActiveCfg = Debug|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|x64.ActiveCfg = Release SSE2|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.ActiveCfg = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.Build.0 = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|Win32.ActiveCfg = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|Win32.Build.0 = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.ActiveCfg = Release SSSE3|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|Win32.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|Win32.Build.0 = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.ActiveCfg = Debug|Win32
@ -349,31 +344,31 @@ Global
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|x64.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|x64.ActiveCfg = Debug SSE4|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|x64.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|x64.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|Win32.Build.0 = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|x64.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|x64.ActiveCfg = Debug SSE4|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|x64.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|x64.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|x64.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug|Win32.Build.0 = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug|x64.ActiveCfg = Release|Win32

View File

@ -267,8 +267,8 @@ Global
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release|x64.ActiveCfg = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|Win32.ActiveCfg = Debug AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|Win32.Build.0 = Debug AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.ActiveCfg = Debug SSE4|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.Build.0 = Debug SSE4|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.ActiveCfg = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.Build.0 = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
@ -283,22 +283,22 @@ Global
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSSE3|x64.Build.0 = Debug SSSE3|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|Win32.ActiveCfg = Devel|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|Win32.Build.0 = Devel|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|x64.ActiveCfg = Debug|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Devel|x64.ActiveCfg = Devel|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|Win32.ActiveCfg = Release AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|Win32.Build.0 = Release AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|x64.ActiveCfg = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|x64.ActiveCfg = Release AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.ActiveCfg = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.Build.0 = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|Win32.ActiveCfg = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|Win32.Build.0 = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.ActiveCfg = Release SSSE3|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64
@ -498,59 +498,59 @@ Global
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release|Win32.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release|x64.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX|x64.ActiveCfg = Debug SSE4|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|x64.ActiveCfg = Debug SSE4|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|x64.ActiveCfg = Debug SSSE3|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|Win32.Build.0 = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|x64.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|x64.ActiveCfg = Debug SSE4|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Devel|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|x64.ActiveCfg = Release SSE4|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX|x64.ActiveCfg = Debug SSE4|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE2|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE4|x64.ActiveCfg = Debug SSE4|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE4|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSSE3|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSSE3|x64.ActiveCfg = Debug SSSE3|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSSE3|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|Win32.Build.0 = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug|x64.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|x64.ActiveCfg = Debug SSE4|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Devel|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|x64.ActiveCfg = Release SSE4|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|x64.ActiveCfg = Release SSE2|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|x64.ActiveCfg = Release SSE4|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE4|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|x64.ActiveCfg = Release SSSE3|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSSE3|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release|x64.ActiveCfg = Release|x64
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug AVX|Win32.ActiveCfg = Release|Win32

284
plugins/GSdx/GPU.cpp Normal file
View File

@ -0,0 +1,284 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSdx.h"
#include "GSUtil.h"
#include "GPURendererSW.h"
#include "GSDevice9.h"
#include "GSDevice11.h"
#include "GPUSettingsDlg.h"
// Library type code returned by PSEgetLibType() for this plugin.
#define PSE_LT_GPU 2
// Result of the CoInitializeEx() call made in GPUopen(); GPUclose() resets it to E_FAIL
// after calling CoUninitialize().
static HRESULT s_hr = E_FAIL;
// Renderer created by GPUopen() and deleted by GPUclose(); NULL while no renderer exists.
static GPURenderer* s_gpu = NULL;
// Exported plugin-identification entry point: returns the library type code PSE_LT_GPU.
EXPORT_C_(uint32) PSEgetLibType()
{
return PSE_LT_GPU;
}
// Exported plugin-identification entry point: forwards the name string provided by
// GSUtil::GetLibName().
EXPORT_C_(char*) PSEgetLibName()
{
return GSUtil::GetLibName();
}
// Exported plugin-identification entry point.
// Packs the version information into one word: interface version (1) shifted left by 16,
// revision (1) shifted left by 8, OR-ed with PLUGIN_VERSION.
EXPORT_C_(uint32) PSEgetLibVersion()
{
	const uint32 major = 1;
	const uint32 minor = 1;

	return (major << 16) | (minor << 8) | PLUGIN_VERSION;
}
// Exported initialization entry point. Currently does nothing and always returns 0;
// the renderer itself is created in GPUopen().
EXPORT_C_(int32) GPUinit()
{
return 0;
}
// Exported shutdown entry point. Currently does nothing and always returns 0;
// the renderer is released in GPUclose().
EXPORT_C_(int32) GPUshutdown()
{
return 0;
}
// Destroys the current renderer (if any) and, when the COM initialization recorded in
// s_hr succeeded, undoes it with CoUninitialize(). Safe to call repeatedly: afterwards
// s_gpu is NULL and s_hr is E_FAIL. Always returns 0.
EXPORT_C_(int32) GPUclose()
{
	delete s_gpu;
	s_gpu = NULL;

	if(!SUCCEEDED(s_hr))
	{
		// COM was never initialized by GPUopen (or has already been released).
		return 0;
	}

	::CoUninitialize();
	s_hr = E_FAIL;

	return 0;
}
// Opens the plugin on the given window.
//
// Any previously opened renderer is closed first. COM is then initialized for the calling
// thread, the DirectX and SSE requirements are checked, and a software renderer is created
// on top of the device chosen by the "Renderer" configuration value (0 or any unknown
// value: GSDevice9, 1: GSDevice11; the default setting is 1).
//
// Returns 0 on success and -1 on failure. Every failure path goes through GPUclose() so
// that the CoInitializeEx() call made here is balanced and no renderer is left behind.
EXPORT_C_(int32) GPUopen(HWND hWnd)
{
	GPUclose();

	s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);

	if(!GSUtil::CheckDirectX() || !GSUtil::CheckSSE())
	{
		// Release the COM initialization taken above instead of leaving it
		// pending until some later GPUclose()/GPUopen() call.
		GPUclose();

		return -1;
	}

	int renderer = theApp.GetConfig("Renderer", 1);

	switch(renderer)
	{
	default:
	case 0: s_gpu = new GPURendererSW(new GSDevice9()); break;
	case 1: s_gpu = new GPURendererSW(new GSDevice11()); break;
	// TODO: case 3: s_gpu = new GPURendererNull(new GSDeviceNull()); break;
	}

	if(!s_gpu->Create(hWnd))
	{
		GPUclose();

		return -1;
	}

	return 0;
}
// Shows the GPU settings dialog. When the dialog is confirmed with OK the plugin is
// cycled through GPUshutdown()/GPUinit(). Always returns 0.
EXPORT_C_(int32) GPUconfigure()
{
	GPUSettingsDlg dlg;

	const bool accepted = (IDOK == dlg.DoModal());

	if(accepted)
	{
		GPUshutdown();
		GPUinit();
	}

	return 0;
}
// Exported self-test entry point. No test is performed; always returns 0.
EXPORT_C_(int32) GPUtest()
{
return 0;
}
// Exported "about" entry point. Not implemented yet: the body is empty.
EXPORT_C GPUabout()
{
// TODO
}
// Forwards a block of data to the renderer's WriteData().
// `size` is passed through unchanged; GPUwriteData() passes 1 for a single uint32, so
// it is presumably a count of 32-bit words rather than bytes - confirm against
// GPURenderer::WriteData.
// NOTE(review): s_gpu is dereferenced without a NULL check, so the plugin must have
// been opened successfully with GPUopen() first.
EXPORT_C GPUwriteDataMem(const uint8* mem, uint32 size)
{
s_gpu->WriteData(mem, size);
}
// Sends a single 32-bit value to the renderer by passing the address of the local copy
// of `data` to WriteData() with a size of 1.
EXPORT_C GPUwriteData(uint32 data)
{
	uint8* bytes = (uint8*)&data;

	s_gpu->WriteData(bytes, 1);
}
// Asks the renderer's ReadData() to fill `mem`.
// `size` is passed through unchanged; GPUreadData() passes 1 for a single uint32, so
// it is presumably a count of 32-bit words rather than bytes - confirm against
// GPURenderer::ReadData.
EXPORT_C GPUreadDataMem(uint8* mem, uint32 size)
{
s_gpu->ReadData(mem, size);
}
// Reads a single 32-bit value from the renderer. The result starts out as 0 and is
// handed to ReadData() with a size of 1; whatever the renderer stores there is returned.
EXPORT_C_(uint32) GPUreadData()
{
	uint32 value = 0;
	uint8* bytes = (uint8*)&value;

	s_gpu->ReadData(bytes, 1);

	return value;
}
// Forwards a status/control word to the renderer's WriteStatus().
EXPORT_C GPUwriteStatus(uint32 status)
{
s_gpu->WriteStatus(status);
}
// Returns the status word reported by the renderer's ReadStatus().
EXPORT_C_(uint32) GPUreadStatus()
{
return s_gpu->ReadStatus();
}
// Walks a linked list of packets stored in `mem`, starting at offset `addr`.
//
// Each node begins with a 32-bit header: its low 24 bits (as read on the host) are the
// offset of the next node and the byte at addr + 3 is the payload size. A non-empty
// payload, which starts at addr + 4, is handed to the renderer's WriteData(). The walk
// ends when the next offset is 0xffffff.
//
// To avoid spinning forever on a cyclic list, the walk remembers the most recent node
// reached by stepping to a lower offset and the most recent node reached by stepping to
// a higher (or equal) offset, and stops as soon as the current offset matches either.
//
// Always returns 0.
EXPORT_C_(uint32) GPUdmaChain(const uint8* mem, uint32 addr)
{
	// last[0]: previous offset, last[1]: last "downward" offset, last[2]: last "upward" offset
	uint32 last[3];

	memset(last, 0xff, sizeof(last));

	for(;;)
	{
		if(addr == last[1] || addr == last[2])
		{
			break; // offset seen before: the chain loops back on itself
		}

		if(addr < last[0])
		{
			last[1] = addr;
		}
		else
		{
			last[2] = addr;
		}

		last[0] = addr;

		uint8 size = mem[addr + 3];

		if(size > 0)
		{
			s_gpu->WriteData(&mem[addr + 4], size);
		}

		addr = *(uint32*)&mem[addr] & 0xffffff;

		if(addr == 0xffffff)
		{
			break; // end-of-chain marker
		}
	}

	return 0;
}
// Exported mode query. Not implemented yet; always returns 0.
EXPORT_C_(uint32) GPUgetMode()
{
// TODO
return 0;
}
// Exported mode setter. Not implemented yet: `mode` is ignored.
EXPORT_C GPUsetMode(uint32 mode)
{
// TODO
}
// Exported per-frame entry point: forwards to the renderer's VSync().
EXPORT_C GPUupdateLace()
{
s_gpu->VSync();
}
// Asks the renderer to make a snapshot. The "c:/" argument is hard-coded for now (see
// the TODO below); it is presumably the output directory - confirm against
// GPURenderer::MakeSnapshot.
EXPORT_C GPUmakeSnapshot()
{
s_gpu->MakeSnapshot("c:/"); // TODO
}
// Exported on-screen text entry point. Not implemented yet: `text` is ignored.
EXPORT_C GPUdisplayText(char* text)
{
// TODO
}
// Exported display-flags entry point. Not implemented yet: `flags` is ignored.
EXPORT_C GPUdisplayFlags(uint32 flags)
{
// TODO
}
// Save-state entry point.
//
//   type == 0: restore the renderer state from `data` (Defrost).
//   type == 1: store the renderer state into `data` (Freeze).
//   type == 2: slot notification; `data` is not a GPUFreezeData here but is read as a
//              pointer to an int slot number (zero-based, converted to 1..9 below).
//              Presumably this follows the PSEmu Pro plugin convention - confirm.
//
// Returns 1 when the request was handled and 0 otherwise (NULL `data`, unsupported
// freeze data version for types 0/1, slot out of range, or unknown `type`).
EXPORT_C_(int32) GPUfreeze(uint32 type, GPUFreezeData* data)
{
	if(!data)
	{
		return 0;
	}

	if(type == 2)
	{
		// Handled before the version check: for this request the first four bytes of
		// `data` are the slot number, not GPUFreezeData::version, so validating them as
		// a version would reject every slot except the one whose raw value is 1.
		int slot = *(int*)data + 1;

		if(slot < 1 || slot > 9)
		{
			return 0;
		}

		// TODO

		return 1;
	}

	if(data->version != 1)
	{
		return 0;
	}

	if(type == 0)
	{
		s_gpu->Defrost(data);

		return 1;
	}

	if(type == 1)
	{
		s_gpu->Freeze(data);

		return 1;
	}

	return 0;
}
// Exported screen-picture getter. Not implemented yet: `mem` is left untouched.
EXPORT_C GPUgetScreenPic(uint8* mem)
{
// TODO
}
// Exported screen-picture display entry point. Not implemented yet: `mem` is ignored.
EXPORT_C GPUshowScreenPic(uint8* mem)
{
// TODO
}
// Exported cursor entry point. Not implemented yet: all arguments are ignored.
EXPORT_C GPUcursor(int player, int x, int y)
{
// TODO
}

276
plugins/GSdx/GPU.h Normal file
View File

@ -0,0 +1,276 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#pragma pack(push, 1)
#include "GS.h"
// Primitive type codes. Presumably these are the values carried by the 3-bit TYPE field
// of the command-word layouts declared below - confirm against the command decoder.
enum
{
GPU_POLYGON = 1,
GPU_LINE = 2,
GPU_SPRITE = 3,
};
// Bit layout of the 32-bit GPU status word (type GPURegSTATUS, see the GPUReg set below).
// The field widths add up to exactly 32 bits. The first eleven bits (TX..DFE) use the same
// names and widths as GPURegMODE, the next two (MD, ME) match GPURegMASK, and
// WIDTH1/WIDTH0/HEIGHT/ISPAL/ISRGB24/ISINTER reuse the names found in GPURegDMODE.
// NOTE(review): which end of the word the first field occupies depends on the compiler's
// bit-field allocation order; the REG32_ macro itself is defined outside this file.
REG32_(GPUReg, STATUS)
uint32 TX:4;
uint32 TY:1;
uint32 ABR:2;
uint32 TP:2;
uint32 DTD:1;
uint32 DFE:1;
uint32 MD:1;
uint32 ME:1;
uint32 _PAD0:3;
uint32 WIDTH1:1;
uint32 WIDTH0:2;
uint32 HEIGHT:1;
uint32 ISPAL:1;
uint32 ISRGB24:1;
uint32 ISINTER:1;
uint32 DEN:1;
uint32 _PAD1:2;
uint32 IDLE:1;
uint32 IMG:1;
uint32 COM:1;
uint32 DMA:2;
uint32 LCF:1;
// Alternative naming of the same 32 bits, kept for reference only (disabled).
/*
uint32 TX:4;
uint32 TY:1;
uint32 ABR:2;
uint32 TP:2;
uint32 DTD:1;
uint32 DFE:1;
uint32 PBW:1;
uint32 PBC:1;
uint32 _PAD0:3;
uint32 HRES2:1;
uint32 HRES1:2;
uint32 VRES:1;
uint32 ISPAL:1;
uint32 ISRGB24:1;
uint32 ISINTER:1;
uint32 ISSTOP:1;
uint32 _PAD1:1;
uint32 DMARDY:1;
uint32 IDIDLE:1;
uint32 DATARDY:1;
uint32 ISEMPTY:1;
uint32 TMODE:2;
uint32 ODE:1;
*/
REG_END
// Command-word layouts. All five views below share the same shape: 24 bits of payload
// or padding, five option bits and a 3-bit TYPE field declared last (24 + 5 + 3 = 32).

// Generic view: the five option bits are kept together as OPTION.
REG32_(GPUReg, PACKET)
uint32 _PAD:24;
uint32 OPTION:5;
uint32 TYPE:3;
REG_END
// Primitive view: names the low 24 bits VTX and splits the option bits into single flags.
REG32_(GPUReg, PRIM)
uint32 VTX:24;
uint32 TGE:1;
uint32 ABE:1;
uint32 TME:1;
uint32 _PAD2:1;
uint32 IIP:1;
uint32 TYPE:3;
REG_END
// Polygon view: same flag positions as PRIM, with the fourth option bit named VTX.
REG32_(GPUReg, POLYGON)
uint32 _PAD:24;
uint32 TGE:1;
uint32 ABE:1;
uint32 TME:1;
uint32 VTX:1;
uint32 IIP:1;
uint32 TYPE:3;
REG_END
// Line view: the first and third option bits are named ZERO1/ZERO2, the fourth is PLL.
REG32_(GPUReg, LINE)
uint32 _PAD:24;
uint32 ZERO1:1;
uint32 ABE:1;
uint32 ZERO2:1;
uint32 PLL:1;
uint32 IIP:1;
uint32 TYPE:3;
REG_END
// Sprite view: the last two option bits form a 2-bit SIZE field.
REG32_(GPUReg, SPRITE)
uint32 _PAD:24;
uint32 ZERO:1;
uint32 ABE:1;
uint32 TME:1;
uint32 SIZE:2;
uint32 TYPE:3;
REG_END
// Control/display register layouts. Every layout is exactly 32 bits wide; unused bits are
// declared as _PAD.

// RESET carries no fields of its own (32 padding bits).
REG32_(GPUReg, RESET)
uint32 _PAD:32;
REG_END
// Single-bit DEN value; the name matches the DEN bit of GPURegSTATUS.
REG32_(GPUReg, DEN)
uint32 DEN:1;
uint32 _PAD:31;
REG_END
// 2-bit DMA value; the name matches the DMA field of GPURegSTATUS.
REG32_(GPUReg, DMA)
uint32 DMA:2;
uint32 _PAD:30;
REG_END
// X (10 bits) / Y (9 bits) pair. Presumably the display start position - confirm.
REG32_(GPUReg, DAREA)
uint32 X:10;
uint32 Y:9;
uint32 _PAD:13;
REG_END
// Two 12-bit values X1 and X2. Presumably the horizontal display range - confirm.
REG32_(GPUReg, DHRANGE)
uint32 X1:12;
uint32 X2:12;
uint32 _PAD:8;
REG_END
// Y1 (10 bits) and Y2 (11 bits). Presumably the vertical display range - confirm.
REG32_(GPUReg, DVRANGE)
uint32 Y1:10;
uint32 Y2:11;
uint32 _PAD:11;
REG_END
// Display mode bits; WIDTH0, HEIGHT, ISPAL, ISRGB24, ISINTER and WIDTH1 reuse the names
// of the corresponding GPURegSTATUS fields, in a different order.
REG32_(GPUReg, DMODE)
uint32 WIDTH0:2;
uint32 HEIGHT:1;
uint32 ISPAL:1;
uint32 ISRGB24:1;
uint32 ISINTER:1;
uint32 WIDTH1:1;
uint32 REVERSE:1;
uint32 _PAD:24;
REG_END
// 24-bit PARAM value followed by 8 padding bits.
REG32_(GPUReg, GPUINFO)
uint32 PARAM:24;
uint32 _PAD:8;
REG_END
// Drawing-environment and vertex-data layouts. Every layout is exactly 32 bits wide.

// Same field names and widths as the first eleven bits of GPURegSTATUS.
REG32_(GPUReg, MODE)
uint32 TX:4;
uint32 TY:1;
uint32 ABR:2;
uint32 TP:2;
uint32 DTD:1;
uint32 DFE:1;
uint32 _PAD:21;
REG_END
// MD and ME flags, named like the corresponding GPURegSTATUS bits.
REG32_(GPUReg, MASK)
uint32 MD:1;
uint32 ME:1;
uint32 _PAD:30;
REG_END
// Unsigned 10-bit X and Y. GPUDrawingEnvironment keeps two of these (DRAREATL and
// DRAREABR), presumably the top-left and bottom-right drawing-area corners - confirm.
REG32_(GPUReg, DRAREA)
uint32 X:10;
uint32 Y:10;
uint32 _PAD:12;
REG_END
// Signed 11-bit X and Y (declared with int32, so the values can be negative).
REG32_(GPUReg, DROFF)
int32 X:11;
int32 Y:11;
int32 _PAD:10;
REG_END
// Three 8-bit channels R, G, B plus 8 padding bits.
REG32_(GPUReg, RGB)
uint32 R:8;
uint32 G:8;
uint32 B:8;
uint32 _PAD:8;
REG_END
// Signed 11-bit X and Y, each followed by 5 padding bits so that each occupies 16 bits.
REG32_(GPUReg, XY)
int32 X:11;
int32 _PAD1:5;
int32 Y:11;
int32 _PAD2:5;
REG_END
// 8-bit U and V followed by 16 padding bits.
REG32_(GPUReg, UV)
uint32 U:8;
uint32 V:8;
uint32 _PAD:16;
REG_END
// Four 5-bit values. GPUDrawScanline::BeginDraw shifts each of them left by 3 and builds
// the texture-window masks from TWW/TWH and the offsets from TWX/TWY.
REG32_(GPUReg, TWIN)
uint32 TWW:5;
uint32 TWH:5;
uint32 TWX:5;
uint32 TWY:5;
uint32 _PAD:12;
REG_END
// X (6 bits) and Y (9 bits) stored after 16 padding bits.
REG32_(GPUReg, CLUT)
uint32 _PAD1:16;
uint32 X:6;
uint32 Y:9;
uint32 _PAD2:1;
REG_END
// Aggregate of all the 32-bit register layouts declared above, one member per layout.
// NOTE(review): REG32_SET / REG_SET_END are defined outside this file; presumably they
// wrap the members in a union so that one 32-bit word can be viewed through any of the
// layouts - confirm against their definition.
REG32_SET(GPUReg)
GPURegSTATUS STATUS;
GPURegPACKET PACKET;
GPURegPRIM PRIM;
GPURegPOLYGON POLYGON;
GPURegLINE LINE;
GPURegSPRITE SPRITE;
GPURegRESET RESET;
GPURegDEN DEN;
GPURegDMA DMA;
GPURegDAREA DAREA;
GPURegDHRANGE DHRANGE;
GPURegDVRANGE DVRANGE;
GPURegDMODE DMODE;
GPURegGPUINFO GPUINFO;
GPURegMODE MODE;
GPURegMASK MASK;
GPURegDRAREA DRAREA;
GPURegDROFF DROFF;
GPURegRGB RGB;
GPURegXY XY;
GPURegUV UV;
GPURegTWIN TWIN;
GPURegCLUT CLUT;
REG_SET_END
// Save-state block exchanged through GPUfreeze(). It is declared inside the surrounding
// #pragma pack(push, 1) region, so the members are laid out without padding.
struct GPUFreezeData
{
uint32 version; // == 1
// Single 32-bit status value (see GPURegSTATUS for the bit layout used by this plugin).
uint32 status;
// 256 32-bit control values. NOTE(review): their meaning is defined by
// GPURenderer::Freeze/Defrost, which are not visible here.
uint32 control[256];
// 1024 * 1024 16-bit entries (2 MB) of video memory contents.
uint16 vram[1024 * 1024];
};
#pragma pack(pop)

View File

@ -0,0 +1,119 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GPUDrawScanline.h"
// Binds the scanline drawer to its owning GPU state and remembers its id. Both
// code-generator maps receive a reference to this object's own m_env.
// The initializers are listed in member declaration order (m_sp, m_ds, m_state, m_id),
// which is the order in which they are executed anyway.
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
	: m_sp(m_env)
	, m_ds(m_env)
	, m_state(state)
	, m_id(id)
{
}
// Nothing to release explicitly; the members clean up after themselves.
GPUDrawScanline::~GPUDrawScanline()
{
}
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
{
GPUDrawingEnvironment& env = m_state->m_env;
const GPUScanlineParam* p = (const GPUScanlineParam*)data->param;
m_env.sel = p->sel;
m_env.vm = m_state->m_mem.GetPixelAddress(0, 0);
if(m_env.sel.tme)
{
m_env.tex = p->tex;
m_env.clut = p->clut;
if(m_env.sel.twin)
{
uint32 u, v;
u = ~(env.TWIN.TWW << 3) & 0xff;
v = ~(env.TWIN.TWH << 3) & 0xff;
m_env.twin[0].u = GSVector4i((u << 16) | u);
m_env.twin[0].v = GSVector4i((v << 16) | v);
u = env.TWIN.TWX << 3;
v = env.TWIN.TWY << 3;
m_env.twin[1].u = GSVector4i((u << 16) | u) & ~m_env.twin[0].u;
m_env.twin[1].v = GSVector4i((v << 16) | v) & ~m_env.twin[0].v;
}
}
//
f->ssl = m_ds[m_env.sel];
f->sr = NULL; // TODO
// doesn't need all bits => less functions generated
GPUScanlineSelector sel;
sel.key = 0;
sel.iip = m_env.sel.iip;
sel.tfx = m_env.sel.tfx;
sel.twin = m_env.sel.twin;
sel.sprite = m_env.sel.sprite;
f->ssp = m_sp[sel];
}
// Passes the rasterizer statistics of the finished draw, together with the current frame
// number from the state's performance monitor, to the draw-scanline function map.
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats)
{
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
}
//
// Names the underlying function map "GPUSetupPrim" and keeps a reference to the scanline
// environment that is handed to every generator created by Create().
GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap(GPUScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GPUSetupPrim")
, m_env(env)
{
}
// Allocates a setup-prim code generator that works on the buffer `ptr` of at most
// `maxsize` bytes. `key` is not used here; the generator receives the shared environment
// instead. The returned object is allocated with new; ownership passes to the caller.
GPUSetupPrimCodeGenerator* GPUDrawScanline::GPUSetupPrimMap::Create(uint32 key, void* ptr, size_t maxsize)
{
return new GPUSetupPrimCodeGenerator(m_env, ptr, maxsize);
}
//
// Names the underlying function map "GPUDrawScanline" and keeps a reference to the
// scanline environment that is handed to every generator created by Create().
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GPUDrawScanline")
, m_env(env)
{
}
// Allocates a draw-scanline code generator that works on the buffer `ptr` of at most
// `maxsize` bytes. `key` is not used here; the generator receives the shared environment
// instead. The returned object is allocated with new; ownership passes to the caller.
GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(uint32 key, void* ptr, size_t maxsize)
{
return new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize);
}

View File

@ -0,0 +1,69 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPUState.h"
#include "GSRasterizer.h"
#include "GPUScanlineEnvironment.h"
#include "GPUSetupPrimCodeGenerator.h"
#include "GPUDrawScanlineCodeGenerator.h"
// IDrawScanline implementation for the GPU renderer. BeginDraw() fills the scanline
// environment and picks the generated setup-prim / draw-scanline functions from the two
// function maps declared below.
class GPUDrawScanline : public IDrawScanline
{
// Environment filled in by BeginDraw(). Declared before m_sp and m_ds, which both store
// a reference to it.
GPUScanlineEnvironment m_env;
//
// Function map (keyed by uint32) whose Create() builds GPUSetupPrimCodeGenerator objects.
class GPUSetupPrimMap : public GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimStaticPtr>
{
GPUScanlineEnvironment& m_env;
public:
GPUSetupPrimMap(GPUScanlineEnvironment& env);
GPUSetupPrimCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
} m_sp;
//
// Function map (keyed by uint32) whose Create() builds GPUDrawScanlineCodeGenerator objects.
class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlineStaticPtr>
{
GPUScanlineEnvironment& m_env;
public:
GPUDrawScanlineMap(GPUScanlineEnvironment& env);
GPUDrawScanlineCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
} m_ds;
protected:
// GPU state that supplies the drawing environment, video memory and performance monitor.
GPUState* m_state;
// Identifier passed to the constructor.
int m_id;
public:
GPUDrawScanline(GPUState* state, int id);
virtual ~GPUDrawScanline();
// IDrawScanline
void BeginDraw(const GSRasterizerData* data, Functions* f);
void EndDraw(const GSRasterizerStats& stats);
// Prints the statistics collected by the draw-scanline function map.
void PrintStats() {m_ds.PrintStats();}
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPUScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
using namespace Xbyak;
// Xbyak-based code generator for the GPU draw-scanline function. It is constructed with
// the scanline environment and the output buffer (`ptr`, `maxsize`); the generation steps
// themselves are private and implemented in the corresponding .cpp file.
class GPUDrawScanlineCodeGenerator : public CodeGenerator
{
// Copy assignment is declared privately and given no body here, so instances cannot be
// assigned from outside the class.
void operator = (const GPUDrawScanlineCodeGenerator&);
// Static constant tables shared by all instances (8 vectors and a 4 x 16 table of 16-bit
// values); their contents are defined outside this header.
static const GSVector4i m_test[8];
static const uint16 m_dither[4][16];
// Xbyak CPU feature query helper.
util::Cpu m_cpu;
// Scanline environment supplied to the constructor; held by reference.
GPUScanlineEnvironment& m_env;
// Private generation steps.
void Generate();
void Init(int params);
void Step();
void TestMask();
void SampleTexture();
void ColorTFX();
void AlphaBlend();
void Dither();
void WriteFrame();
void ReadTexel(const Xmm& dst, const Xmm& addr);
// Private helpers parameterized on Xbyak operands.
template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
void alltrue();
void blend8(const Xmm& a, const Xmm& b);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
public:
GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
};

View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPU.h"
#pragma pack(push, 1)
// Aggregate of the PSX GPU register values that describe the current drawing
// and display state. Declared 16-byte aligned, inside a pack(1) region.
__declspec(align(16)) class GPUDrawingEnvironment
{
public:
GPURegSTATUS STATUS;
GPURegPRIM PRIM;
GPURegDAREA DAREA;
GPURegDHRANGE DHRANGE;
GPURegDVRANGE DVRANGE;
GPURegDRAREA DRAREATL;
GPURegDRAREA DRAREABR;
GPURegDROFF DROFF;
GPURegTWIN TWIN;
GPURegCLUT CLUT;
// Construction leaves the object in the Reset() state.
GPUDrawingEnvironment()
{
Reset();
}
// Zeroes every register with memset, then sets the few non-zero defaults
// (STATUS.IDLE, STATUS.COM, STATUS.WIDTH0 and the vertical display range 16..256).
void Reset()
{
memset(this, 0, sizeof(*this));
STATUS.IDLE = 1;
STATUS.COM = 1;
STATUS.WIDTH0 = 1;
DVRANGE.Y1 = 16;
DVRANGE.Y2 = 256;
}
// Returns the displayed rectangle, clipped to (0, 0, 1024, 512).
// The width comes from s_width indexed by (WIDTH1 << 2) | WIDTH0; the height
// is the DVRANGE span multiplied by s_height[HEIGHT] / 240.
GSVector4i GetDisplayRect()
{
static int s_width[] = {256, 320, 512, 640, 368, 384, 512, 640};
static int s_height[] = {240, 480};
GSVector4i r;
// The left edge is rounded down to a multiple of 8 (marked FIXME by the author).
r.left = DAREA.X & ~7; // FIXME
r.top = DAREA.Y;
r.right = r.left + s_width[(STATUS.WIDTH1 << 2) | STATUS.WIDTH0];
r.bottom = r.top + (DVRANGE.Y2 - DVRANGE.Y1) * s_height[STATUS.HEIGHT] / 240;
return r.rintersect(GSVector4i(0, 0, 1024, 512));
}
// Returns 50 when STATUS.ISPAL is set, otherwise 60.
int GetFPS()
{
return STATUS.ISPAL ? 50 : 60;
}
};
#pragma pack(pop)

View File

@ -0,0 +1,667 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GPULocalMemory.h"
#include "GSdx.h"
// Bit masks for the fields of a 16-bit pixel, used by Expand16():
// bit 15, bits 10-14, bits 5-9 and bits 0-4 respectively.
const GSVector4i GPULocalMemory::m_xxxa(0x00008000);
const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
// Allocates the emulated video memory, the CLUT cache and the unpacked
// texture-page cache, and builds the per-page pointer table.
GPULocalMemory::GPULocalMemory()
{
	// Scale exponents come from the configuration and are clamped to [0, 2].
	m_scale.x = min(max(theApp.GetConfig("scale_x", 0), 0), 2);
	m_scale.y = min(max(theApp.GetConfig("scale_y", 0), 0), 2);

	// Size in BYTES of the pixel area at the largest scale
	// ((1024 << 2) x (512 << 2) 16-bit pixels).
	int size = (1 << (12 + 11)) * sizeof(uint16);

	// Twice the pixel area is allocated; the second half hosts the CLUT cache.
	m_vm = (uint16*)VirtualAlloc(NULL, size * 2, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);

	memset(m_vm, 0, size);

	// m_vm is a uint16*, so the byte count has to be converted to an element
	// count. Adding 'size' directly would move 2 * size bytes forward, i.e. to
	// the very end of the allocation, and every CLUT write would be out of bounds.
	m_clut.buff = m_vm + size / sizeof(uint16);
	m_clut.dirty = true;

	// One block holds all three unpacked page caches: 32 pages of 256x256
	// one-byte texels for each of the two indexed formats, followed by
	// 32 pages with four bytes reserved per texel for the direct format.
	size = 256 * 256 * (1 + 1 + 4) * 32;

	m_texture.buff[0] = (uint8*)VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
	m_texture.buff[1] = m_texture.buff[0] + 256 * 256 * 32;
	m_texture.buff[2] = m_texture.buff[1] + 256 * 256 * 32;

	memset(m_texture.buff[0], 0, size);
	memset(m_texture.valid, 0, sizeof(m_texture.valid));

	// Page table for the two byte-per-texel caches.
	for(int y = 0, offset = 0; y < 2; y++)
	{
		for(int x = 0; x < 16; x++, offset += 256 * 256)
		{
			m_texture.page[0][y][x] = &((uint8*)m_texture.buff[0])[offset];
			m_texture.page[1][y][x] = &((uint8*)m_texture.buff[1])[offset];
		}
	}

	// Page table for the direct-colour cache (offset counted in uint32 units).
	for(int y = 0, offset = 0; y < 2; y++)
	{
		for(int x = 0; x < 16; x++, offset += 256 * 256)
		{
			m_texture.page[2][y][x] = &((uint32*)m_texture.buff[2])[offset];
		}
	}
}
// Releases the two regions obtained with VirtualAlloc in the constructor.
// m_clut.buff and m_texture.buff[1..2] point inside those regions and are
// therefore not freed separately.
GPULocalMemory::~GPULocalMemory()
{
VirtualFree(m_vm, 0, MEM_RELEASE);
VirtualFree(m_texture.buff[0], 0, MEM_RELEASE);
}
// Returns the cached colour look-up table for (tp, cx, cy), reloading it from
// video memory when the cache is dirty or was filled for different arguments.
// tp == 0 selects a 16-entry table; at scale 0 any other tp copies 256 entries,
// at scale 1 and 2 only tp == 1 copies (256 entries).
const uint16* GPULocalMemory::GetCLUT(int tp, int cx, int cy)
{
	const bool upToDate = !m_clut.dirty && m_clut.tp == tp && m_clut.cx == cx && m_clut.cy == cy;

	if(upToDate)
	{
		return m_clut.buff;
	}

	uint16* from = GetPixelAddressScaled(cx << 4, cy);
	uint16* to = m_clut.buff;

	switch(m_scale.x)
	{
	case 0:
		// Unscaled memory: the entries are contiguous.
		memcpy(to, from, (tp == 0 ? 16 : 256) * 2);
		break;
	case 1:
	case 2:
		{
			// Scaled memory: every source pixel is repeated, so take one per group.
			const int stride = 1 << m_scale.x;
			const int entries = tp == 0 ? 16 : (tp == 1 ? 256 : 0);

			for(int n = 0; n < entries; n++)
			{
				to[n] = from[n * stride];
			}
		}
		break;
	default:
		ASSERT(0);
		break;
	}

	m_clut.tp = tp;
	m_clut.cx = cx;
	m_clut.cy = cy;
	m_clut.dirty = false;

	return m_clut.buff;
}
const void* GPULocalMemory::GetTexture(int tp, int tx, int ty)
{
if(tp == 3)
{
ASSERT(0);
return NULL;
}
void* buff = m_texture.page[tp][ty][tx];
uint32 flag = 1 << tx;
if((m_texture.valid[tp][ty] & flag) == 0)
{
int bpp = 0;
switch(tp)
{
case 0:
ReadPage4(tx, ty, (uint8*)buff);
bpp = 4;
break;
case 1:
ReadPage8(tx, ty, (uint8*)buff);
bpp = 8;
break;
case 2:
case 3:
ReadPage16(tx, ty, (uint16*)buff);
bpp = 16;
default:
// FIXME: __assume(0); // vc9 generates bogus code in release mode
break;
}
// TODO: m_state->m_perfmon.Put(GSPerfMon::Unswizzle, 256 * 256 * bpp >> 3);
m_texture.valid[tp][ty] |= flag;
}
return buff;
}
void GPULocalMemory::Invalidate(const GSVector4i& r)
{
if(!m_clut.dirty)
{
if(r.top <= m_clut.cy && m_clut.cy < r.bottom)
{
int left = m_clut.cx << 4;
int right = left + (m_clut.tp == 0 ? 16 : 256);
if(r.left < right && r.right > left)
{
m_clut.dirty = true;
}
}
}
for(int y = 0, ye = min(r.bottom, 512), j = 0; y < ye; y += 256, j++)
{
if(r.top >= y + 256) continue;
for(int x = 0, xe = min(r.right, 1024), i = 0; x < xe; x += 64, i++)
{
uint32 flag = 1 << i;
if(r.left >= x + 256) continue;
m_texture.valid[2][j] &= ~flag;
if(r.left >= x + 128) continue;
m_texture.valid[1][j] &= ~flag;
if(r.left >= x + 64) continue;
m_texture.valid[0][j] &= ~flag;
}
}
}
void GPULocalMemory::FillRect(const GSVector4i& r, uint16 c)
{
Invalidate(r);
uint16* RESTRICT dst = GetPixelAddressScaled(r.left, r.top);
int w = r.width() << m_scale.x;
int h = r.height() << m_scale.y;
int pitch = GetWidth();
for(int j = 0; j < h; j++, dst += pitch)
{
for(int i = 0; i < w; i++)
{
dst[i] = c;
}
}
}
// Copies a tightly packed block of unscaled pixels (src, r.width() per row)
// into rectangle r, replicating each pixel horizontally and each row
// vertically according to the scale. Overlapping caches are invalidated first.
void GPULocalMemory::WriteRect(const GSVector4i& r, const uint16* RESTRICT src)
{
	Invalidate(r);

	uint16* RESTRICT dst = GetPixelAddressScaled(r.left, r.top);

	const int cols = r.width();
	const int rows = r.height();
	const int stride = GetWidth();
	const int repeatY = 1 << m_scale.y;

	if(m_scale.x == 0)
	{
		// No horizontal scaling: each source row is copied verbatim repeatY times.
		for(int row = 0; row < rows; row++, src += cols)
		{
			for(int rep = 0; rep < repeatY; rep++, dst += stride)
			{
				memcpy(dst, src, cols * 2);
			}
		}
	}
	else if(m_scale.x == 1 || m_scale.x == 2)
	{
		// Each source pixel expands to repeatX adjacent destination pixels.
		const int repeatX = 1 << m_scale.x;

		for(int row = 0; row < rows; row++, src += cols)
		{
			for(int rep = 0; rep < repeatY; rep++, dst += stride)
			{
				for(int col = 0; col < cols; col++)
				{
					for(int n = 0; n < repeatX; n++)
					{
						dst[col * repeatX + n] = src[col];
					}
				}
			}
		}
	}
	else
	{
		ASSERT(0);
	}
}
// Reads rectangle r back into a tightly packed buffer of unscaled pixels,
// taking the first pixel of every scaled group and the first line of every
// scaled row.
void GPULocalMemory::ReadRect(const GSVector4i& r, uint16* RESTRICT dst)
{
	uint16* RESTRICT src = GetPixelAddressScaled(r.left, r.top);

	const int cols = r.width();
	const int rows = r.height();
	const int srcStride = GetWidth() << m_scale.y;

	switch(m_scale.x)
	{
	case 0:
		for(int row = 0; row < rows; row++, src += srcStride, dst += cols)
		{
			memcpy(dst, src, cols * 2);
		}
		break;
	case 1:
	case 2:
		{
			const int step = 1 << m_scale.x;

			for(int row = 0; row < rows; row++, src += srcStride, dst += cols)
			{
				for(int col = 0; col < cols; col++)
				{
					dst[col] = src[col * step];
				}
			}
		}
		break;
	default:
		ASSERT(0);
		break;
	}
}
// Copies a w x h rectangle (unscaled units) from (sx, sy) to (dx, dy) inside
// video memory, row by row from top to bottom, after invalidating the caches
// that overlap the destination.
void GPULocalMemory::MoveRect(int sx, int sy, int dx, int dy, int w, int h)
{
	Invalidate(GSVector4i(dx, dy, dx + w, dy + h));

	uint16* s = GetPixelAddressScaled(sx, sy);
	uint16* d = GetPixelAddressScaled(dx, dy);

	// From here on w and h are measured in scaled pixels.
	w <<= m_scale.x;
	h <<= m_scale.y;

	int pitch = GetWidth();

	for(int i = 0; i < h; i++, s += pitch, d += pitch)
	{
		// Source and destination live in the same buffer and a row may overlap
		// itself (sy == dy with close sx/dx). memcpy is undefined for
		// overlapping ranges, memmove is not.
		memmove(d, s, w * sizeof(uint16));
	}
}
void GPULocalMemory::ReadPage4(int tx, int ty, uint8* RESTRICT dst)
{
uint16* src = GetPixelAddressScaled(tx << 6, ty << 8);
int pitch = GetWidth() << m_scale.y;
if(m_scale.x == 0)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i] >> 12) & 0xf;
}
}
}
else if(m_scale.x == 1)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i * 2] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i * 2] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i * 2] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i * 2] >> 12) & 0xf;
}
}
}
else if(m_scale.x == 2)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i * 4] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i * 4] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i * 4] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i * 4] >> 12) & 0xf;
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::ReadPage8(int tx, int ty, uint8* RESTRICT dst)
{
uint16* src = GetPixelAddressScaled(tx << 6, ty << 8);
int pitch = GetWidth() << m_scale.y;
if(m_scale.x == 0)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
memcpy(dst, src, 256);
}
}
else if(m_scale.x == 1)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 128; i++)
{
((uint16*)dst)[i] = src[i * 2];
}
}
}
else if(m_scale.x == 2)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 128; i++)
{
((uint16*)dst)[i] = src[i * 4];
}
}
}
else
{
ASSERT(0);
}
}
// Copies texture page (tx, ty) in the 16-bit format into dst: 256 rows of 256
// pixels. In scaled memory only the first pixel of each group is read.
void GPULocalMemory::ReadPage16(int tx, int ty, uint16* RESTRICT dst)
{
	uint16* src = GetPixelAddressScaled(tx << 6, ty << 8);

	const int srcStride = GetWidth() << m_scale.y;

	switch(m_scale.x)
	{
	case 0:
		for(int row = 0; row < 256; row++, src += srcStride, dst += 256)
		{
			memcpy(dst, src, 512);
		}
		break;
	case 1:
	case 2:
		{
			const int step = 1 << m_scale.x;

			for(int row = 0; row < 256; row++, src += srcStride, dst += 256)
			{
				for(int px = 0; px < 256; px++)
				{
					dst[px] = src[px * step];
				}
			}
		}
		break;
	default:
		ASSERT(0);
		break;
	}
}
void GPULocalMemory::ReadFrame32(const GSVector4i& r, uint32* RESTRICT dst, bool rgb24)
{
uint16* src = GetPixelAddress(r.left, r.top);
int pitch = GetWidth();
if(rgb24)
{
for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch)
{
Expand24(src, dst, r.width());
}
}
else
{
for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch)
{
Expand16(src, dst, r.width());
}
}
}
void GPULocalMemory::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels)
{
GSVector4i rm = m_rxxx;
GSVector4i gm = m_xgxx;
GSVector4i bm = m_xxbx;
GSVector4i am = m_xxxa;
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0, j = pixels >> 3; i < j; i++)
{
GSVector4i c = s[i];
GSVector4i l = c.upl16();
GSVector4i h = c.uph16();
d[i * 2 + 0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | ((l & am) << 16);
d[i * 2 + 1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | ((h & am) << 16);
}
}
void GPULocalMemory::Expand24(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels)
{
uint8* s = (uint8*)src;
if(m_scale.x == 0)
{
for(int i = 0; i < pixels; i += 2, s += 6)
{
dst[i + 0] = (s[2] << 16) | (s[1] << 8) | s[0];
dst[i + 1] = (s[5] << 16) | (s[4] << 8) | s[3];
}
}
else if(m_scale.x == 1)
{
for(int i = 0; i < pixels; i += 4, s += 12)
{
dst[i + 0] = dst[i + 1] = (s[4] << 16) | (s[1] << 8) | s[0];
dst[i + 2] = dst[i + 3] = (s[9] << 16) | (s[8] << 8) | s[5];
}
}
else if(m_scale.x == 2)
{
for(int i = 0; i < pixels; i += 8, s += 24)
{
dst[i + 0] = dst[i + 1] = dst[i + 2] = dst[i + 3] = (s[8] << 16) | (s[1] << 8) | s[0];
dst[i + 4] = dst[i + 5] = dst[i + 6] = dst[i + 7] = (s[17] << 16) | (s[16] << 8) | s[9];
}
}
else
{
ASSERT(0);
}
}
// Debug helper: writes rectangle r2 (unscaled coordinates) of video memory to
// a 32-bit BMP file at 'path', interpreting the memory as format tp and using
// the CLUT at (cx, cy) for the indexed formats. Does nothing if the file
// cannot be opened.
void GPULocalMemory::SaveBMP(const string& path, const GSVector4i& r2, int tp, int cx, int cy)
{
// Convert the rectangle to scaled coordinates.
GSVector4i r;
r.left = r2.left << m_scale.x;
r.top = r2.top << m_scale.y;
r.right = r2.right << m_scale.x;
r.bottom = r2.bottom << m_scale.y;
// Force both horizontal edges to even values.
r.left &= ~1;
r.right &= ~1;
if(FILE* fp = fopen(path.c_str(), "wb"))
{
BITMAPINFOHEADER bih;
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = r.width();
bih.biHeight = r.height();
bih.biPlanes = 1;
bih.biBitCount = 32;
bih.biCompression = BI_RGB;
bih.biSizeImage = bih.biWidth * bih.biHeight * 4;
BITMAPFILEHEADER bfh;
memset(&bfh, 0, sizeof(bfh));
// Multi-character constant; NOTE(review): its value is implementation-defined,
// presumably yields the "BM" signature bytes with the compiler in use - confirm.
bfh.bfType = 'MB';
bfh.bfOffBits = sizeof(bfh) + sizeof(bih);
bfh.bfSize = bfh.bfOffBits + bih.biSizeImage;
bfh.bfReserved1 = bfh.bfReserved2 = 0;
fwrite(&bfh, 1, sizeof(bfh), fp);
fwrite(&bih, 1, sizeof(bih), fp);
int pitch = GetWidth();
// One-line scratch buffers: 16-bit pixels and their 32-bit expansion.
uint16* buff = (uint16*)_aligned_malloc(pitch * sizeof(WORD), 16);
uint32* buff32 = (uint32*)_aligned_malloc(pitch * sizeof(uint32), 16);
// r is already scaled, so the unscaled-address accessor is used here.
uint16* src = GetPixelAddress(r.left, r.bottom - 1);
const uint16* clut = GetCLUT(tp, cx, cy);
// Lines are emitted from the bottom of the rectangle upwards.
for(int j = r.bottom - 1; j >= r.top; j--, src -= pitch)
{
switch(tp)
{
case 0: // 4 bpp
// Each source byte holds two indices: low nibble first, then high nibble.
for(int i = 0, k = r.width() / 2; i < k; i++)
{
buff[i * 2 + 0] = clut[((uint8*)src)[i] & 0xf];
buff[i * 2 + 1] = clut[((uint8*)src)[i] >> 4];
}
break;
case 1: // 8 bpp
for(int i = 0, k = r.width(); i < k; i++)
{
buff[i] = clut[((uint8*)src)[i]];
}
break;
case 2: // 16 bpp;
for(int i = 0, k = r.width(); i < k; i++)
{
buff[i] = src[i];
}
break;
case 3: // 24 bpp
// Not implemented: buff is left as it was for this line.
// TODO
break;
}
Expand16(buff, buff32, r.width());
// Swap the low and the third byte of every pixel, keeping the other two.
for(int i = 0, k = r.width(); i < k; i++)
{
buff32[i] = (buff32[i] & 0xff00ff00) | ((buff32[i] & 0x00ff0000) >> 16) | ((buff32[i] & 0x000000ff) << 16);
}
fwrite(buff32, 1, r.width() * 4, fp);
}
_aligned_free(buff);
_aligned_free(buff32);
fclose(fp);
}
}

View File

@ -0,0 +1,86 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPU.h"
#include "GSVector.h"
// Emulated PSX video memory with optional upscaling, plus a CLUT cache and a
// cache of unpacked texture pages.
class GPULocalMemory
{
// Field masks of a 16-bit pixel (defined in the .cpp file).
static const GSVector4i m_xxxa;
static const GSVector4i m_xxbx;
static const GSVector4i m_xgxx;
static const GSVector4i m_rxxx;
// Base of the video memory allocation.
uint16* m_vm;
// Cached colour look-up table and the arguments it was loaded for.
struct
{
uint16* buff;
int tp, cx, cy;
bool dirty;
} m_clut;
// Unpacked texture pages: one buffer per format, a pointer per
// [format][row][column] page and one valid bit per page column.
struct
{
uint8* buff[3];
void* page[3][2][16];
uint16 valid[3][2];
} m_texture;
// Scale exponents: scaled sizes are the native ones shifted left by these.
GSVector2i m_scale;
public:
GPULocalMemory();
virtual ~GPULocalMemory();
GSVector2i GetScale() {return m_scale;}
// Width and height of the scaled memory, in pixels.
int GetWidth() {return 1 << (10 + m_scale.x);}
int GetHeight() {return 1 << (9 + m_scale.y);}
// Address of pixel (x, y) where x and y are already scaled coordinates.
uint16* GetPixelAddress(int x, int y) const {return &m_vm[(y << (10 + m_scale.x)) + x];}
// Address of pixel (x, y) where x and y are native coordinates; both are
// shifted by the scale before indexing.
uint16* GetPixelAddressScaled(int x, int y) const {return &m_vm[((y << m_scale.y) << (10 + m_scale.x)) + (x << m_scale.x)];}
const uint16* GetCLUT(int tp, int cx, int cy);
const void* GetTexture(int tp, int tx, int ty);
void Invalidate(const GSVector4i& r);
void FillRect(const GSVector4i& r, uint16 c);
void WriteRect(const GSVector4i& r, const uint16* RESTRICT src);
void ReadRect(const GSVector4i& r, uint16* RESTRICT dst);
void MoveRect(int sx, int sy, int dx, int dy, int w, int h);
void ReadPage4(int tx, int ty, uint8* RESTRICT dst);
void ReadPage8(int tx, int ty, uint8* RESTRICT dst);
void ReadPage16(int tx, int ty, uint16* RESTRICT dst);
void ReadFrame32(const GSVector4i& r, uint32* RESTRICT dst, bool rgb24);
void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels);
void Expand24(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels);
void SaveBMP(const string& path, const GSVector4i& r, int tp, int cx, int cy);
};
#pragma warning(default: 4244)

View File

@ -0,0 +1,223 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GPURenderer.h"
#include "GSdx.h"
// Maps a subclassed window handle to the renderer that owns it; consulted by
// the static WndProc to route messages to the right instance.
map<HWND, GPURenderer*> GPURenderer::m_wnd2gpu;
// Stores the device and loads the presentation options from the plugin
// configuration. No window is attached until Create() is called.
GPURenderer::GPURenderer(GSDevice* dev)
	: m_dev(dev)
	, m_hWnd(NULL)
	, m_wndproc(NULL)
{
	m_filter = theApp.GetConfig("filter", 0);
	m_dither = theApp.GetConfig("dithering", 1);
	m_aspectratio = theApp.GetConfig("AspectRatio", 1);
	m_vsync = theApp.GetConfig("vsync", 0) != 0;

	// The scale is owned by the local memory object; keep a copy for quick access.
	m_scale = m_mem.GetScale();
}
// Undoes the window subclassing performed by Create(), if any.
GPURenderer::~GPURenderer()
{
	if(m_wndproc == NULL)
	{
		// Create() never replaced the window procedure; nothing to undo.
		return;
	}

	SetWindowLongPtr(m_hWnd, GWLP_WNDPROC, (LONG_PTR)m_wndproc);

	m_wnd2gpu.erase(m_hWnd);
}
// Attaches the renderer to hWnd: subclasses the window, registers it in the
// handle-to-renderer map, adds the overlapped-window style, creates the
// device on it and resets the GPU state. Returns false when attaching the
// window or creating the device fails.
bool GPURenderer::Create(HWND hWnd)
{
	// TODO: move subclassing inside GSWnd::Attach

	m_hWnd = hWnd;

	// Remember the previous window procedure so it can be chained and restored.
	m_wndproc = (WNDPROC)GetWindowLongPtr(hWnd, GWLP_WNDPROC);
	SetWindowLongPtr(hWnd, GWLP_WNDPROC, (LONG_PTR)WndProc);

	if(!m_wnd.Attach(m_hWnd)) return false;

	m_wnd2gpu[hWnd] = this;

	const DWORD style = GetWindowLong(hWnd, GWL_STYLE) | WS_OVERLAPPEDWINDOW;
	SetWindowLong(hWnd, GWL_STYLE, style);

	m_wnd.Show();

	if(!m_dev->Create(&m_wnd)) return false;

	m_dev->SetVsync(m_vsync);

	Reset();

	return true;
}
bool GPURenderer::Merge()
{
GSTexture* st[2] = {GetOutput(), NULL};
if(!st[0])
{
return false;
}
GSVector2i s = st[0]->GetSize();
GSVector4 sr[2];
GSVector4 dr[2];
sr[0] = GSVector4(0, 0, 1, 1);
dr[0] = GSVector4(0, 0, s.x, s.y);
m_dev->Merge(st, sr, dr, s, 1, 1, GSVector4(0, 0, 0, 1));
return true;
}
void GPURenderer::VSync()
{
GSPerfMonAutoTimer pmat(m_perfmon);
m_perfmon.Put(GSPerfMon::Frame);
// m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ?
if(!IsWindow(m_hWnd)) return;
Flush();
if(!m_dev->IsLost(true))
{
if(!Merge())
{
return;
}
}
else
{
ResetDevice();
}
// osd
if((m_perfmon.GetFrame() & 0x1f) == 0)
{
m_perfmon.Update();
double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame);
GSVector4i r = m_env.GetDisplayRect();
int w = r.width() << m_scale.x;
int h = r.height() << m_scale.y;
string s = format(
"%I64d | %d x %d | %.2f fps (%d%%) | %d/%d | %d%% CPU | %.2f | %.2f",
m_perfmon.GetFrame(), w, h, fps, (int)(100.0 * fps / m_env.GetFPS()),
(int)m_perfmon.Get(GSPerfMon::Prim),
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
m_perfmon.Get(GSPerfMon::Unswizzle) / 1024
);
double fillrate = m_perfmon.Get(GSPerfMon::Fillrate);
if(fillrate > 0)
{
s = format("%s | %.2f mpps", s.c_str(), fps * fillrate / (1024 * 1024));
}
SetWindowText(m_hWnd, s.c_str());
}
GSVector4i r;
GetClientRect(m_hWnd, r);
m_dev->Present(r.fit(m_aspectratio), 0);
}
bool GPURenderer::MakeSnapshot(const string& path)
{
time_t t = time(NULL);
char buff[16];
if(!strftime(buff, sizeof(buff), "%Y%m%d%H%M%S", localtime(&t)))
{
return false;
}
if(GSTexture* t = m_dev->GetCurrent())
{
return t->Save(format("%s_%s.bmp", path.c_str(), buff));
}
return false;
}
// Static window procedure installed by Create(): forwards the message to the
// renderer registered for hWnd. An unregistered handle asserts and yields 0.
LRESULT CALLBACK GPURenderer::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
	map<HWND, GPURenderer*>::iterator entry = m_wnd2gpu.find(hWnd);

	if(entry == m_wnd2gpu.end())
	{
		ASSERT(0);

		return 0;
	}

	return entry->second->OnMessage(message, wParam, lParam);
}
// Handles the renderer hot keys on key release and passes everything else to
// the window procedure that was active before subclassing.
//   Delete    - cycle the texture filter (0..2)
//   End       - toggle dithering
//   Page Down - cycle the aspect ratio (0..2)
LRESULT GPURenderer::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
{
	if(message == WM_KEYUP)
	{
		if(wParam == VK_DELETE)
		{
			m_filter = (m_filter + 1) % 3;

			return 0;
		}
		else if(wParam == VK_END)
		{
			m_dither = !m_dither;

			return 0;
		}
		else if(wParam == VK_NEXT)
		{
			m_aspectratio = (m_aspectratio + 1) % 3;

			return 0;
		}
	}

	return CallWindowProc(m_wndproc, m_hWnd, message, wParam, lParam);
}

178
plugins/GSdx/GPURenderer.h Normal file
View File

@ -0,0 +1,178 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPUState.h"
#include "GSVertexList.h"
#include "GSDevice.h"
// Base class of the PSX GPU renderers: owns the output window hookup and the
// per-frame present logic on top of the GPU state machine (GPUState).
class GPURenderer : public GPUState
{
// Feeds GetOutput() into the device's merge step; see the .cpp file.
bool Merge();
protected:
// Device used for presenting; supplied to the constructor.
GSDevice* m_dev;
// Options loaded from the configuration and cycled with hot keys.
int m_filter;
int m_dither;
int m_aspectratio;
bool m_vsync;
// Copy of the video memory scale exponents.
GSVector2i m_scale;
// Called from VSync() when the device reports it is lost; default does nothing.
virtual void ResetDevice() {}
// Supplies the texture to present; may return NULL.
virtual GSTexture* GetOutput() = 0;
// Subclassed window, its previous window procedure and the global
// handle-to-renderer map used by the static WndProc.
HWND m_hWnd;
WNDPROC m_wndproc;
static map<HWND, GPURenderer*> m_wnd2gpu;
GSWnd m_wnd;
static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
LRESULT OnMessage(UINT message, WPARAM wParam, LPARAM lParam);
public:
GPURenderer(GSDevice* dev);
virtual ~GPURenderer();
virtual bool Create(HWND hWnd);
virtual void VSync();
virtual bool MakeSnapshot(const string& path);
};
// Renderer base parameterised by the vertex type. Collects incoming vertices
// in m_vl, assembles complete primitives into the growable m_vertices array
// and hands them to the derived class through Draw().
template<class Vertex>
class GPURendererT : public GPURenderer
{
protected:
// Assembled vertices waiting to be drawn; m_count of them are in use.
Vertex* m_vertices;
int m_count;
// Threshold at which the buffer is grown (kept 100 below the real capacity).
int m_maxcount;
// Vertices received for the primitive currently being assembled.
GSVertexList<Vertex> m_vl;
// Drops all pending vertices, then resets the base class state.
void Reset()
{
m_count = 0;
m_vl.RemoveAll();
__super::Reset();
}
// Discards the partially assembled primitive.
void ResetPrim()
{
m_vl.RemoveAll();
}
// Draws and clears the assembled vertices, if there are any.
void FlushPrim()
{
if(m_count > 0)
{
/*
Dump("db");
if(m_env.PRIM.TME)
{
GSVector4i r;
r.left = m_env.STATUS.TX << 6;
r.top = m_env.STATUS.TY << 8;
r.right = r.left + 256;
r.bottom = r.top + 256;
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r).c_str(), m_env.STATUS.TP, r, false);
}
*/
Draw();
m_count = 0;
//Dump("dc", false);
}
}
// Grows the vertex array to at least 10000 entries or 1.5x the current
// threshold. The threshold is then set 100 below the allocated size so that
// DrawingKick() can write a whole primitive after its bounds check.
// NOTE(review): the result of _aligned_realloc is not checked.
void GrowVertexBuffer()
{
m_maxcount = max(10000, m_maxcount * 3/2);
m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16);
m_maxcount -= 100;
}
// Tries to assemble one primitive. 'count' receives PRIM.VTX, the number of
// vertices the primitive needs. Returns NULL while fewer vertices are
// queued (or for an unknown primitive type); otherwise copies the vertices
// to the end of m_vertices, clears the queue and returns a pointer to them.
// The caller is responsible for advancing m_count.
__forceinline Vertex* DrawingKick(int& count)
{
count = (int)m_env.PRIM.VTX;
if(m_vl.GetCount() < count)
{
return NULL;
}
if(m_count >= m_maxcount)
{
GrowVertexBuffer();
}
Vertex* v = &m_vertices[m_count];
switch(m_env.PRIM.TYPE)
{
case GPU_POLYGON:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.GetAt(2, v[2]);
m_vl.RemoveAll();
break;
case GPU_LINE:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.RemoveAll();
break;
case GPU_SPRITE:
m_vl.GetAt(0, v[0]);
m_vl.GetAt(1, v[1]);
m_vl.RemoveAll();
break;
default:
ASSERT(0);
m_vl.RemoveAll();
return NULL;
}
return v;
}
// Implemented by the concrete renderer.
virtual void VertexKick() = 0;
virtual void Draw() = 0;
public:
// Starts with no vertex storage; it is allocated on first use.
// NOTE(review): the initializer list order differs from the member
// declaration order; members are initialised in declaration order.
GPURendererT(GSDevice* dev)
: GPURenderer(dev)
, m_count(0)
, m_maxcount(0)
, m_vertices(NULL)
{
}
virtual ~GPURendererT()
{
if(m_vertices) _aligned_free(m_vertices);
}
};

View File

@ -0,0 +1,191 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GPURendererSW.h"
#include "GSdx.h"
// Creates the software renderer and its rasterizer list; the thread count is
// taken from the "swthreads" configuration value (default 1).
GPURendererSW::GPURendererSW(GSDevice* dev)
	: GPURendererT(dev)
	, m_texture(NULL)
{
	const int threads = theApp.GetConfig("swthreads", 1);

	m_rl.Create<GPUDrawScanline>(this, threads);
}
// Destroys the output texture owned by this renderer (NULL is fine for delete).
GPURendererSW::~GPURendererSW()
{
delete m_texture;
}
// Drops the output texture; GetOutput() asks the device for a new one through
// ResizeTexture on its next call.
void GPURendererSW::ResetDevice()
{
delete m_texture;
m_texture = NULL;
}
// Converts the displayed part of video memory to 32-bit colour and uploads it
// to the output texture, which is (re)sized to the scaled display rectangle.
// Returns m_texture whether or not the resize succeeded.
GSTexture* GPURendererSW::GetOutput()
{
	GSVector4i r = m_env.GetDisplayRect();

	// Bring the rectangle into scaled video memory coordinates.
	r.left <<= m_scale.x;
	r.top <<= m_scale.y;
	r.right <<= m_scale.x;
	r.bottom <<= m_scale.y;

	if(!m_dev->ResizeTexture(&m_texture, r.width(), r.height()))
	{
		return m_texture;
	}

	// TODO

	// Staging buffer sized for the whole scaled memory; allocated once on the
	// first successful call and kept for the lifetime of the process.
	static uint32* staging = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);

	m_mem.ReadFrame32(r, staging, !!m_env.STATUS.ISRGB24);

	m_texture->Update(r.rsize(), staging, m_mem.GetWidth() * sizeof(uint32));

	return m_texture;
}
void GPURendererSW::Draw()
{
const GPUDrawingEnvironment& env = m_env;
//
GPUScanlineParam p;
p.sel.key = 0;
p.sel.iip = env.PRIM.IIP;
p.sel.me = env.STATUS.ME;
if(env.PRIM.ABE)
{
p.sel.abe = env.PRIM.ABE;
p.sel.abr = env.STATUS.ABR;
}
p.sel.tge = env.PRIM.TGE;
if(env.PRIM.TME)
{
p.sel.tme = env.PRIM.TME;
p.sel.tlu = env.STATUS.TP < 2;
p.sel.twin = (env.TWIN.u32 & 0xfffff) != 0;
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY);
if(!t) {ASSERT(0); return;}
p.tex = t;
p.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
}
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
p.sel.md = env.STATUS.MD;
p.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
p.sel.scalex = m_mem.GetScale().x;
//
GSRasterizerData data;
data.vertices = m_vertices;
data.count = m_count;
data.param = &p;
data.scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data.scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data.scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data.scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
switch(env.PRIM.TYPE)
{
case GPU_POLYGON: data.primclass = GS_TRIANGLE_CLASS; break;
case GPU_LINE: data.primclass = GS_LINE_CLASS; break;
case GPU_SPRITE: data.primclass = GS_SPRITE_CLASS; break;
default: __assume(0);
}
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
// TODO
{
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 p = m_vertices[i].p;
tl = tl.min(p);
br = br.max(p);
}
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
r.left >>= m_scale.x;
r.top >>= m_scale.y;
r.right >>= m_scale.x;
r.bottom >>= m_scale.y;
Invalidate(r);
}
}
void GPURendererSW::VertexKick()
{
GSVertexSW& dst = m_vl.AddTail();
// TODO: x/y + off.x/y should wrap around at +/-1024
int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.x;
int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.y;
int s = m_v.UV.X;
int t = m_v.UV.Y;
GSVector4 pt(x, y, s, t);
dst.p = pt.xyxy(GSVector4::zero());
dst.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f;
// dst.c = GSVector4(m_v.RGB.u32) * 128.0f;
dst.c = GSVector4(GSVector4i::load((int)m_v.RGB.u32).u8to32() << 7);
int count = 0;
if(GSVertexSW* v = DrawingKick(count))
{
// TODO
m_count += count;
}
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPURenderer.h"
#include "GPUDrawScanline.h"
// Software renderer for the PSX GPU: rasterizes GSVertexSW primitives through
// a rasterizer list and presents video memory through a device texture.
class GPURendererSW : public GPURendererT<GSVertexSW>
{
protected:
// Rasterizers created in the constructor.
GSRasterizerList m_rl;
// Output texture; owned by this object (deleted in the destructor and in ResetDevice).
GSTexture* m_texture;
void ResetDevice();
GSTexture* GetOutput();
void VertexKick();
void Draw();
public:
GPURendererSW(GSDevice* dev);
virtual ~GPURendererSW();
};

View File

@ -0,0 +1,81 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSVector.h"
#include "GPULocalMemory.h"
// Bit-packed description of one scanline-drawing variant. The same 32 bits are
// viewed as individual flags, as two combined fields, or as a single integer
// key. The trailing comments give each field's starting bit.
union GPUScanlineSelector
{
struct
{
uint32 iip:1; // 0
uint32 me:1; // 1
uint32 abe:1; // 2
uint32 abr:2; // 3
uint32 tge:1; // 5
uint32 tme:1; // 6
uint32 twin:1; // 7
uint32 tlu:1; // 8
uint32 dtd:1; // 9
uint32 ltf:1; // 10
uint32 md:1; // 11
uint32 sprite:1; // 12
uint32 scalex:2; // 13
};
// Overlay: rfb spans bits 1-2 (me + abe), tfx spans bits 5-6 (tge + tme).
struct
{
uint32 _pad1:1; // 0
uint32 rfb:2; // 1
uint32 _pad2:2; // 3
uint32 tfx:2; // 5
};
// All bits at once.
uint32 key;
operator uint32() {return key;}
};
// Per-draw parameters passed to the rasterizer: the selector plus the texture
// page and CLUT pointers (the last two are only set by the caller when
// texturing is enabled).
__declspec(align(16)) struct GPUScanlineParam
{
GPUScanlineSelector sel;
const void* tex;
const uint16* clut;
};
// Working state of the scanline drawing code, 16-byte aligned.
// NOTE(review): presumably addressed by field offset from generated code, so
// the member order should be treated as fixed - confirm against the code generators.
__declspec(align(16)) struct GPUScanlineEnvironment
{
GPUScanlineSelector sel;
// Video memory base, texture page and CLUT for the current draw.
void* vm;
const void* tex;
const uint16* clut;
// GSVector4i md; // similar to gs fba
struct {GSVector4i u, v;} twin[3];
struct {GSVector4i s, t, r, g, b, _pad[3];} d;
struct {GSVector4i st, c;} d8;
struct {GSVector4i s, t, r, b, g, uf, vf, dither, fd, test;} temp;
};

View File

@ -0,0 +1,196 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSdx.h"
#include "GSUtil.h"
#include "GPUSettingsDlg.h"
#include "resource.h"
// Entries offered in the renderer combo box: {value, display name, third
// string (unused here)}. UpdateControls() treats value 0 as the Direct3D9 and
// value 1 as the Direct3D11 renderer.
GSSetting GPUSettingsDlg::g_renderers[] =
{
    {0, "Direct3D9 (Software)", ""},
    {1, "Direct3D11 (Software)", ""},
    // {2, "Null (Null)", ""},
};
// Entries offered in the texture filter combo box; the value is stored under
// the "filter" configuration key.
GSSetting GPUSettingsDlg::g_filter[] =
{
    {0, "Nearest", ""},
    {1, "Bilinear (polygons only)", ""},
    {2, "Bilinear", ""},
};
// Entries offered in the dithering combo box; the value is stored under the
// "dithering" configuration key.
GSSetting GPUSettingsDlg::g_dithering[] =
{
    {0, "Disabled", ""},
    {1, "Auto", ""},
};
// Entries offered in the aspect ratio combo box; the value is stored under the
// "AspectRatio" configuration key.
GSSetting GPUSettingsDlg::g_aspectratio[] =
{
    {0, "Stretch", ""},
    {1, "4:3", ""},
    {2, "16:9", ""},
};
// Entries offered in the internal scaling combo box. Each value packs two
// 2-bit numbers: bits 0-1 select the horizontal factor and bits 2-3 the
// vertical factor (0, 1, 2 are labelled x1, x2, x4). OnCommand() splits the
// value back into the "scale_x" and "scale_y" configuration keys.
GSSetting GPUSettingsDlg::g_scale[] =
{
    {0 | (0 << 2), "H x 1 - V x 1", ""},
    {1 | (0 << 2), "H x 2 - V x 1", ""},
    {0 | (1 << 2), "H x 1 - V x 2", ""},
    {1 | (1 << 2), "H x 2 - V x 2", ""},
    {2 | (1 << 2), "H x 4 - V x 2", ""},
    {1 | (2 << 2), "H x 2 - V x 4", ""},
    {2 | (2 << 2), "H x 4 - V x 4", ""},
};
// Binds the dialog to the IDD_GPUCONFIG dialog resource.
GPUSettingsDlg::GPUSettingsDlg() : GSDialog(IDD_GPUCONFIG)
{
    // Nothing else to set up; the controls are populated in OnInit().
}
// Populates every control of the GPU settings dialog from the stored
// configuration.
//
// Fix: Direct3DCreate9 returns an interface pointer that already carries one
// reference. Constructing a CComPtr from that raw pointer added a second
// reference that was never released, so the IDirect3D9 object leaked every
// time the dialog was opened. Attach() adopts the existing reference instead.
void GPUSettingsDlg::OnInit()
{
    __super::OnInit();

    // Resolution list: m_modes stores the display modes and every combo box
    // item carries a pointer to its entry as item data.
    m_modes.clear();

    {
        D3DDISPLAYMODE mode;

        memset(&mode, 0, sizeof(mode));

        // All-zero placeholder entry; it stays selected unless one of the
        // enumerated modes matches the stored configuration.
        m_modes.push_back(mode);

        ComboBoxAppend(IDC_RESOLUTION, "Please select...", (LPARAM)&m_modes.back(), true);

        CComPtr<IDirect3D9> d3d;

        d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION)); // take ownership without an extra AddRef

        if(d3d)
        {
            uint32 w = theApp.GetConfig("ModeWidth", 0);
            uint32 h = theApp.GetConfig("ModeHeight", 0);
            uint32 hz = theApp.GetConfig("ModeRefreshRate", 0);

            uint32 n = d3d->GetAdapterModeCount(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8);

            for(uint32 i = 0; i < n; i++)
            {
                if(S_OK == d3d->EnumAdapterModes(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8, i, &mode))
                {
                    m_modes.push_back(mode);

                    string str = format("%dx%d %dHz", mode.Width, mode.Height, mode.RefreshRate);

                    // Pre-select the mode that matches the saved width/height/refresh rate.
                    ComboBoxAppend(IDC_RESOLUTION, str.c_str(), (LPARAM)&m_modes.back(), w == mode.Width && h == mode.Height && hz == mode.RefreshRate);
                }
            }
        }
    }

    // The renderer list is copied into a local vector before being handed to the combo box.
    vector<GSSetting> renderers;

    for(size_t i = 0; i < countof(g_renderers); i++)
    {
        renderers.push_back(g_renderers[i]);
    }

    ComboBoxInit(IDC_RENDERER, &renderers[0], renderers.size(), theApp.GetConfig("Renderer", 0));
    ComboBoxInit(IDC_FILTER, g_filter, countof(g_filter), theApp.GetConfig("filter", 0));
    ComboBoxInit(IDC_DITHERING, g_dithering, countof(g_dithering), theApp.GetConfig("dithering", 1));
    ComboBoxInit(IDC_ASPECTRATIO, g_aspectratio, countof(g_aspectratio), theApp.GetConfig("AspectRatio", 1));

    // scale_x / scale_y are packed into one value the same way g_scale encodes them.
    ComboBoxInit(IDC_SCALE, g_scale, countof(g_scale), theApp.GetConfig("scale_x", 0) | (theApp.GetConfig("scale_y", 0) << 2));

    CheckDlgButton(m_hWnd, IDC_WINDOWED, theApp.GetConfig("windowed", 1));

    // Thread-count spinner: range 1..16, initial position from the configuration.
    SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 1));
    SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("swthreads", 1), 0));

    UpdateControls();
}
// Handles dialog commands: refreshes dependent controls when the renderer
// selection changes and writes every setting to the configuration on OK. The
// command is always passed on to the base class afterwards.
bool GPUSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
{
    if(id == IDC_RENDERER && code == CBN_SELCHANGE)
    {
        UpdateControls();
    }
    else if(id == IDOK)
    {
        INT_PTR sel;

        // The resolution item data is a pointer to the chosen display mode.
        if(ComboBoxGetSelData(IDC_RESOLUTION, sel))
        {
            const D3DDISPLAYMODE* dm = (D3DDISPLAYMODE*)sel;

            theApp.SetConfig("ModeWidth", (int)dm->Width);
            theApp.SetConfig("ModeHeight", (int)dm->Height);
            theApp.SetConfig("ModeRefreshRate", (int)dm->RefreshRate);
        }

        // Combo boxes whose item data is stored unchanged under one key.
        static const UINT s_ids[] = {IDC_RENDERER, IDC_FILTER, IDC_DITHERING, IDC_ASPECTRATIO};
        static const char* const s_keys[] = {"Renderer", "filter", "dithering", "AspectRatio"};

        for(size_t n = 0; n < countof(s_ids); n++)
        {
            if(ComboBoxGetSelData(s_ids[n], sel))
            {
                theApp.SetConfig(s_keys[n], (int)sel);
            }
        }

        // The scale value packs the horizontal factor in bits 0-1 and the vertical one in bits 2-3.
        if(ComboBoxGetSelData(IDC_SCALE, sel))
        {
            theApp.SetConfig("scale_x", sel & 3);
            theApp.SetConfig("scale_y", (sel >> 2) & 3);
        }

        HWND spinner = GetDlgItem(m_hWnd, IDC_SWTHREADS);

        theApp.SetConfig("swthreads", (int)SendMessage(spinner, UDM_GETPOS, 0, 0));
        theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED));
    }

    return __super::OnCommand(hWnd, id, code);
}
void GPUSettingsDlg::UpdateControls()
{
INT_PTR i;
if(ComboBoxGetSelData(IDC_RENDERER, i))
{
bool dx9 = i == 0;
bool dx11 = i == 1;
bool sw = i >= 0 && i <= 1;
ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO9), dx9 ? SW_SHOW : SW_HIDE);
ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO11), dx11 ? SW_SHOW : SW_HIDE);
EnableWindow(GetDlgItem(m_hWnd, IDC_SCALE), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw);
}
}

View File

@ -0,0 +1,45 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSDialog.h"
#include "GSSetting.h"
// Configuration dialog for the GPU plugin.
class GPUSettingsDlg : public GSDialog
{
private:
    // Display modes shown in the resolution combo box; the combo box items
    // keep pointers to these elements.
    list<D3DDISPLAYMODE> m_modes;

    // Shows/hides and enables/disables controls depending on the selected renderer.
    void UpdateControls();

protected:
    // Fills the controls from the stored configuration.
    void OnInit();

    // Reacts to renderer selection changes and stores the settings on OK.
    bool OnCommand(HWND hWnd, UINT id, UINT code);

public:
    GPUSettingsDlg();

    // Option tables backing the combo boxes.
    static GSSetting g_renderers[];
    static GSSetting g_filter[];
    static GSSetting g_dithering[];
    static GSSetting g_aspectratio[];
    static GSSetting g_scale[];
};

View File

@ -0,0 +1,216 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
// TODO: x64
#include "StdAfx.h"
#include "GSVertexSW.h"
#include "GPUSetupPrimCodeGenerator.h"
// Emits the primitive setup routine into the caller-supplied buffer `ptr` of
// at most `maxsize` bytes, specialised for the selector held in `env`.
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize)
    : CodeGenerator(maxsize, ptr)
    , m_env(env)
{
    // Only 32-bit code generation is implemented so far.
#if _M_AMD64
#error TODO
#endif

    Generate();
}
// Emits the machine code that fills the per-primitive parts of m_env
// (twin[2], d8 and d). Which instructions are emitted depends on the selector
// bits in m_env.sel, so each selector value gets its own specialised routine.
// The emitted code reads vertex data through ecx and the scanline delta
// through edx (see the commented C++ equivalents below); how the caller
// supplies those registers is not visible here -- presumably a fastcall-style
// convention, confirm against the call site.
void GPUSetupPrimCodeGenerator::Generate()
{
// Texturing without a texture window: set up twin[2].
if(m_env.sel.tme && !m_env.sel.twin)
{
// xmm0 = all ones; the shifts below derive constants from it.
pcmpeqd(xmm0, xmm0);
if(m_env.sel.sprite)
{
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
cvttps2dq(xmm1, ptr[ecx + sizeof(GSVertexSW) * 1 + 32]);
psrld(xmm1, 8);
psrld(xmm0, 31);
psubd(xmm1, xmm0);
// t = t.ps32(t);
// t = t.upl16(t);
packssdw(xmm1, xmm1);
punpcklwd(xmm1, xmm1);
// m_env.twin[2].u = t.xxxx();
// m_env.twin[2].v = t.yyyy();
pshufd(xmm2, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
pshufd(xmm3, xmm1, _MM_SHUFFLE(1, 1, 1, 1));
movdqa(ptr[&m_env.twin[2].u], xmm2);
movdqa(ptr[&m_env.twin[2].v], xmm3);
}
else
{
// TODO: not really needed
// m_env.twin[2].u = GSVector4i::x00ff();
// m_env.twin[2].v = GSVector4i::x00ff();
psrlw(xmm0, 8);
movdqa(ptr[&m_env.twin[2].u], xmm0);
movdqa(ptr[&m_env.twin[2].v], xmm0);
}
}
// Deltas are only needed when texturing or when colours are interpolated.
if(m_env.sel.tme || m_env.sel.iip && m_env.sel.tfx != 3)
{
// Load the constants: xmm5 = m_shift[0], xmm6 = m_shift[1], xmm7 = m_shift[2].
for(int i = 0; i < 3; i++)
{
movaps(Xmm(5 + i), ptr[&m_shift[i]]);
}
// GSVector4 dt = dscan.t;
// GSVector4 dc = dscan.c;
// xmm4 receives dc (from [edx]) and xmm3 receives dt (from [edx + 32]).
movaps(xmm4, ptr[edx]);
movaps(xmm3, ptr[edx + 32]);
// GSVector4i dtc8 = GSVector4i(dt * 8.0f).ps32(GSVector4i(dc * 8.0f));
movaps(xmm1, xmm3);
mulps(xmm1, xmm5);
cvttps2dq(xmm1, xmm1);
movaps(xmm2, xmm4);
mulps(xmm2, xmm5);
cvttps2dq(xmm2, xmm2);
packssdw(xmm1, xmm2);
if(m_env.sel.tme)
{
// m_env.d8.st = dtc8.upl16(dtc8);
movdqa(xmm0, xmm1);
punpcklwd(xmm0, xmm0);
movdqa(ptr[&m_env.d8.st], xmm0);
}
if(m_env.sel.iip && m_env.sel.tfx != 3)
{
// m_env.d8.c = dtc8.uph16(dtc8);
punpckhwd(xmm1, xmm1);
movdqa(ptr[&m_env.d8.c], xmm1);
}
// xmm3 = dt
// xmm4 = dc
// xmm6 = ps0123
// xmm7 = ps4567
// xmm0, xmm1, xmm2, xmm5 = free
if(m_env.sel.tme)
{
// GSVector4 dtx = dt.xxxx();
// GSVector4 dty = dt.yyyy();
movaps(xmm0, xmm3);
shufps(xmm3, xmm3, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
// m_env.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567));
movaps(xmm1, xmm3);
mulps(xmm3, xmm6);
mulps(xmm1, xmm7);
cvttps2dq(xmm3, xmm3);
cvttps2dq(xmm1, xmm1);
packssdw(xmm3, xmm1);
movdqa(ptr[&m_env.d.s], xmm3);
// m_env.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567));
movaps(xmm1, xmm0);
mulps(xmm0, xmm6);
mulps(xmm1, xmm7);
cvttps2dq(xmm0, xmm0);
cvttps2dq(xmm1, xmm1);
packssdw(xmm0, xmm1);
movdqa(ptr[&m_env.d.t], xmm0);
}
// xmm4 = dc
// xmm6 = ps0123
// xmm7 = ps4567
// xmm0, xmm1, xmm2, xmm3, xmm5 = free
if(m_env.sel.iip && m_env.sel.tfx != 3)
{
// GSVector4 dcx = dc.xxxx();
// GSVector4 dcy = dc.yyyy();
// GSVector4 dcz = dc.zzzz();
movaps(xmm0, xmm4);
movaps(xmm1, xmm4);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
// m_env.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567));
movaps(xmm2, xmm4);
mulps(xmm4, xmm6);
mulps(xmm2, xmm7);
cvttps2dq(xmm4, xmm4);
cvttps2dq(xmm2, xmm2);
packssdw(xmm4, xmm2);
movdqa(ptr[&m_env.d.r], xmm4);
// m_env.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567));
movaps(xmm2, xmm0);
mulps(xmm0, xmm6);
mulps(xmm2, xmm7);
cvttps2dq(xmm0, xmm0);
cvttps2dq(xmm2, xmm2);
packssdw(xmm0, xmm2);
movdqa(ptr[&m_env.d.g], xmm0);
// m_env.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567));
movaps(xmm2, xmm1);
mulps(xmm1, xmm6);
mulps(xmm2, xmm7);
cvttps2dq(xmm1, xmm1);
cvttps2dq(xmm2, xmm2);
packssdw(xmm1, xmm2);
movdqa(ptr[&m_env.d.b], xmm1);
}
}
// The generated routine always ends with a plain return.
ret();
}
// Constants loaded by Generate() into xmm5..xmm7:
//   [0] the factor 8 used for the "times eight" deltas,
//   [1] lane indices 0..3 (ps0123),
//   [2] lane indices 4..7 (ps4567).
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] =
{
    GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
    GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
    GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
};

View File

@ -0,0 +1,44 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPUScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
using namespace Xbyak;
// Xbyak-based generator for the per-primitive setup routine. The code is
// emitted from the constructor into a buffer supplied by the caller.
class GPUSetupPrimCodeGenerator : public CodeGenerator
{
private:
    // Private assignment operator: instances are not meant to be assigned
    // (the class holds a reference member).
    void operator = (const GPUSetupPrimCodeGenerator&);

    // Vector constants used by the generated code.
    static const GSVector4 m_shift[3];

    util::Cpu m_cpu;

    // Environment whose selector drives code generation and whose fields the
    // generated code writes.
    GPUScanlineEnvironment& m_env;

    // Emits the routine; called once from the constructor.
    void Generate();

public:
    GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
};

744
plugins/GSdx/GPUState.cpp Normal file
View File

@ -0,0 +1,744 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GPUState.h"
// Builds the status-command and packet dispatch tables, clears the recorded
// status words and resets the GPU state.
GPUState::GPUState()
    : s_n(0)
{
    memset(m_status, 0, sizeof(m_status));

    // Any status command without a dedicated handler ends up in SCH_Null.
    for(int cmd = 0; cmd < countof(m_fpGPUStatusCommandHandlers); cmd++)
    {
        m_fpGPUStatusCommandHandlers[cmd] = &GPUState::SCH_Null;
    }

    // Status commands, indexed by the top byte of the status word.
    m_fpGPUStatusCommandHandlers[0x00] = &GPUState::SCH_ResetGPU;
    m_fpGPUStatusCommandHandlers[0x01] = &GPUState::SCH_ResetCommandBuffer;
    m_fpGPUStatusCommandHandlers[0x02] = &GPUState::SCH_ResetIRQ;
    m_fpGPUStatusCommandHandlers[0x03] = &GPUState::SCH_DisplayEnable;
    m_fpGPUStatusCommandHandlers[0x04] = &GPUState::SCH_DMASetup;
    m_fpGPUStatusCommandHandlers[0x05] = &GPUState::SCH_StartOfDisplayArea;
    m_fpGPUStatusCommandHandlers[0x06] = &GPUState::SCH_HorizontalDisplayRange;
    m_fpGPUStatusCommandHandlers[0x07] = &GPUState::SCH_VerticalDisplayRange;
    m_fpGPUStatusCommandHandlers[0x08] = &GPUState::SCH_DisplayMode;
    m_fpGPUStatusCommandHandlers[0x10] = &GPUState::SCH_GPUInfo;

    // Data packets, indexed by PACKET.TYPE.
    m_fpGPUPacketHandler[0] = &GPUState::PH_Command;
    m_fpGPUPacketHandler[1] = &GPUState::PH_Polygon;
    m_fpGPUPacketHandler[2] = &GPUState::PH_Line;
    m_fpGPUPacketHandler[3] = &GPUState::PH_Sprite;
    m_fpGPUPacketHandler[4] = &GPUState::PH_Move;
    m_fpGPUPacketHandler[5] = &GPUState::PH_Write;
    m_fpGPUPacketHandler[6] = &GPUState::PH_Read;
    m_fpGPUPacketHandler[7] = &GPUState::PH_Environment;

    Reset();
}
// Nothing to release here; the member buffers clean up after themselves.
GPUState::~GPUState()
{
}
void GPUState::Reset()
{
m_env.Reset();
m_mem.Invalidate(GSVector4i(0, 0, 1024, 512));
memset(&m_v, 0, sizeof(m_v));
}
void GPUState::Flush()
{
FlushPrim();
}
// Derives the primitive register for the packet `r` and, when it differs from
// the current one, flushes pending work before switching. A change of
// primitive type additionally resets the primitive assembly.
void GPUState::SetPrim(GPUReg* r)
{
    if(r->PRIM.TYPE != m_env.PRIM.TYPE)
    {
        ResetPrim();
    }

    GPURegPRIM prim = r->PRIM;

    prim.VTX = 0;

    // Keep only the bits that matter for each primitive type; the low bits
    // receive a small per-type constant (3 for polygons, 2 otherwise).
    switch(r->PRIM.TYPE)
    {
    case GPU_POLYGON:
        prim.u32 = (r->PRIM.u32 & 0xF7000000) | 3; // TYPE IIP TME ABE TGE
        break;
    case GPU_LINE:
        prim.u32 = (r->PRIM.u32 & 0xF2000000) | 2; // TYPE IIP ABE
        prim.TGE = 1; // ?
        break;
    case GPU_SPRITE:
        prim.u32 = (r->PRIM.u32 & 0xE7000000) | 2; // TYPE TME ABE TGE
        break;
    }

    if(prim.u32 != m_env.PRIM.u32)
    {
        Flush();

        m_env.PRIM = prim;
    }
}
// Takes the upper 16 bits (X Y) of `r` as the new CLUT register value and
// flushes pending work first if that changes anything.
void GPUState::SetCLUT(GPUReg* r)
{
    const uint32 xyMask = 0xFFFF0000; // X Y

    const uint32 merged = (r->u32 & xyMask) | (m_env.CLUT.u32 & ~xyMask);

    if(merged == m_env.CLUT.u32)
    {
        return;
    }

    Flush();

    m_env.CLUT.u32 = merged;
}
// Copies the texture page bits (TP ABR TY TX), found in bits 16..24 of `r`,
// into the low nine bits of the status register; pending work is flushed
// first if the status changes.
void GPUState::SetTPAGE(GPUReg* r)
{
    const uint32 pageMask = 0x000001FF; // TP ABR TY TX

    const uint32 merged = ((r->u32 >> 16) & pageMask) | (m_env.STATUS.u32 & ~pageMask);

    if(merged == m_env.STATUS.u32)
    {
        return;
    }

    Flush();

    m_env.STATUS.u32 = merged;
}
void GPUState::Invalidate(const GSVector4i& r)
{
m_mem.Invalidate(r);
}
// Appends `size` 32-bit words from `mem` to the write buffer and executes
// every complete packet found in it.
//
// Each handler returns the number of words it consumed, or 0 when the packet
// is still incomplete. Fix: on an incomplete packet the function used to
// return immediately, skipping the Remove() call, so packets that had already
// been executed in this call stayed in the buffer and were executed again on
// the next call. Now the loop is left instead and the consumed prefix is
// always removed, keeping only the incomplete tail.
void GPUState::WriteData(const uint8* mem, uint32 size)
{
    GSPerfMonAutoTimer pmat(m_perfmon);

    size <<= 2; // words -> bytes

    m_write.Append(mem, size);

    int i = 0;

    while(i < m_write.bytes)
    {
        GPUReg* r = (GPUReg*)&m_write.buff[i];

        int ret = (this->*m_fpGPUPacketHandler[r->PACKET.TYPE])(r, (m_write.bytes - i) >> 2);

        if(ret == 0) break; // need more data

        i += ret << 2;
    }

    // Drop everything that was executed; Remove(0) leaves the contents untouched.
    m_write.Remove(i);
}
// Copies up to `size` 32-bit words from the read buffer into `mem`. When fewer
// bytes are available only those are copied and the rest of `mem` is left
// untouched. STATUS.IMG is cleared once the buffer has been drained.
void GPUState::ReadData(uint8* mem, uint32 size)
{
    GSPerfMonAutoTimer pmat(m_perfmon);

    const int available = m_read.bytes - m_read.cur;

    int count = (int)size << 2; // words -> bytes

    if(count > available)
    {
        // The caller asked for more than is buffered; hand out what is there.
        count = available;
    }

    memcpy(mem, &m_read.buff[m_read.cur], count);

    m_read.cur += count;

    if(m_read.cur >= m_read.bytes)
    {
        m_env.STATUS.IMG = 0;
    }
}
// Records the status word under its command number (the top byte) and
// dispatches it to the matching status command handler.
void GPUState::WriteStatus(uint32 status)
{
    GSPerfMonAutoTimer pmat(m_perfmon);

    const uint32 cmd = status >> 24;

    m_status[cmd] = status;

    GPUStatusCommandHandler handler = m_fpGPUStatusCommandHandlers[cmd];

    (this->*handler)((GPUReg*)&status);
}
// Returns the status register. The LCF field is inverted on every read (the
// original author marked this with a question mark).
uint32 GPUState::ReadStatus()
{
    GSPerfMonAutoTimer pmat(m_perfmon);

    m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ?

    const uint32 status = m_env.STATUS.u32;

    return status;
}
// Saves the status register, the 256 recorded status words and the full
// 1024x512 memory contents into `data`.
void GPUState::Freeze(GPUFreezeData* data)
{
    data->status = m_env.STATUS.u32;

    memcpy(data->control, m_status, 256 * 4);

    const GSVector4i everything(0, 0, 1024, 512);

    m_mem.ReadRect(everything, data->vram);
}
void GPUState::Defrost(const GPUFreezeData* data)
{
m_env.STATUS.u32 = data->status;
memcpy(m_status, data->control, 256 * 4);
m_mem.WriteRect(GSVector4i(0, 0, 1024, 512), data->vram);
for(int i = 0; i <= 8; i++)
{
WriteStatus(m_status[i]);
}
}
// Fallback for status commands without a dedicated handler.
void GPUState::SCH_Null(GPUReg* r)
{
    ASSERT(0); // unexpected status command
}
// Status command 0x00: performs a full reset.
void GPUState::SCH_ResetGPU(GPUReg* r)
{
    this->Reset();
}
// Status command 0x01: currently a no-op.
void GPUState::SCH_ResetCommandBuffer(GPUReg* r)
{
    // ? (intentionally empty)
}
// Status command 0x02: currently a no-op.
void GPUState::SCH_ResetIRQ(GPUReg* r)
{
    // ? (intentionally empty)
}
// Status command 0x03: copies the DEN field into the status register.
void GPUState::SCH_DisplayEnable(GPUReg* r)
{
    this->m_env.STATUS.DEN = r->DEN.DEN;
}
// Status command 0x04: copies the DMA field into the status register.
void GPUState::SCH_DMASetup(GPUReg* r)
{
    this->m_env.STATUS.DMA = r->DMA.DMA;
}
// Status command 0x05: stores the display area register.
void GPUState::SCH_StartOfDisplayArea(GPUReg* r)
{
    this->m_env.DAREA = r->DAREA;
}
// Status command 0x06: stores the horizontal display range register.
void GPUState::SCH_HorizontalDisplayRange(GPUReg* r)
{
    this->m_env.DHRANGE = r->DHRANGE;
}
// Status command 0x07: stores the vertical display range register.
void GPUState::SCH_VerticalDisplayRange(GPUReg* r)
{
    this->m_env.DVRANGE = r->DVRANGE;
}
// Status command 0x08: copies the display mode fields from `r` into the
// status register, one field at a time.
void GPUState::SCH_DisplayMode(GPUReg* r)
{
    // Width is split over two fields (WIDTH0 / WIDTH1).
    m_env.STATUS.WIDTH0 = r->DMODE.WIDTH0;
    m_env.STATUS.HEIGHT = r->DMODE.HEIGHT;

    m_env.STATUS.ISPAL = r->DMODE.ISPAL;
    m_env.STATUS.ISRGB24 = r->DMODE.ISRGB24;
    m_env.STATUS.ISINTER = r->DMODE.ISINTER;

    m_env.STATUS.WIDTH1 = r->DMODE.WIDTH1;
}
// Status command 0x10: selects one 32-bit value according to GPUINFO.PARAM and
// makes it the sole content of the read buffer, to be fetched with ReadData().
void GPUState::SCH_GPUInfo(GPUReg* r)
{
    uint32 info = 0;

    switch(r->GPUINFO.PARAM)
    {
    case 0x0:
    case 0x1:
    case 0x3:
        info = m_env.DRAREATL.u32;
        break;
    case 0x2:
        info = m_env.TWIN.u32;
        break;
    case 0x4:
        info = m_env.DRAREABR.u32;
        break;
    case 0x5:
    case 0x6:
        info = m_env.DROFF.u32;
        break;
    case 0x7:
        info = 2;
        break;
    case 0x8:
    case 0xf:
        info = 0xBFC03720; // ?
        break;
    default:
        ASSERT(0);
        break;
    }

    // Replace whatever was queued with the single answer word.
    m_read.RemoveAll();
    m_read.Append((uint8*)&info, 4);
    m_read.cur = 0;
}
// Packet type 0: miscellaneous commands selected by PACKET.OPTION.
// Returns the number of words consumed, or 0 if more data is needed.
//
// Fix (fillrect): the 15-bit fill colour was assembled from the red channel
// three times, so every fill came out grey. The green and blue channels are
// now used for the middle and upper 5-bit fields.
// NOTE(review): assumes GPUReg's RGB view exposes G and B next to R and that
// memory uses the blue-high / red-low 5:5:5 layout -- confirm against GPU.h.
int GPUState::PH_Command(GPUReg* r, int size)
{
    switch(r->PACKET.OPTION)
    {
    case 0: // ???
        return 1;
    case 1: // clear cache
        return 1;
    case 2: // fillrect
        {
            if(size < 3) return 0;

            Flush();

            // r[1] holds the top-left corner, r[2] the width and height.
            GSVector4i r2;

            r2.left = r[1].XY.X;
            r2.top = r[1].XY.Y;
            r2.right = r2.left + r[2].XY.X;
            r2.bottom = r2.top + r[2].XY.Y;

            // Reduce each 8-bit channel to 5 bits and pack them.
            uint16 c = (uint16)(((r[0].RGB.B >> 3) << 10) | ((r[0].RGB.G >> 3) << 5) | (r[0].RGB.R >> 3));

            m_mem.FillRect(r2, c);

            Invalidate(r2);

            Dump("f");

            return 3;
        }
    }

    ASSERT(0);

    return 1;
}
// Packet type 1: triangle or quad. Returns the number of words consumed, or 0
// if the packet is not complete yet. A quad is submitted as two triangles
// (vertices 0-1-2 and 1-2-3).
int GPUState::PH_Polygon(GPUReg* r, int size)
{
    const bool shaded = r[0].POLYGON.IIP != 0;
    const bool textured = r[0].POLYGON.TME != 0;

    const int vertices = r[0].POLYGON.VTX ? 4 : 3;

    // One command/colour word, one position per vertex, plus optional
    // texture coordinates and the extra colours of shaded polygons.
    int required = 1 + vertices;

    if(textured) required += vertices;
    if(shaded) required += vertices - 1;

    if(size < required) return 0;

    //

    SetPrim(r);

    if(textured)
    {
        // The CLUT comes with the first texture coordinate word, the texture
        // page with the second one.
        SetCLUT(&r[2]);
        SetTPAGE(&r[shaded ? 5 : 4]);
    }

    //

    GPUVertex v[4];

    int pos = 0;

    for(int n = 0; n < vertices; n++)
    {
        // Flat polygons take every colour from the first word.
        v[n].RGB = r[shaded ? pos : 0].RGB;

        if(n == 0 || shaded) pos++;

        v[n].XY = r[pos++].XY;

        if(textured)
        {
            v[n].UV.X = r[pos].UV.U;
            v[n].UV.Y = r[pos].UV.V;

            pos++;
        }
    }

    for(int first = 0; first + 3 <= vertices; first++)
    {
        for(int k = 0; k < 3; k++)
        {
            m_v = v[first + k];

            VertexKick();
        }
    }

    //

    return required;
}
// Packet type 2: single line or polyline. Returns the number of words
// consumed, or 0 if the packet is not complete yet.
//
// Word layout, as read by the vertex loop below:
//   flat:   r[0] = command + colour, then one XY word per vertex
//   shaded: r[0] = command + colour 0, XY 0, then (colour, XY) per vertex
// A polyline is closed by the word 0x55555555.
//
// Fixes:
//  - A single line never checked that all of its words had arrived and could
//    read past the buffered data.
//  - The terminator search did not stop at the first match, so with several
//    packets buffered it picked the last terminator and swallowed the packets
//    in between.
//  - For shaded polylines the vertex count was taken as "terminator index - 1"
//    although every vertex occupies two words; the loop then read far past the
//    terminator. The count now matches the layout above, and the search only
//    looks at positions where a terminator can legally appear (after at least
//    two vertices).
int GPUState::PH_Line(GPUReg* r, int size)
{
    const bool shaded = r->LINE.IIP != 0;

    int vertices = 0;
    int required = 0;

    if(r->LINE.PLL)
    {
        // First legal terminator position and distance between candidates.
        const int first = shaded ? 4 : 3;
        const int step = shaded ? 2 : 1;

        int term = 0;

        for(int i = first; i < size; i += step)
        {
            if(r[i].u32 == 0x55555555)
            {
                term = i;

                break;
            }
        }

        if(term == 0)
        {
            return 0; // terminator not received yet
        }

        vertices = shaded ? term / 2 : term - 1;

        required = term + 1; // everything up to and including the terminator
    }
    else
    {
        vertices = 2;

        required = shaded ? 4 : 3;

        if(size < required) return 0;
    }

    //

    SetPrim(r);

    //

    for(int i = 0, j = 0; j < vertices; j++)
    {
        // From the third vertex on, the previous end point is submitted again
        // as the start of the next segment.
        if(j >= 2) VertexKick();

        m_v.RGB = r[shaded ? i : 0].RGB;

        if(j == 0 || shaded) i++;

        m_v.XY = r[i++].XY;

        VertexKick();
    }

    //

    return required;
}
// Packet type 3: axis-aligned rectangle, submitted as two corner vertices.
// Returns the number of words consumed, or 0 if the packet is not complete.
int GPUState::PH_Sprite(GPUReg* r, int size)
{
    const bool textured = r[0].SPRITE.TME != 0;

    // Command/colour word and position are always present; texture
    // coordinates and an explicit size word are optional.
    int required = 2;

    if(textured) required++;
    if(r[0].SPRITE.SIZE == 0) required++;

    if(size < required) return 0;

    //

    SetPrim(r);

    if(textured)
    {
        SetCLUT(&r[2]);
    }

    //

    int pos = 0;

    // First corner.
    m_v.RGB = r[pos++].RGB;
    m_v.XY = r[pos++].XY;

    if(textured)
    {
        m_v.UV.X = r[pos].UV.U;
        m_v.UV.Y = r[pos].UV.V;

        pos++;
    }

    VertexKick();

    // SIZE 0 reads the extent from the packet, 1..3 are fixed sizes.
    int w = 0;
    int h = 0;

    switch(r[0].SPRITE.SIZE)
    {
    case 0:
        w = r[pos].XY.X;
        h = r[pos].XY.Y;
        pos++;
        break;
    case 1:
        w = h = 1;
        break;
    case 2:
        w = h = 8;
        break;
    case 3:
        w = h = 16;
        break;
    default:
        __assume(0);
    }

    // Second corner: the first one moved by the extent.
    m_v.XY.X += w;
    m_v.XY.Y += h;

    if(textured)
    {
        m_v.UV.X += w;
        m_v.UV.Y += h;
    }

    VertexKick();

    //

    return required;
}
// Packet type 4: copies a rectangle within local memory. Returns 4 (words
// consumed), or 0 if fewer than four words are available.
int GPUState::PH_Move(GPUReg* r, int size)
{
    if(size < 4) return 0;

    Flush();

    // r[1] = source position, r[2] = destination position, r[3] = extent.
    const int srcX = r[1].XY.X;
    const int srcY = r[1].XY.Y;
    const int dstX = r[2].XY.X;
    const int dstY = r[2].XY.Y;
    const int width = r[3].XY.X;
    const int height = r[3].XY.Y;

    m_mem.MoveRect(srcX, srcY, dstX, dstY, width, height);

    Invalidate(GSVector4i(dstX, dstY, dstX + width, dstY + height));

    // Dump("m");

    return 4;
}
// Packet type 5: uploads a rectangle of 16-bit pixels that follow the
// three-word header. Returns the number of words consumed, or 0 if the header
// or the pixel data is not complete yet.
int GPUState::PH_Write(GPUReg* r, int size)
{
    if(size < 3) return 0;

    const int width = r[2].XY.X;
    const int height = r[2].XY.Y;

    // Two pixels per word, rounded up.
    const int required = 3 + ((width * height + 1) >> 1);

    if(size < required) return 0;

    Flush();

    GSVector4i r2;

    r2.left = r[1].XY.X;
    r2.top = r[1].XY.Y;
    r2.right = r2.left + width;
    r2.bottom = r2.top + height;

    m_mem.WriteRect(r2, (const uint16*)&r[3]);

    Invalidate(r2);

    Dump("w");

    m_perfmon.Put(GSPerfMon::Swizzle, width * height * 2);

    return required;
}
// Packet type 6: reads a rectangle of local memory into the read buffer and
// sets STATUS.IMG. Returns 3 (words consumed), or 0 if the header is not
// complete yet.
int GPUState::PH_Read(GPUReg* r, int size)
{
    if(size < 3) return 0;

    Flush();

    const int width = r[2].XY.X;
    const int height = r[2].XY.Y;

    GSVector4i r2;

    r2.left = r[1].XY.X;
    r2.top = r[1].XY.Y;
    r2.right = r2.left + width;
    r2.bottom = r2.top + height;

    // Pixel count rounded up to an even number, two bytes per pixel.
    m_read.bytes = ((width * height + 1) & ~1) * 2;
    m_read.cur = 0;

    m_read.Reserve(m_read.bytes);

    m_mem.ReadRect(r2, (uint16*)m_read.buff);

    Dump("r");

    m_env.STATUS.IMG = 1;

    return 3;
}
// Packet type 7: drawing environment settings selected by PACKET.OPTION.
// Always consumes one word; pending work is flushed before anything changes.
int GPUState::PH_Environment(GPUReg* r, int size)
{
    Flush(); // TODO: only call when something really changes

    switch(r->PACKET.OPTION)
    {
    case 1: // draw mode setting
        m_env.STATUS.TX = r->MODE.TX;
        m_env.STATUS.TY = r->MODE.TY;
        m_env.STATUS.ABR = r->MODE.ABR;
        m_env.STATUS.TP = r->MODE.TP;
        m_env.STATUS.DTD = r->MODE.DTD;
        m_env.STATUS.DFE = r->MODE.DFE;
        return 1;

    case 2: // texture window setting
        m_env.TWIN = r->TWIN;
        return 1;

    case 3: // set drawing area top left
        m_env.DRAREATL = r->DRAREA;
        return 1;

    case 4: // set drawing area bottom right
        m_env.DRAREABR = r->DRAREA;
        return 1;

    case 5: // drawing offset
        m_env.DROFF = r->DROFF;
        return 1;

    case 6: // mask setting
        m_env.STATUS.MD = r->MASK.MD;
        m_env.STATUS.ME = r->MASK.ME;
        return 1;
    }

    // Unknown option: flagged in debug builds, skipped otherwise.
    ASSERT(0);

    return 1;
}
//
// Starts out empty with a 4096-byte, 16-byte aligned allocation.
GPUState::Buffer::Buffer()
{
    maxbytes = 4096;
    buff = (uint8*)_aligned_malloc(maxbytes, 16);

    bytes = 0;
    cur = 0;
}
// Releases the aligned allocation.
GPUState::Buffer::~Buffer()
{
    _aligned_free(this->buff);
}
// Makes sure the buffer can hold at least `size` bytes. When it has to grow,
// the new capacity is the old capacity plus `size`, rounded up to a multiple
// of 1024.
void GPUState::Buffer::Reserve(int size)
{
    if(size <= maxbytes)
    {
        return; // already large enough
    }

    maxbytes = (maxbytes + size + 1023) & ~1023;

    buff = (uint8*)_aligned_realloc(buff, maxbytes, 16);
}
void GPUState::Buffer::Append(const uint8* src, int size)
{
Reserve(bytes + (int)size);
memcpy(&buff[bytes], src, size);
bytes += size;
}
// Discards the first `size` bytes and moves the remainder to the front.
// Debug builds fill the unused part of the buffer with 0xff.
void GPUState::Buffer::Remove(int size)
{
    ASSERT(size <= bytes);

    const int left = bytes - size;

    if(left > 0)
    {
        memmove(&buff[0], &buff[size], left);

        bytes = left;
    }
    else
    {
        bytes = 0;
    }

#ifdef DEBUG
    memset(&buff[bytes], 0xff, maxbytes - bytes);
#endif
}
void GPUState::Buffer::RemoveAll()
{
bytes = 0;
}

141
plugins/GSdx/GPUState.h Normal file
View File

@ -0,0 +1,141 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPU.h"
#include "GPUDrawingEnvironment.h"
#include "GPULocalMemory.h"
#include "GPUVertex.h"
#include "GSAlignedClass.h"
#include "GSUtil.h"
#include "GSPerfMon.h"
// Base class of the GPU renderers: parses the incoming data and status words,
// keeps the drawing environment and local memory, and passes vertices to the
// derived class through the pure virtual hooks declared below.
//
// Fix: Dump() handed a std::string object to the printf-style format() call
// for a %s placeholder, which is undefined behaviour; it now passes
// s.c_str().
class GPUState : public GSAlignedClass<16>
{
    // Status command handlers, indexed by the top byte of the status word.
    typedef void (GPUState::*GPUStatusCommandHandler)(GPUReg* r);

    GPUStatusCommandHandler m_fpGPUStatusCommandHandlers[256];

    void SCH_Null(GPUReg* r);
    void SCH_ResetGPU(GPUReg* r);
    void SCH_ResetCommandBuffer(GPUReg* r);
    void SCH_ResetIRQ(GPUReg* r);
    void SCH_DisplayEnable(GPUReg* r);
    void SCH_DMASetup(GPUReg* r);
    void SCH_StartOfDisplayArea(GPUReg* r);
    void SCH_HorizontalDisplayRange(GPUReg* r);
    void SCH_VerticalDisplayRange(GPUReg* r);
    void SCH_DisplayMode(GPUReg* r);
    void SCH_GPUInfo(GPUReg* r);

    // Data packet handlers, indexed by PACKET.TYPE. Each returns the number
    // of words consumed, or 0 when the packet is still incomplete.
    typedef int (GPUState::*GPUPacketHandler)(GPUReg* r, int size);

    GPUPacketHandler m_fpGPUPacketHandler[8];

    int PH_Command(GPUReg* r, int size);
    int PH_Polygon(GPUReg* r, int size);
    int PH_Line(GPUReg* r, int size);
    int PH_Sprite(GPUReg* r, int size);
    int PH_Move(GPUReg* r, int size);
    int PH_Write(GPUReg* r, int size);
    int PH_Read(GPUReg* r, int size);
    int PH_Environment(GPUReg* r, int size);

    // Growable, 16-byte aligned byte buffer.
    class Buffer
    {
    public:
        int bytes;    // number of valid bytes
        int maxbytes; // allocated capacity
        uint8* buff;  // storage
        int cur;      // read position (used by the read buffer)

    public:
        Buffer();
        ~Buffer();
        void Reserve(int size);
        void Append(const uint8* src, int size);
        void Remove(int size);
        void RemoveAll();
    };

    Buffer m_write; // incoming packet data that has not been consumed yet
    Buffer m_read;  // data waiting to be fetched with ReadData()

    void SetPrim(GPUReg* r);
    void SetCLUT(GPUReg* r);
    void SetTPAGE(GPUReg* r);

protected:
    int s_n; // running number used in dump file names

    // Debug helper that saves rectangle `r` of local memory as a bitmap.
    // Currently disabled by the unconditional return at the top; the
    // commented-out conditions above it are the author's ad-hoc filters.
    void Dump(const string& s, uint32 TP, const GSVector4i& r, int inc = true)
    {
        //if(m_perfmon.GetFrame() < 1000)
        //if((m_env.TWIN.u32 & 0xfffff) == 0)
        //if(!m_env.STATUS.ME && !m_env.STATUS.MD)
        return;

        if(inc) s_n++;

        //if(s_n < 86) return;

        int dir = 1;
#ifdef DEBUG
        dir = 2;
#endif
        // %s needs a C string, not a std::string object.
        m_mem.SaveBMP(format("c:\\temp%d\\%04d_%s.bmp", dir, s_n, s.c_str()), r, TP, m_env.CLUT.X, m_env.CLUT.Y);
    }

    // Dumps the whole 1024x512 area with TP = 2.
    void Dump(const string& s, int inc = true)
    {
        Dump(s, 2, GSVector4i(0, 0, 1024, 512), inc);
    }

public:
    GPUDrawingEnvironment m_env;
    GPULocalMemory m_mem;
    GPUVertex m_v; // vertex currently being assembled
    GSPerfMon m_perfmon;
    uint32 m_status[256]; // last status word written for each command number

public:
    GPUState();
    virtual ~GPUState();

    virtual void Reset();
    virtual void Flush();
    virtual void FlushPrim() = 0;
    virtual void ResetPrim() = 0;
    virtual void VertexKick() = 0;
    virtual void Invalidate(const GSVector4i& r);

    void WriteData(const uint8* mem, uint32 size);
    void ReadData(uint8* mem, uint32 size);

    void WriteStatus(uint32 status);
    uint32 ReadStatus();

    void Freeze(GPUFreezeData* data);
    void Defrost(const GPUFreezeData* data);
};

51
plugins/GSdx/GPUVertex.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPU.h"
#include "GSVector.h"
#pragma pack(push, 1)
// One input vertex as assembled by the packet handlers: colour, position and
// texture coordinates. The anonymous union lets the same storage be accessed
// as a single SSE register value (m128i / m128). Declared with 16-byte
// alignment inside a pack(1) region.
__declspec(align(16)) struct GPUVertex
{
union
{
// Field view used by the packet handlers.
struct
{
GPURegRGB RGB;
GPURegXY XY;
GPURegXY UV;
};
// Whole-vertex views as SSE values.
struct {__m128i m128i;};
struct {__m128 m128;};
};
// Starts out with all bytes zero.
GPUVertex() {memset(this, 0, sizeof(*this));}
};
// Empty vertex type; presumably the vertex format of a null renderer --
// confirm against its users.
struct GPUVertexNull
{
};
#pragma pack(pop)

View File

@ -67,5 +67,7 @@ void GSCodeBuffer::ReleaseBuffer(size_t size)
m_pos = ((m_pos + size) + 15) & ~15;
ASSERT(m_pos < m_blocksize);
m_reserved = 0;
}

View File

@ -830,9 +830,10 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
m_state.vb = vb;
m_state.vb_stride = stride;
uint32 stride2 = stride;
uint32 offset = 0;
m_ctx->IASetVertexBuffers(0, 1, &vb, &stride, &offset);
m_ctx->IASetVertexBuffers(0, 1, &vb, &stride2, &offset);
}
}

View File

@ -42,7 +42,7 @@ INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LP
if(message == WM_INITDIALOG)
{
dlg = (GSDialog*)lParam;
SetWindowLongPtr(hWnd, GWL_USERDATA, (LONG_PTR)dlg);
SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)dlg);
dlg->m_hWnd = hWnd;
MONITORINFO mi;
@ -62,7 +62,7 @@ INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LP
return true;
}
dlg = (GSDialog*)GetWindowLongPtr(hWnd, GWL_USERDATA);
dlg = (GSDialog*)GetWindowLongPtr(hWnd, GWLP_USERDATA);
return dlg != NULL ? dlg->OnMessage(message, wParam, lParam) : FALSE;
}

View File

@ -98,7 +98,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
{
m_env.tex = p->tex;
m_env.clut = p->clut;
m_env.tw = p->tw;
// m_env.tw = p->tw;
unsigned short tw = (unsigned short)(1 << context->TEX0.TW);
unsigned short th = (unsigned short)(1 << context->TEX0.TH);

View File

@ -35,6 +35,8 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment&
m_sel.key = key;
// ret(8);
Generate();
}
@ -104,30 +106,7 @@ L("loop");
// xmm6 = ga
// xmm7 = test
if(m_cpu.has(util::Cpu::tAVX))
{
if(m_sel.fwrite)
{
vmovdqa(xmm3, ptr[&m_env.fm]);
}
if(m_sel.zwrite)
{
vmovdqa(xmm4, ptr[&m_env.zm]);
}
}
else
{
if(m_sel.fwrite)
{
movdqa(xmm3, ptr[&m_env.fm]);
}
if(m_sel.zwrite)
{
movdqa(xmm4, ptr[&m_env.zm]);
}
}
ReadMask();
// ecx = steps
// esi = fzbr
@ -192,85 +171,18 @@ L("loop");
TestDestAlpha();
if(m_cpu.has(util::Cpu::tAVX))
{
// fm |= test;
// zm |= test;
// ecx = steps
// esi = fzbr
// edi = fzbc
// ebp = za
// xmm2 = fd
// xmm3 = fm
// xmm4 = zm
// xmm5 = rb
// xmm6 = ga
// xmm7 = test
if(m_sel.fwrite)
{
vpor(xmm3, xmm7);
}
if(m_sel.zwrite)
{
vpor(xmm4, xmm7);
}
// int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
vpcmpeqd(xmm1, xmm1);
if(m_sel.fwrite && m_sel.zwrite)
{
vpcmpeqd(xmm0, xmm1, xmm4);
vpcmpeqd(xmm1, xmm3);
vpackssdw(xmm1, xmm0);
}
else if(m_sel.fwrite)
{
vpcmpeqd(xmm1, xmm3);
vpackssdw(xmm1, xmm1);
}
else if(m_sel.zwrite)
{
vpcmpeqd(xmm1, xmm4);
vpackssdw(xmm1, xmm1);
}
vpmovmskb(edx, xmm1);
not(edx);
}
else
{
// fm |= test;
// zm |= test;
if(m_sel.fwrite)
{
por(xmm3, xmm7);
}
if(m_sel.zwrite)
{
por(xmm4, xmm7);
}
// int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
pcmpeqd(xmm1, xmm1);
if(m_sel.fwrite && m_sel.zwrite)
{
movdqa(xmm0, xmm1);
pcmpeqd(xmm1, xmm3);
pcmpeqd(xmm0, xmm4);
packssdw(xmm1, xmm0);
}
else if(m_sel.fwrite)
{
pcmpeqd(xmm1, xmm3);
packssdw(xmm1, xmm1);
}
else if(m_sel.zwrite)
{
pcmpeqd(xmm1, xmm4);
packssdw(xmm1, xmm1);
}
pmovmskb(edx, xmm1);
not(edx);
}
WriteMask();
// ebx = fa
// ecx = steps
@ -329,6 +241,8 @@ L("step");
L("exit");
// vzeroupper();
pop(ebp);
pop(edi);
pop(esi);
@ -622,9 +536,6 @@ void GSDrawScanlineCodeGenerator::Init(int params)
if(m_sel.edge || m_sel.tfx != TFX_NONE)
{
movaps(xmm4, ptr[ebx + 32]); // v.t
//vbroadcastf128(ymm4, ptr[ebx + 32]); // v.t
//vzeroupper();
}
if(m_sel.edge)
@ -1288,14 +1199,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i y0 = uv0.uph16() << tw;
vpxor(xmm0, xmm0);
vmovd(xmm1, ptr[&m_env.tw]);
//vmovd(xmm1, ptr[&m_env.tw]);
vpunpcklwd(xmm4, xmm2, xmm0);
vpunpckhwd(xmm2, xmm2, xmm0);
vpslld(xmm2, xmm1);
vpslld(xmm2, m_sel.tw + 3); // xmm1);
// xmm0 = 0
// xmm1 = tw
// xmm1 = free // tw
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
@ -1309,7 +1220,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpunpcklwd(xmm6, xmm3, xmm0);
vpunpckhwd(xmm3, xmm3, xmm0);
vpslld(xmm3, xmm1);
vpslld(xmm3, m_sel.tw + 3); // xmm1);
// xmm2 = y0
// xmm3 = y1
@ -1547,15 +1458,15 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i x0 = uv0.upl16();
pxor(xmm0, xmm0);
movd(xmm1, ptr[&m_env.tw]);
// movd(xmm1, ptr[&m_env.tw]);
movdqa(xmm4, xmm2);
punpckhwd(xmm2, xmm0);
punpcklwd(xmm4, xmm0);
pslld(xmm2, xmm1);
pslld(xmm2, m_sel.tw + 3); // xmm1);
// xmm0 = 0
// xmm1 = tw
// xmm1 = free // tw
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
@ -1570,7 +1481,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
movdqa(xmm6, xmm3);
punpckhwd(xmm3, xmm0);
punpcklwd(xmm6, xmm0);
pslld(xmm3, xmm1);
pslld(xmm3, m_sel.tw + 3); // xmm1);
// xmm2 = y0
// xmm3 = y1
@ -2345,6 +2256,34 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
}
}
// Emits the loads of the frame and z write masks from the scanline environment.
// After the generated code runs: xmm3 = fm (only if fwrite), xmm4 = zm (only if zwrite).
// The v-prefixed form of the move is emitted when the CPU reports AVX, the plain form otherwise.
void GSDrawScanlineCodeGenerator::ReadMask()
{
	const bool avx = m_cpu.has(util::Cpu::tAVX);

	// xmm3 = fm
	if(m_sel.fwrite)
	{
		if(avx) vmovdqa(xmm3, ptr[&m_env.fm]);
		else movdqa(xmm3, ptr[&m_env.fm]);
	}

	// xmm4 = zm
	if(m_sel.zwrite)
	{
		if(avx) vmovdqa(xmm4, ptr[&m_env.zm]);
		else movdqa(xmm4, ptr[&m_env.zm]);
	}
}
void GSDrawScanlineCodeGenerator::TestAlpha()
{
switch(m_sel.afail)
@ -2794,6 +2733,91 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
alltrue();
}
// Emits the code that folds the per-pixel test result into the frame/z write masks and
// condenses them into a bit mask in edx. The AVX branch emits v-prefixed instructions,
// the other branch the plain SSE forms; both compute the same thing.
//
// Registers used by the emitted code:
//   xmm3 = fm (touched only if m_sel.fwrite), xmm4 = zm (touched only if m_sel.zwrite)
//   xmm7 = test
//   xmm0, xmm1 = scratch (overwritten)
//   edx = result: inverted byte mask of the packed "mask == 0xffffffff" comparisons
//
// Each 32-bit mask lane becomes one 16-bit lane after the pack, i.e. two bits in edx.
// With both fwrite and zwrite the fm comparison lands in the low 8 bits and the zm
// comparison in the next 8 bits; with only one of them selected that comparison is
// packed with itself and therefore appears in both bytes.
void GSDrawScanlineCodeGenerator::WriteMask()
{
if(m_cpu.has(util::Cpu::tAVX))
{
// fm |= test;
// zm |= test;
if(m_sel.fwrite)
{
vpor(xmm3, xmm7);
}
if(m_sel.zwrite)
{
vpor(xmm4, xmm7);
}
// int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
// xmm1 = all ones (a register compared with itself is equal in every lane)
vpcmpeqd(xmm1, xmm1);
if(m_sel.fwrite && m_sel.zwrite)
{
// three-operand form: xmm0 = (zm == all ones) without needing a separate copy of xmm1
vpcmpeqd(xmm0, xmm1, xmm4);
vpcmpeqd(xmm1, xmm3);
// low four words from the fm comparison, high four words from the zm comparison
vpackssdw(xmm1, xmm0);
}
else if(m_sel.fwrite)
{
vpcmpeqd(xmm1, xmm3);
vpackssdw(xmm1, xmm1);
}
else if(m_sel.zwrite)
{
vpcmpeqd(xmm1, xmm4);
vpackssdw(xmm1, xmm1);
}
// collect the sign bit of every byte, then invert so set bits mark lanes whose mask is not all ones
vpmovmskb(edx, xmm1);
not(edx);
}
else
{
// fm |= test;
// zm |= test;
if(m_sel.fwrite)
{
por(xmm3, xmm7);
}
if(m_sel.zwrite)
{
por(xmm4, xmm7);
}
// int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
// xmm1 = all ones (a register compared with itself is equal in every lane)
pcmpeqd(xmm1, xmm1);
if(m_sel.fwrite && m_sel.zwrite)
{
// two-operand encoding is destructive, so keep a second copy of the all-ones value in xmm0
movdqa(xmm0, xmm1);
pcmpeqd(xmm1, xmm3);
pcmpeqd(xmm0, xmm4);
// low four words from the fm comparison, high four words from the zm comparison
packssdw(xmm1, xmm0);
}
else if(m_sel.fwrite)
{
pcmpeqd(xmm1, xmm3);
packssdw(xmm1, xmm1);
}
else if(m_sel.zwrite)
{
pcmpeqd(xmm1, xmm4);
packssdw(xmm1, xmm1);
}
// collect the sign bit of every byte, then invert so set bits mark lanes whose mask is not all ones
pmovmskb(edx, xmm1);
not(edx);
}
}
void GSDrawScanlineCodeGenerator::WriteZBuf()
{
if(!m_sel.zwrite)
@ -2828,7 +2852,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
}
}
WritePixel(xmm1, xmm0, ebp, dh, fast, m_sel.zpsm);
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
}
void GSDrawScanlineCodeGenerator::AlphaBlend()
@ -3444,7 +3468,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
bool fast = m_sel.rfb && m_sel.fpsm < 2;
WritePixel(xmm5, xmm0, ebx, dl, fast, m_sel.fpsm);
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
}
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
@ -3461,7 +3485,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
}
}
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, const Reg8& mask, bool fast, int psm)
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
{
if(fast)
{
@ -3504,27 +3528,27 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, co
test(mask, 0x03);
je("@f");
WritePixel(src, temp, addr, 0, psm);
WritePixel(src, addr, 0, psm);
L("@@");
test(mask, 0x0c);
je("@f");
WritePixel(src, temp, addr, 1, psm);
WritePixel(src, addr, 1, psm);
L("@@");
test(mask, 0x30);
je("@f");
WritePixel(src, temp, addr, 2, psm);
WritePixel(src, addr, 2, psm);
L("@@");
test(mask, 0xc0);
je("@f");
WritePixel(src, temp, addr, 3, psm);
WritePixel(src, addr, 3, psm);
L("@@");
}
}
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm)
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm)
{
static const int offsets[4] = {0, 2, 8, 10};
@ -3546,7 +3570,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, co
xor(dst, eax);
break;
case 2:
pextrw(eax, src, i * 2); // vpextrw is broken in xbyak 2.99
vpextrw(eax, src, i * 2);
mov(dst, ax);
break;
}
@ -3578,11 +3602,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, co
{
case 0:
if(i == 0) movd(dst, src);
else {pshufd(temp, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, temp);}
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
break;
case 1:
if(i == 0) movd(eax, src);
else {pshufd(temp, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, temp);}
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
xor(eax, dst);
and(eax, 0xffffff);
xor(dst, eax);

View File

@ -47,18 +47,20 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void Wrap(const Xmm& uv0);
void Wrap(const Xmm& uv0, const Xmm& uv1);
void AlphaTFX();
void ReadMask();
void TestAlpha();
void ColorTFX();
void Fog();
void ReadFrame();
void TestDestAlpha();
void WriteMask();
void WriteZBuf();
void AlphaBlend();
void WriteFrame(int params);
void ReadPixel(const Xmm& dst, const Reg32& addr);
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, const Reg8& mask, bool fast, int psm);
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
void WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz);
void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2);
void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);

View File

@ -155,7 +155,6 @@ public:
template<class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
uint32 m_id;
string m_name;
hash_map<uint64, CG*> m_cgmap;
GSCodeBuffer m_cb;
@ -167,8 +166,7 @@ protected:
public:
GSCodeGeneratorFunctionMap(const char* name)
: m_id(0x100000)
, m_name(name)
: m_name(name)
{
}
@ -195,25 +193,30 @@ public:
ASSERT(cg);
ASSERT(cg->getSize() < MAX_SIZE);
m_cb.ReleaseBuffer(cg->getSize());
m_cgmap[key] = cg;
// vtune method registration
if(iJIT_IsProfilingActive())
{
string name = format("%s<%016I64x>()", m_name.c_str(), (uint64)key);
iJIT_Method_Load ml;
memset(&ml, 0, sizeof(ml));
ml.method_id = m_id++;
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
ml.method_load_address = (void*)cg->getCode();
ml.method_size = cg->getSize();
ml.method_size = (unsigned int)cg->getSize();
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
}
}
return (VALUE)cg->getCode();
}

View File

@ -24,12 +24,11 @@
#include "StdAfx.h"
#include "GSRasterizer.h"
#include "pthread.h"
// Using a spinning finish on the main (MTGS) thread is apparently a big win still, over trying
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
// but that's still worlds better than 2-6 spinning threads like before.
//
#define UseSpinningFinish
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
@ -65,7 +64,7 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
#else
return (scanline % m_threads) == m_id;
return m_threads == 1 || (scanline % m_threads) == m_id;
#endif
}
@ -845,43 +844,50 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
//
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, sem_t& finished, volatile long& sync)
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync)
: GSRasterizer(ds, id, threads)
, m_finished(finished)
, m_ready(ready)
, m_sync(sync)
, m_exit(false)
, m_data(NULL)
{
sem_init(&m_semaphore, false, 0);
sem_init(&m_stopped, false, 0);
m_exit = CreateEvent(NULL, FALSE, FALSE, NULL);
m_draw = CreateEvent(NULL, FALSE, FALSE, NULL);
CreateThread();
}
// Tears the worker down: signals the exit event, shuts the thread down through
// CloseThread() (presumably this waits for ThreadProc to return - confirm in GSThread),
// then releases the per-worker event handles.
GSRasterizerMT::~GSRasterizerMT()
{
m_exit = true;
sem_post(&m_semaphore);
sem_wait(&m_stopped);
SetEvent(m_exit);
sem_destroy(&m_semaphore);
sem_destroy(&m_stopped);
CloseThread();
// m_exit and m_draw are created with CreateEvent, so they are kernel handles and have
// to be released with CloseHandle. DeleteObject only accepts GDI objects; calling it
// on an event handle fails and leaks the handle.
CloseHandle(m_exit);
CloseHandle(m_draw);
}
void GSRasterizerMT::Draw(const GSRasterizerData* data)
{
m_data = data;
sem_post(&m_semaphore);
SetEvent(m_draw);
}
void GSRasterizerMT::ThreadProc()
{
// _mm_setcsr(MXCSR);
HANDLE events[] = {m_exit, m_draw};
while(true)
{
sem_wait(&m_semaphore);
switch(WaitForMultipleObjects(countof(events), events, FALSE, INFINITE))
{
case WAIT_OBJECT_0 + 0: // exit
if(m_exit) break;
return;
case WAIT_OBJECT_0 + 1: // draw
__super::Draw(m_data);
@ -891,33 +897,37 @@ void GSRasterizerMT::ThreadProc()
#else
sem_post(&m_finished);
SetEvent(m_ready);
#endif
break;
}
}
sem_post(&m_stopped);
ASSERT(0);
}
//
GSRasterizerList::GSRasterizerList()
{
m_threadcount = 0;
sem_init(&m_finished, false, 0);
}
GSRasterizerList::~GSRasterizerList()
{
FreeRasterizers();
sem_destroy(&m_finished);
}
void GSRasterizerList::FreeRasterizers()
{
for(unsigned i=0; i<size(); ++i) delete (*this)[i];
for(size_t i = 0; i < size(); i++) delete (*this)[i];
clear();
for(size_t i = 0; i < m_ready.size(); i++) CloseHandle(m_ready[i]);
m_ready.clear();
}
void GSRasterizerList::Draw(const GSRasterizerData* data)
@ -941,10 +951,7 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
#else
for(size_t i = 1; i < size(); i++)
{
sem_wait(&m_finished);
}
WaitForMultipleObjects(m_ready.size(), &m_ready[0], TRUE, INFINITE);
#endif

View File

@ -27,9 +27,6 @@
#include "GSThread.h"
#include "GSAlignedClass.h"
#include "pthread.h"
#include "semaphore.h"
__aligned32 class GSRasterizerData
{
public:
@ -112,17 +109,16 @@ public:
class GSRasterizerMT : public GSRasterizer, private GSThread
{
protected:
sem_t& m_finished;
volatile long& m_sync;
sem_t m_semaphore;
sem_t m_stopped;
bool m_exit;
HANDLE m_exit;
HANDLE m_draw;
HANDLE m_ready;
const GSRasterizerData* m_data;
void ThreadProc();
public:
GSRasterizerMT(IDrawScanline* ds, int id, int threads, sem_t& finished, volatile long& sync);
GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync);
virtual ~GSRasterizerMT();
// IRasterizer
@ -133,8 +129,7 @@ public:
class GSRasterizerList : protected vector<IRasterizer*>, public IRasterizer
{
protected:
int m_threadcount;
sem_t m_finished;
std::vector<HANDLE> m_ready;
volatile long m_sync;
long m_syncstart;
GSRasterizerStats m_stats;
@ -148,7 +143,7 @@ public:
{
FreeRasterizers();
threads = max(threads, 1); // TODO: min(threads, number of cpu cores)
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
@ -156,7 +151,11 @@ public:
for(int i = 1; i < threads; i++)
{
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_finished, m_sync));
HANDLE ready = CreateEvent(NULL, FALSE, FALSE, NULL);
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, ready, m_sync));
m_ready.push_back(ready);
_interlockedbittestandset(&m_syncstart, i);
}

View File

@ -70,11 +70,7 @@ GSRenderer::~GSRenderer()
bool GSRenderer::CreateWnd(const string& title, int w, int h)
{
if(!m_wnd.Create(title.c_str(), w, h))
{
return false;
}
return true;
return m_wnd.Create(title.c_str(), w, h);
}
bool GSRenderer::CreateDevice(GSDevice* dev)

View File

@ -373,7 +373,9 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.tex = t->m_buff;
p.clut = m_mem.m_clut;
p.tw = t->m_tw;
// p.tw = t->m_tw;
p.sel.tw = t->m_tw - 3;
}
p.sel.fge = PRIM->FGE;

View File

@ -63,6 +63,8 @@ union GSScanlineSelector
uint32 dthe:1; // 45
uint32 sprite:1; // 46
uint32 edge:1; // 47
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
};
struct
@ -106,7 +108,7 @@ __aligned32 struct GSScanlineParam
void* vm;
const void* tex;
const uint32* clut;
uint32 tw;
//uint32 tw;
GSOffset* fbo;
GSOffset* zbo;
@ -120,7 +122,7 @@ __aligned32 struct GSScanlineEnvironment
void* vm;
const void* tex;
const uint32* clut;
uint32 tw;
//uint32 tw;
int* fbr;
int* zbr;

View File

@ -301,7 +301,9 @@ void GSSettingsDlg::UpdateControls()
bool allowHacks = !!theApp.GetConfig("allowHacks", 0);
int scaling = 1; // in case reading the combo doesn't work, enable the custom res control anyway
if (ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, i)){
if(ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, i))
{
scaling = (int)i;
}
@ -310,15 +312,12 @@ void GSSettingsDlg::UpdateControls()
bool dx9 = i >= 0 && i <= 2;
bool dx10 = i >= 3 && i <= 5;
bool dx11 = i >= 6 && i <= 8;
bool ogl = i >= 9 && i <= 12;
bool hw = i == 0 || i == 3 || i == 6 || i == 9;
bool sw = i == 1 || i == 4 || i == 7 || i == 10;
bool hw = i == 0 || i == 3 || i == 6;
bool sw = i == 1 || i == 4 || i == 7;
bool native = !!IsDlgButtonChecked(m_hWnd, IDC_NATIVERES);
ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO9), dx9 ? SW_SHOW : SW_HIDE);
ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO10), dx10 ? SW_SHOW : SW_HIDE);
// TODO: ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO11), dx11 ? SW_SHOW : SW_HIDE);
// TODO: ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO_OGL), ogl ? SW_SHOW : SW_HIDE);
ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO11), dx10 ? SW_SHOW : SW_HIDE);
EnableWindow(GetDlgItem(m_hWnd, IDC_WINDOWED), dx9);
EnableWindow(GetDlgItem(m_hWnd, IDC_RESX), hw && !native && scaling == 1);

View File

@ -157,8 +157,7 @@ void GSSetupPrimCodeGenerator::Depth()
static const float half = 0.5f;
vmovss(xmm1, dword[&half]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
vbroadcastss(xmm1, dword[&half]);
vmulps(xmm1, xmm0);
vcvttps2dq(xmm1, xmm1);
vpslld(xmm1, 1);

View File

@ -2948,7 +2948,7 @@ public:
__forceinline explicit GSVector8i(__m128i m)
{
this->m = _mm256_insertf128_si256(_mm256_insertf128_si256(zero(), m, 0), m, 1);
this->m = zero().insert<0>(m).insert<1>(m);
}
__forceinline explicit GSVector8i(__m256i m)
@ -2970,7 +2970,7 @@ public:
__forceinline void operator = (__m128i m)
{
this->m = _mm256_insertf128_si256(_mm256_insertf128_si256(zero(), m, 0), m, 1);
this->m = zero().insert<0>(m).insert<1>(m);
}
__forceinline void operator = (__m256i m)
@ -2987,6 +2987,16 @@ public:
// TODO
// Returns 128-bit lane i of this vector as a GSVector4i (i = 0: low half, i = 1: high half).
template<int i> __forceinline GSVector4i extract() const
{
return GSVector4i(_mm256_extractf128_si256(m, i));
}
// Returns a copy of this vector with 128-bit lane i replaced by m (i = 0: low half, i = 1: high half).
// The parameter shadows the member of the same name, hence the explicit this->m.
template<int i> __forceinline GSVector8i insert(__m128i m) const
{
return GSVector8i(_mm256_insertf128_si256(this->m, m, i));
}
__forceinline static GSVector8i zero()
{
return GSVector8i(_mm256_setzero_si256());
@ -3044,7 +3054,7 @@ public:
__forceinline GSVector8(__m128 m0, __m128 m1)
{
m = _mm256_insertf128_ps(_mm256_insertf128_ps(zero(), m0, 0), m1, 1);
m = zero().insert<0>(m0).insert<1>(m1);
}
__forceinline GSVector8(const GSVector8& v)
@ -3059,7 +3069,7 @@ public:
__forceinline explicit GSVector8(__m128 m)
{
this->m = _mm256_insertf128_ps(_mm256_insertf128_ps(zero(), m, 0), m, 1);
this->m = zero().insert<0>(m).insert<1>(m);
}
__forceinline explicit GSVector8(__m256 m)
@ -3081,7 +3091,7 @@ public:
__forceinline void operator = (__m128 m)
{
this->m = _mm256_insertf128_ps(_mm256_insertf128_ps(zero(), m, 0), m, 1);
this->m = zero().insert<0>(m).insert<1>(m);
}
__forceinline void operator = (__m256 m)
@ -3164,6 +3174,16 @@ public:
// TODO
// "Low to high": returns a copy whose high 128-bit lane is overwritten with the low lane,
// so both halves hold the original low half.
__forceinline GSVector8 l2h() const
{
return insert<1>(extract<0>());
}
// "High to low": returns a copy whose low 128-bit lane is overwritten with the high lane,
// so both halves hold the original high half.
__forceinline GSVector8 h2l() const
{
return insert<0>(extract<1>());
}
__forceinline GSVector8 andnot(const GSVector8& v) const
{
return GSVector8(_mm256_andnot_ps(v.m, m));
@ -3189,13 +3209,26 @@ public:
return GSVector4(_mm256_extractf128_ps(m, i));
}
// TODO: insert
// Returns a copy of this vector with 128-bit lane i replaced by m (i = 0: low half, i = 1: high half).
// The parameter shadows the member of the same name, hence the explicit this->m.
template<int i> __forceinline GSVector8 insert(__m128 m) const
{
return GSVector8(_mm256_insertf128_ps(this->m, m, i));
}
// Returns a vector with all 256 bits cleared.
__forceinline static GSVector8 zero()
{
return GSVector8(_mm256_setzero_ps());
}
// Thin wrapper over _mm256_zeroupper(), which clears the upper 128 bits of all YMM registers.
__forceinline static void zeroupper()
{
_mm256_zeroupper();
}
// Thin wrapper over _mm256_zeroall(), which clears all YMM registers entirely.
__forceinline static void zeroall()
{
_mm256_zeroall();
}
__forceinline static GSVector8 xffffffff()
{
return zero() == zero();

View File

@ -161,34 +161,149 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
movss(xmm0, ptr[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm1, ptr[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm0);
movaps(xmm3, xmm1);
vmovaps(xmm2, xmm4);
vmovaps(xmm3, xmm5);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + 32]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW)]);
vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 16]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 32]);
if(!fst)
{
if(primclass != GS_SPRITE_CLASS)
{
vmovaps(xmm1, xmm0);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
vdivps(xmm0, xmm1);
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
add(edx, n * sizeof(GSVertexSW));
sub(ecx, n);
jg("loop");
// }
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
vcvttps2dq(xmm2, xmm2);
vpsrld(xmm2, 7);
vmovaps(ptr[eax], xmm2);
vcvttps2dq(xmm3, xmm3);
vpsrld(xmm3, 7);
vmovaps(ptr[edx], xmm3);
}
vmovaps(ptr[eax + 16], xmm4);
vmovaps(ptr[edx + 16], xmm5);
if(tme)
{
vmovaps(ptr[eax + 32], xmm6);
vmovaps(ptr[edx + 32], xmm7);
}
}
else
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm4);
movaps(xmm3, xmm5);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
@ -280,6 +395,7 @@ L("loop");
movaps(ptr[eax + 32], xmm6);
movaps(ptr[edx + 32], xmm7);
}
}
ret();
}
@ -327,11 +443,161 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
movss(xmm0, ptr[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm1, ptr[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + 16]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + 16]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme && !fst && primclass != GS_SPRITE_CLASS)
{
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
}
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9)]);
}
if(color && (iip || j == n - 1))
{
// min.c = min.c.min_u8(v[i + j].c);
// max.c = max.c.max_u8(v[i + j].c);
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
if(!fst)
{
// t /= p.wwww();
vdivps(xmm0, xmm1);
}
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
add(edx, n * sizeof(GSVertexHW9));
sub(ecx, n);
jg("loop");
// }
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
}
else
{
vpxor(xmm0, xmm0);
vpunpckhbw(xmm2, xmm0);
vpunpcklwd(xmm2, xmm0);
vpunpckhbw(xmm3, xmm0);
vpunpcklwd(xmm3, xmm0);
}
vmovaps(ptr[eax], xmm2);
vmovaps(ptr[edx], xmm3);
}
// m_min.p = pmin;
// m_max.p = pmax;
vmovaps(ptr[eax + 16], xmm4);
vmovaps(ptr[edx + 16], xmm5);
if(tme)
{
// m_min.t = tmin.xyww(pmin);
// m_max.t = tmax.xyww(pmax);
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[eax + 32], xmm6);
vmovaps(ptr[edx + 32], xmm7);
}
}
else
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
@ -342,19 +608,13 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
pxor(xmm3, xmm3);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
@ -476,6 +736,7 @@ L("loop");
movaps(ptr[eax + 32], xmm6);
movaps(ptr[edx + 32], xmm7);
}
}
ret();
}
@ -521,11 +782,160 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
movss(xmm0, ptr[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm1, ptr[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
for(int j = 0; j < n; j++)
{
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
}
if(color && (iip || j == n - 1))
{
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
if(!fst)
{
vmovaps(xmm1, xmm0);
}
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
if(!fst)
{
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
vdivps(xmm0, xmm1);
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
if(m_cpu.has(util::Cpu::tSSE41))
{
vpmovzxwd(xmm1, xmm0);
}
else
{
vpunpcklwd(xmm1, xmm0, xmm0);
vpsrld(xmm1, 16);
}
vpsrld(xmm0, 1);
vpunpcklqdq(xmm1, xmm0);
vcvtdq2ps(xmm1, xmm1);
vminps(xmm4, xmm1);
vmaxps(xmm5, xmm1);
}
add(edx, n * sizeof(GSVertexHW11));
sub(ecx, n);
jg("loop");
// }
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
}
else
{
vpxor(xmm0, xmm0);
vpunpckhbw(xmm2, xmm0);
vpunpcklwd(xmm2, xmm0);
vpunpckhbw(xmm3, xmm0);
vpunpcklwd(xmm3, xmm0);
}
vmovaps(ptr[eax], xmm2);
vmovaps(ptr[edx], xmm3);
}
// m_min.p = pmin.xyww();
// m_max.p = pmax.xyww();
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[eax + 16], xmm4);
vmovaps(ptr[edx + 16], xmm5);
if(tme)
{
// m_min.t = tmin;
// m_max.t = tmax;
vmovaps(ptr[eax + 32], xmm6);
vmovaps(ptr[edx + 32], xmm7);
}
}
else
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
@ -536,19 +946,13 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
pxor(xmm3, xmm3);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
@ -669,6 +1073,7 @@ L("loop");
movaps(ptr[eax + 32], xmm6);
movaps(ptr[edx + 32], xmm7);
}
}
ret();
}

View File

@ -40,6 +40,8 @@ __aligned32 class GSVertexTrace
class CGSW : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
CGSW(uint32 key, void* code, size_t maxsize);
};

View File

@ -42,13 +42,13 @@ LRESULT CALLBACK GSWnd::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM l
{
wnd = (GSWnd*)((LPCREATESTRUCT)lParam)->lpCreateParams;
SetWindowLongPtr(hWnd, GWL_USERDATA, (LONG_PTR)wnd);
SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)wnd);
wnd->m_hWnd = hWnd;
}
else
{
wnd = (GSWnd*)GetWindowLongPtr(hWnd, GWL_USERDATA);
wnd = (GSWnd*)GetWindowLongPtr(hWnd, GWLP_USERDATA);
}
if(wnd == NULL)

View File

@ -41,3 +41,30 @@ EXPORTS
GSReplay
GSBenchmark
GSgetTitleInfo2
PSEgetLibType
PSEgetLibName
PSEgetLibVersion
GPUinit
GPUshutdown
GPUopen
GPUclose
GPUconfigure
GPUabout
GPUtest
GPUwriteData
GPUwriteStatus
GPUreadData
GPUreadStatus
GPUdmaChain
GPUgetMode
GPUsetMode
GPUupdateLace
GPUmakeSnapshot
GPUwriteDataMem
GPUreadDataMem
GPUdisplayText
GPUdisplayFlags
GPUfreeze
GPUshowScreenPic
GPUgetScreenPic
GPUcursor

View File

@ -76,7 +76,7 @@ STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSM
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,7,7,175,42
CONTROL 2022,IDC_LOGO11,"Static",SS_BITMAP,7,7,175,42
CONTROL 2021,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44
LTEXT "Resolution:",IDC_STATIC,7,58,37,8
COMBOBOX IDC_RESOLUTION,71,56,111,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
@ -155,7 +155,7 @@ BEGIN
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,161,11,14
DEFPUSHBUTTON "OK",IDOK,43,178,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,178,50,14
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,7,7,173,42
CONTROL 2022,IDC_LOGO11,"Static",SS_BITMAP,7,7,173,42
LTEXT "Internal Resolution:",IDC_STATIC,7,135,64,8
COMBOBOX IDC_SCALE,78,132,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,129,157,49,10
@ -166,7 +166,7 @@ STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSM
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,6,6,173,42
CONTROL 2022,IDC_LOGO11,"Static",SS_BITMAP,6,6,173,42
DEFPUSHBUTTON "OK",IDOK,41,312,50,14
LTEXT "Renderer:",IDC_STATIC,6,57,34,8
COMBOBOX IDC_RENDERER,70,55,111,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP

View File

@ -212,7 +212,6 @@
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -224,8 +223,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -234,8 +231,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -244,8 +239,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -254,8 +247,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -264,8 +255,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -274,8 +263,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Devel|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -284,8 +271,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\devel.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -294,8 +279,6 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -304,76 +287,79 @@
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\3rdpartyDeps.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Template|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\sse4.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\sse4.props" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\sse4.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\sse4.props" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\ssse3.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\ssse3.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\sse2.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\release.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\sse2.props" />
<Import Project="vsprops\common.props" />
<Import Project="..\..\common\vsprops\BaseProperties.props" />
<Import Project="vsprops\debug.props" />
<Import Project="..\..\common\vsprops\pthreads.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
@ -702,6 +688,15 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="GPU.cpp" />
<ClCompile Include="GPUDrawScanline.cpp" />
<ClCompile Include="GPUDrawScanlineCodeGenerator.cpp" />
<ClCompile Include="GPULocalMemory.cpp" />
<ClCompile Include="GPURenderer.cpp" />
<ClCompile Include="GPURendererSW.cpp" />
<ClCompile Include="GPUSettingsDlg.cpp" />
<ClCompile Include="GPUSetupPrimCodeGenerator.cpp" />
<ClCompile Include="GPUState.cpp" />
<ClCompile Include="GS.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
@ -1746,6 +1741,18 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="GPU.h" />
<ClInclude Include="GPUDrawingEnvironment.h" />
<ClInclude Include="GPUDrawScanline.h" />
<ClInclude Include="GPUDrawScanlineCodeGenerator.h" />
<ClInclude Include="GPULocalMemory.h" />
<ClInclude Include="GPURenderer.h" />
<ClInclude Include="GPURendererSW.h" />
<ClInclude Include="GPUScanlineEnvironment.h" />
<ClInclude Include="GPUSettingsDlg.h" />
<ClInclude Include="GPUSetupPrimCodeGenerator.h" />
<ClInclude Include="GPUState.h" />
<ClInclude Include="GPUVertex.h" />
<ClInclude Include="GS.h" />
<ClInclude Include="GSAlignedClass.h" />
<ClInclude Include="GSBlock.h" />
@ -1853,16 +1860,6 @@
<ItemGroup>
<ResourceCompile Include="GSdx.rc" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\3rdparty\w32pthreads\pthreads.vcxproj">
<Project>{26511268-2902-4997-8421-ecd7055f9e28}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>
</ProjectReference>
<ProjectReference Include="..\..\3rdparty\w32pthreads\pthreads_lib.vcxproj">
<Project>{7e9b2be7-cec3-4f14-847b-0ab8d562fb86}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>

View File

@ -249,6 +249,33 @@
<ClCompile Include="baseclasses\wxutil.cpp">
<Filter>Baseclasses</Filter>
</ClCompile>
<ClCompile Include="GPU.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPUDrawScanline.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPUDrawScanlineCodeGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPULocalMemory.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPURenderer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPURendererSW.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPUSettingsDlg.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPUSetupPrimCodeGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPUState.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="GS.h">
@ -527,11 +554,44 @@
<ClInclude Include="xbyak\xbyak_util.h">
<Filter>Xbyak</Filter>
</ClInclude>
<ClInclude Include="GPU.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUDrawingEnvironment.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUDrawScanline.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUDrawScanlineCodeGenerator.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPULocalMemory.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPURenderer.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPURendererSW.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUScanlineEnvironment.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUSettingsDlg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUSetupPrimCodeGenerator.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUState.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GPUVertex.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="GSdx.def">
<Filter>Resource Files</Filter>
</None>
<None Include="res\logo10.bmp">
<Filter>Resource Files</Filter>
</None>
@ -556,6 +616,7 @@
<None Include="baseclasses\activex.ver">
<Filter>Baseclasses</Filter>
</None>
<None Include="GSdx.def" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GSdx.rc">

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="windows-1250"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Version="9,00"
Name="GSdx"
ProjectGUID="{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
RootNamespace="GSdx"
@ -22,7 +22,7 @@
<Configuration
Name="Debug SSE2|Win32"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse2.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse2.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="0"
CharacterSet="2"
>
@ -88,7 +88,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="0"
@ -150,9 +150,8 @@
</Configuration>
<Configuration
Name="Release SSE2|Win32"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops;.\vsprops\sse2.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse2.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="0"
CharacterSet="2"
WholeProgramOptimization="1"
@ -218,7 +217,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -281,7 +280,7 @@
<Configuration
Name="Release SSSE3|Win32"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\ssse3.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="0"
CharacterSet="2"
WholeProgramOptimization="1"
@ -347,7 +346,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\ssse3.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -410,7 +409,7 @@
<Configuration
Name="Debug SSSE3|Win32"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\ssse3.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="0"
CharacterSet="2"
>
@ -476,7 +475,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\ssse3.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -538,7 +537,7 @@
<Configuration
Name="Debug SSE4|Win32"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse4.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="0"
CharacterSet="2"
>
@ -604,7 +603,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse4.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -667,7 +666,7 @@
<Configuration
Name="Release SSE4|Win32"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\sse4.vsprops;.\vsprops\ProjectRootDir.vsprops;..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse4.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="0"
CharacterSet="2"
WholeProgramOptimization="1"
@ -733,7 +732,7 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse4.vsprops;..\..\common\vsprops\pthreads.vsprops"
InheritedPropertySheets=".\vsprops\ProjectRootDir.vsprops;.\vsprops\sse4.vsprops;.\vsprops\common.vsprops;..\..\common\vsprops\BaseProperties.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -858,7 +857,6 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -924,7 +922,6 @@
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="..\..\common\vsprops\pthreads.vsprops"
>
<Tool
Name="VCPreBuildEventTool"
@ -984,7 +981,6 @@
OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;..\..\common\vsprops\pthreads.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -1055,6 +1051,42 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\GPU.cpp"
>
</File>
<File
RelativePath=".\GPUDrawScanline.cpp"
>
</File>
<File
RelativePath=".\GPUDrawScanlineCodeGenerator.cpp"
>
</File>
<File
RelativePath=".\GPULocalMemory.cpp"
>
</File>
<File
RelativePath=".\GPURenderer.cpp"
>
</File>
<File
RelativePath=".\GPURendererSW.cpp"
>
</File>
<File
RelativePath=".\GPUSettingsDlg.cpp"
>
</File>
<File
RelativePath=".\GPUSetupPrimCodeGenerator.cpp"
>
</File>
<File
RelativePath=".\GPUState.cpp"
>
</File>
<File
RelativePath=".\GS.cpp"
>
@ -1557,6 +1589,54 @@
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\GPU.h"
>
</File>
<File
RelativePath=".\GPUDrawingEnvironment.h"
>
</File>
<File
RelativePath=".\GPUDrawScanline.h"
>
</File>
<File
RelativePath=".\GPUDrawScanlineCodeGenerator.h"
>
</File>
<File
RelativePath=".\GPULocalMemory.h"
>
</File>
<File
RelativePath=".\GPURenderer.h"
>
</File>
<File
RelativePath=".\GPURendererSW.h"
>
</File>
<File
RelativePath=".\GPUScanlineEnvironment.h"
>
</File>
<File
RelativePath=".\GPUSettingsDlg.h"
>
</File>
<File
RelativePath=".\GPUSetupPrimCodeGenerator.h"
>
</File>
<File
RelativePath=".\GPUState.h"
>
</File>
<File
RelativePath=".\GPUVertex.h"
>
</File>
<File
RelativePath=".\GS.h"
>

View File

@ -28,7 +28,7 @@
#define IDB_LOGO10 2022
#define IDC_FBA 2023
#define IDC_LOGO9 2024
#define IDC_LOGO10 2025
#define IDC_LOGO11 2025
#define IDD_CAPTURE 2026
#define IDD_GPUCONFIG 2027
#define IDC_BLUR 2028

View File

@ -19,7 +19,7 @@
AdditionalDependencies="JITProfiling.lib d3d11.lib d3dx11.lib d3d10_1.lib d3dx10.lib d3d9.lib d3dx9.lib dxguid.lib winmm.lib strmiids.lib xinput.lib"
OutputFile="$(OutDir)\$(ProjectName)-$(SSEtype).dll"
AdditionalLibraryDirectories="./vtune"
DelayLoadDLLs="d3d9.dll;d3dx9_42.dll;d3d11.dll;d3dx11_42.dll"
DelayLoadDLLs="d3d9.dll;d3dx9_43.dll;d3d11.dll;d3dx11_43.dll"
GenerateDebugInformation="true"
SubSystem="2"
RandomizedBaseAddress="1"

Binary file not shown.

View File

@ -4,9 +4,9 @@
@file xbyak.h
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
@author herumi
@version $Revision: 1.238 $
@version $Revision: 1.239 $
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
@date $Date: 2011/02/04 03:46:09 $
@date $Date: 2011/02/07 06:09:35 $
@note modified new BSD license
http://www.opensource.org/licenses/bsd-license.php
*/
@ -56,7 +56,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x2990, /* 0xABCD = A.BC(D) */
VERSION = 0x2991, /* 0xABCD = A.BC(D) */
};
/*
#ifndef MIE_INTEGER_TYPE_DEFINED

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "2.99"; }
const char *getVersionString() const { return "2.991"; }
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
@ -943,7 +943,7 @@ void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, f
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }

View File

@ -44,6 +44,10 @@
#endif
#endif
// MSVC only: forward-declare __xgetbv with C linkage so that getXfeature()
// below can call it to read XFEATURE_ENABLED_MASK.
// NOTE(review): the definition is not in this header; presumably it is supplied
// by the compiler or by a separately linked object - verify for each toolchain.
#ifdef _MSC_VER
extern "C" unsigned __int64 __xgetbv(int);
#endif
namespace Xbyak { namespace util {
/**
@ -62,6 +66,16 @@ public:
__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
/**
	Read XFEATURE_ENABLED_MASK (extended control register 0) with XGETBV.
	Bits 1 and 2 of the result tell whether the OS has enabled XMM and YMM
	state, which is what the AVX/FMA detection tests with (value & 6) == 6.
	@note Call this only after CPUID has reported OSXSAVE (function 1, ECX
	bit 27); XGETBV is an invalid opcode when OSXSAVE is not set.
	@return the 64-bit register value, assembled from EDX:EAX
*/
static inline uint64 getXfeature()
{
#ifdef _MSC_VER
	return __xgetbv(0);
#else
	unsigned int eax, edx;
	// Emit the raw XGETBV encoding (0F 01 D0) instead of the mnemonic:
	// assemblers that predate XSAVE/AVX support reject "xgetbv" and would
	// break the build, while the bytes assemble everywhere.
	// ECX = 0 selects XFEATURE_ENABLED_MASK; the result comes back in EDX:EAX.
	__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
	return ((uint64)edx << 32) | eax;
#endif
}
enum Type {
@ -121,12 +135,16 @@ public:
if (data[2] & (1U << 25)) type_ |= tAESNI;
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
#if _M_SSE >= 0x500
// QQQ
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetvb
if (type_ & tOSXSACE) {
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64 bv = getXfeature();
if ((bv & 6) == 6) {
if (data[2] & (1U << 28)) type_ |= tAVX;
if (data[2] & (1U << 12)) type_ |= tFMA;
#endif
}
}
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX;
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;