mirror of https://github.com/PCSX2/pcsx2.git
Improved SSE detection:
* SSE3 detection via cpuid implemented. [fixes Linux -- the force_sse3 option should no longer be needed!]
* Instruction tests are now done for SSE3, SSSE3, and SSE4.1 to confirm cpuid results (I doubt this is necessary, but the old code did it for SSE3, so I figured I'd keep it and log results any time an inconsistency is detected).
* SSE4.2 and SSE4a detection added.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1086 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
6e82c77e92
commit
41079415fa
|
@ -135,13 +135,15 @@ void SysDetect()
|
||||||
"\t%sDetected SSE2\n"
|
"\t%sDetected SSE2\n"
|
||||||
"\t%sDetected SSE3\n"
|
"\t%sDetected SSE3\n"
|
||||||
"\t%sDetected SSSE3\n"
|
"\t%sDetected SSSE3\n"
|
||||||
"\t%sDetected SSE4.1\n", params
|
"\t%sDetected SSE4.1\n"
|
||||||
|
"\t%sDetected SSE4.2\n", params
|
||||||
cpucaps.hasMultimediaExtensions ? "" : "Not ",
|
cpucaps.hasMultimediaExtensions ? "" : "Not ",
|
||||||
cpucaps.hasStreamingSIMDExtensions ? "" : "Not ",
|
cpucaps.hasStreamingSIMDExtensions ? "" : "Not ",
|
||||||
cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ",
|
cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ",
|
||||||
cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ",
|
cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ",
|
||||||
cpucaps.hasSupplementalStreamingSIMD3Extensions ? "" : "Not ",
|
cpucaps.hasSupplementalStreamingSIMD3Extensions ? "" : "Not ",
|
||||||
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not "
|
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not ",
|
||||||
|
cpucaps.hasStreamingSIMD4Extensions2 ? "" : "Not "
|
||||||
);
|
);
|
||||||
|
|
||||||
if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu
|
if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu
|
||||||
|
@ -150,10 +152,12 @@ void SysDetect()
|
||||||
WriteLn(
|
WriteLn(
|
||||||
"\t%sDetected MMX2\n"
|
"\t%sDetected MMX2\n"
|
||||||
"\t%sDetected 3DNOW\n"
|
"\t%sDetected 3DNOW\n"
|
||||||
"\t%sDetected 3DNOW2\n", params
|
"\t%sDetected 3DNOW2\n"
|
||||||
|
"\t%sDetected SSE4a\n", params
|
||||||
cpucaps.hasMultimediaExtensionsExt ? "" : "Not ",
|
cpucaps.hasMultimediaExtensionsExt ? "" : "Not ",
|
||||||
cpucaps.has3DNOWInstructionExtensions ? "" : "Not ",
|
cpucaps.has3DNOWInstructionExtensions ? "" : "Not ",
|
||||||
cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not "
|
cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not ",
|
||||||
|
cpucaps.hasStreamingSIMD4ExtensionsA ? "" : "Not "
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,8 @@
|
||||||
|
|
||||||
#include "RedtapeWindows.h"
|
#include "RedtapeWindows.h"
|
||||||
|
|
||||||
|
using namespace x86Emitter;
|
||||||
|
|
||||||
#if defined (_MSC_VER) && _MSC_VER >= 1400
|
#if defined (_MSC_VER) && _MSC_VER >= 1400
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -148,31 +150,29 @@ u64 GetCPUTick( void )
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Note: This function doesn't support GCC/Linux. Looking online it seems the only
|
// Note: This function doesn't support GCC/Linux. Looking online it seems the only
|
||||||
// way to simulate the Microsoft SEH model is to use unix signals, and the 'sigaction'
|
// way to simulate the Microsoft SEH model is to use unix signals, and the 'sigaction'
|
||||||
// function specifically. Maybe a project for a linux developer at a later date. :)
|
// function specifically. Maybe a project for a linux developer at a later date. :)
|
||||||
void cpudetectSSE3(void* pfnCallSSE3)
|
|
||||||
{
|
|
||||||
cpucaps.hasStreamingSIMD3Extensions = 1;
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
static bool _test_instruction( void* pfnCall )
|
||||||
|
{
|
||||||
__try {
|
__try {
|
||||||
((void (*)())pfnCallSSE3)();
|
((void (*)())pfnCall)();
|
||||||
}
|
}
|
||||||
__except(EXCEPTION_EXECUTE_HANDLER) {
|
__except(EXCEPTION_EXECUTE_HANDLER) {
|
||||||
cpucaps.hasStreamingSIMD3Extensions = 0;
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
#else // linux
|
|
||||||
|
|
||||||
#ifdef PCSX2_FORCESSE3
|
static char* bool_to_char( bool testcond )
|
||||||
cpucaps.hasStreamingSIMD3Extensions = 1;
|
{
|
||||||
#else
|
return testcond ? "true" : "false";
|
||||||
// exception handling doesn't work, so disable for x86 builds of linux
|
|
||||||
cpucaps.hasStreamingSIMD3Extensions = 0;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined __LINUX__
|
#if defined __LINUX__
|
||||||
|
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
@ -180,6 +180,8 @@ void cpudetectSSE3(void* pfnCallSSE3)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
s64 CPUSpeedHz( unsigned int time )
|
s64 CPUSpeedHz( unsigned int time )
|
||||||
{
|
{
|
||||||
s64 timeStart,
|
s64 timeStart,
|
||||||
|
@ -200,6 +202,7 @@ s64 CPUSpeedHz( unsigned int time )
|
||||||
{
|
{
|
||||||
timeStart = timeGetTime( );
|
timeStart = timeGetTime( );
|
||||||
}
|
}
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
timeStop = timeGetTime( );
|
timeStop = timeGetTime( );
|
||||||
|
@ -294,6 +297,7 @@ void cpudetectInit()
|
||||||
if ( iCpuId( 0x80000001, regs ) != -1 )
|
if ( iCpuId( 0x80000001, regs ) != -1 )
|
||||||
{
|
{
|
||||||
x86_64_12BITBRANDID = regs[1] & 0xfff;
|
x86_64_12BITBRANDID = regs[1] & 0xfff;
|
||||||
|
cpuinfo.x86EFlags2 = regs[ 2 ];
|
||||||
cpuinfo.x86EFlags = regs[ 3 ];
|
cpuinfo.x86EFlags = regs[ 3 ];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -364,40 +368,85 @@ void cpudetectInit()
|
||||||
cpucaps.hasMultiThreading = ( cpuinfo.x86Flags >> 28 ) & 1;
|
cpucaps.hasMultiThreading = ( cpuinfo.x86Flags >> 28 ) & 1;
|
||||||
cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1;
|
cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1;
|
||||||
cpucaps.hasIntel64BitArchitecture = ( cpuinfo.x86Flags >> 30 ) & 1;
|
cpucaps.hasIntel64BitArchitecture = ( cpuinfo.x86Flags >> 30 ) & 1;
|
||||||
|
|
||||||
//that is only for AMDs
|
//that is only for AMDs
|
||||||
cpucaps.hasMultimediaExtensionsExt = ( cpuinfo.x86EFlags >> 22 ) & 1; //mmx2
|
cpucaps.hasMultimediaExtensionsExt = ( cpuinfo.x86EFlags >> 22 ) & 1; //mmx2
|
||||||
cpucaps.hasAMD64BitArchitecture = ( cpuinfo.x86EFlags >> 29 ) & 1; //64bit cpu
|
cpucaps.hasAMD64BitArchitecture = ( cpuinfo.x86EFlags >> 29 ) & 1; //64bit cpu
|
||||||
cpucaps.has3DNOWInstructionExtensionsExt = ( cpuinfo.x86EFlags >> 30 ) & 1; //3dnow+
|
cpucaps.has3DNOWInstructionExtensionsExt = ( cpuinfo.x86EFlags >> 30 ) & 1; //3dnow+
|
||||||
cpucaps.has3DNOWInstructionExtensions = ( cpuinfo.x86EFlags >> 31 ) & 1; //3dnow
|
cpucaps.has3DNOWInstructionExtensions = ( cpuinfo.x86EFlags >> 31 ) & 1; //3dnow
|
||||||
|
cpucaps.hasStreamingSIMD4ExtensionsA = ( cpuinfo.x86EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT
|
||||||
|
|
||||||
cpuinfo.cpuspeed = (u32)(CPUSpeedHz( 1000 ) / 1000000);
|
|
||||||
|
|
||||||
// --> SSE 4.1 detection <--
|
cpuinfo.cpuspeed = (u32)(CPUSpeedHz( 600 ) / 1000000);
|
||||||
// We don't care about the small subset of CPUs using SSE4 (which is also hard to
|
|
||||||
// detect, in addition to being of limited use due to the abbreviated instruction set).
|
|
||||||
// So we'll just leave it at SSE 4.1. SSE4 cpu detection is ignored.
|
|
||||||
|
|
||||||
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
|
// --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <--
|
||||||
|
|
||||||
// --> SSSE3 detection <--
|
|
||||||
|
|
||||||
|
cpucaps.hasStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 0 ) & 1; //sse3
|
||||||
cpucaps.hasSupplementalStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 9 ) & 1; //ssse3
|
cpucaps.hasSupplementalStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 9 ) & 1; //ssse3
|
||||||
|
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
|
||||||
|
cpucaps.hasStreamingSIMD4Extensions2 = ( cpuinfo.x86Flags2 >> 20 ) & 1; //sse4.2
|
||||||
|
|
||||||
// --> SSE3 detection <--
|
// Can the SSE3 / SSE4.1 bits be trusted? Using an instruction test is a very "complete"
|
||||||
// These instructions may not be recognized by some compilers, or may not have
|
// approach to ensuring the bit is accurate, and at least one reported case of a Q9550 not
|
||||||
// intrinsic equivalents available. So we use our own ix86 emitter to generate
|
// having SSE 4.1 set but still supporting it properly is fixed by this --air
|
||||||
// some code and run it that way. :)
|
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
|
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
|
||||||
if( recSSE != NULL )
|
if( recSSE != NULL )
|
||||||
{
|
{
|
||||||
x86SetPtr(recSSE);
|
xSetPtr( recSSE );
|
||||||
SSE3_MOVSLDUP_XMM_to_XMM(XMM0, XMM0);
|
xMOVSLDUP( xmm1, xmm0 );
|
||||||
RET();
|
RET();
|
||||||
cpudetectSSE3(recSSE);
|
|
||||||
|
u8* funcSSSE3 = xGetPtr();
|
||||||
|
xPABS.W( xmm0, xmm1 );
|
||||||
|
RET();
|
||||||
|
|
||||||
|
u8* funcSSE41 = xGetPtr();
|
||||||
|
xBLEND.VPD( xmm1, xmm0 );
|
||||||
|
RET();
|
||||||
|
|
||||||
|
bool sse3_result = _test_instruction( recSSE ); // sse3
|
||||||
|
bool ssse3_result = _test_instruction( funcSSSE3 );
|
||||||
|
bool sse41_result = _test_instruction( funcSSE41 );
|
||||||
|
|
||||||
HostSys::Munmap( recSSE, 0x1000 );
|
HostSys::Munmap( recSSE, 0x1000 );
|
||||||
|
|
||||||
|
// Test for and log any irregularities here.
|
||||||
|
// We take the instruction test result over cpuid since (in theory) it should be a
|
||||||
|
// more reliable gauge of the cpu's actual ability.
|
||||||
|
|
||||||
|
if( sse3_result != cpucaps.hasStreamingSIMD3Extensions )
|
||||||
|
{
|
||||||
|
Console::Notice( "SSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||||
|
params bool_to_char( cpucaps.hasStreamingSIMD3Extensions ), bool_to_char( sse3_result ) );
|
||||||
|
|
||||||
|
cpucaps.hasStreamingSIMD3Extensions = sse3_result;
|
||||||
}
|
}
|
||||||
else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); }
|
|
||||||
|
if( ssse3_result != cpucaps.hasSupplementalStreamingSIMD3Extensions )
|
||||||
|
{
|
||||||
|
Console::Notice( "SSSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||||
|
params bool_to_char( cpucaps.hasSupplementalStreamingSIMD3Extensions ), bool_to_char( ssse3_result ) );
|
||||||
|
|
||||||
|
cpucaps.hasSupplementalStreamingSIMD3Extensions = ssse3_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( sse41_result != cpucaps.hasStreamingSIMD4Extensions )
|
||||||
|
{
|
||||||
|
Console::Notice( "SSE4 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||||
|
params bool_to_char( cpucaps.hasStreamingSIMD4Extensions ), bool_to_char( sse41_result ) );
|
||||||
|
|
||||||
|
cpucaps.hasStreamingSIMD4Extensions = sse41_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Console::Notice(
|
||||||
|
"Notice: Could not allocate memory for SSE3/4 detection.\n"
|
||||||
|
"\tRelying on CPUID results. [this is not an error]"
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// Core Counting!
|
// Core Counting!
|
||||||
|
|
|
@ -54,12 +54,14 @@ struct CAPABILITIES
|
||||||
u32 hasStreamingSIMD3Extensions;
|
u32 hasStreamingSIMD3Extensions;
|
||||||
u32 hasSupplementalStreamingSIMD3Extensions;
|
u32 hasSupplementalStreamingSIMD3Extensions;
|
||||||
u32 hasStreamingSIMD4Extensions;
|
u32 hasStreamingSIMD4Extensions;
|
||||||
|
u32 hasStreamingSIMD4Extensions2;
|
||||||
|
|
||||||
// AMD-specific CPU Features
|
// AMD-specific CPU Features
|
||||||
u32 hasMultimediaExtensionsExt;
|
u32 hasMultimediaExtensionsExt;
|
||||||
u32 hasAMD64BitArchitecture;
|
u32 hasAMD64BitArchitecture;
|
||||||
u32 has3DNOWInstructionExtensionsExt;
|
u32 has3DNOWInstructionExtensionsExt;
|
||||||
u32 has3DNOWInstructionExtensions;
|
u32 has3DNOWInstructionExtensions;
|
||||||
|
u32 hasStreamingSIMD4ExtensionsA;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern CAPABILITIES cpucaps;
|
extern CAPABILITIES cpucaps;
|
||||||
|
@ -73,6 +75,7 @@ struct CPUINFO
|
||||||
u32 x86Flags; // Feature Flags
|
u32 x86Flags; // Feature Flags
|
||||||
u32 x86Flags2; // More Feature Flags
|
u32 x86Flags2; // More Feature Flags
|
||||||
u32 x86EFlags; // Extended Feature Flags
|
u32 x86EFlags; // Extended Feature Flags
|
||||||
|
u32 x86EFlags2; // Extended Feature Flags pg2
|
||||||
|
|
||||||
u32 PhysicalCores;
|
u32 PhysicalCores;
|
||||||
u32 LogicalCores;
|
u32 LogicalCores;
|
||||||
|
|
Loading…
Reference in New Issue