mirror of https://github.com/PCSX2/pcsx2.git
Improved SSE detection:
* SSE3 detection via cpuid implemented. [fixes Linux -- the force_sse3 option should no longer be needed!] * Instruction tests are now done for SSE3, SSSE3, and SSE4.1 to confirm cpuid results (I doubt this is necessary, but the old code did it for SSE3, so I figured I'd keep it and log results anytime an inconsistency is detected). * SSE4.2 and SSE4a detection added. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1086 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
6e82c77e92
commit
41079415fa
|
@ -135,13 +135,15 @@ void SysDetect()
|
|||
"\t%sDetected SSE2\n"
|
||||
"\t%sDetected SSE3\n"
|
||||
"\t%sDetected SSSE3\n"
|
||||
"\t%sDetected SSE4.1\n", params
|
||||
"\t%sDetected SSE4.1\n"
|
||||
"\t%sDetected SSE4.2\n", params
|
||||
cpucaps.hasMultimediaExtensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMDExtensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ",
|
||||
cpucaps.hasSupplementalStreamingSIMD3Extensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not "
|
||||
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD4Extensions2 ? "" : "Not "
|
||||
);
|
||||
|
||||
if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu
|
||||
|
@ -150,10 +152,12 @@ void SysDetect()
|
|||
WriteLn(
|
||||
"\t%sDetected MMX2\n"
|
||||
"\t%sDetected 3DNOW\n"
|
||||
"\t%sDetected 3DNOW2\n", params
|
||||
"\t%sDetected 3DNOW2\n"
|
||||
"\t%sDetected SSE4a\n", params
|
||||
cpucaps.hasMultimediaExtensionsExt ? "" : "Not ",
|
||||
cpucaps.has3DNOWInstructionExtensions ? "" : "Not ",
|
||||
cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not "
|
||||
cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD4ExtensionsA ? "" : "Not "
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#include "RedtapeWindows.h"
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
#if defined (_MSC_VER) && _MSC_VER >= 1400
|
||||
|
||||
extern "C"
|
||||
|
@ -148,31 +150,29 @@ u64 GetCPUTick( void )
|
|||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Note: This function doesn't support GCC/Linux. Looking online it seems the only
|
||||
// way to simulate the Microsoft SEH model is to use unix signals, and the 'sigaction'
|
||||
// function specifically. Maybe a project for a linux developer at a later date. :)
|
||||
void cpudetectSSE3(void* pfnCallSSE3)
|
||||
{
|
||||
cpucaps.hasStreamingSIMD3Extensions = 1;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static bool _test_instruction( void* pfnCall )
|
||||
{
|
||||
__try {
|
||||
((void (*)())pfnCallSSE3)();
|
||||
((void (*)())pfnCall)();
|
||||
}
|
||||
__except(EXCEPTION_EXECUTE_HANDLER) {
|
||||
cpucaps.hasStreamingSIMD3Extensions = 0;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else // linux
|
||||
|
||||
#ifdef PCSX2_FORCESSE3
|
||||
cpucaps.hasStreamingSIMD3Extensions = 1;
|
||||
#else
|
||||
// exception handling doesn't work, so disable for x86 builds of linux
|
||||
cpucaps.hasStreamingSIMD3Extensions = 0;
|
||||
#endif
|
||||
#endif
|
||||
// Maps a boolean to the literal text "true" or "false" for use in log messages.
// The returned pointer refers to a string literal, so it is const: callers must
// not modify or free it.
static const char* bool_to_char( bool testcond )
{
	return testcond ? "true" : "false";
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined __LINUX__
|
||||
|
||||
#include <sys/time.h>
|
||||
|
@ -180,6 +180,8 @@ void cpudetectSSE3(void* pfnCallSSE3)
|
|||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
s64 CPUSpeedHz( unsigned int time )
|
||||
{
|
||||
s64 timeStart,
|
||||
|
@ -200,6 +202,7 @@ s64 CPUSpeedHz( unsigned int time )
|
|||
{
|
||||
timeStart = timeGetTime( );
|
||||
}
|
||||
|
||||
for(;;)
|
||||
{
|
||||
timeStop = timeGetTime( );
|
||||
|
@ -294,6 +297,7 @@ void cpudetectInit()
|
|||
if ( iCpuId( 0x80000001, regs ) != -1 )
|
||||
{
|
||||
x86_64_12BITBRANDID = regs[1] & 0xfff;
|
||||
cpuinfo.x86EFlags2 = regs[ 2 ];
|
||||
cpuinfo.x86EFlags = regs[ 3 ];
|
||||
|
||||
}
|
||||
|
@ -364,40 +368,85 @@ void cpudetectInit()
|
|||
cpucaps.hasMultiThreading = ( cpuinfo.x86Flags >> 28 ) & 1;
|
||||
cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1;
|
||||
cpucaps.hasIntel64BitArchitecture = ( cpuinfo.x86Flags >> 30 ) & 1;
|
||||
|
||||
// The following extended feature flags are AMD-specific.
|
||||
cpucaps.hasMultimediaExtensionsExt = ( cpuinfo.x86EFlags >> 22 ) & 1; //mmx2
|
||||
cpucaps.hasAMD64BitArchitecture = ( cpuinfo.x86EFlags >> 29 ) & 1; //64bit cpu
|
||||
cpucaps.has3DNOWInstructionExtensionsExt = ( cpuinfo.x86EFlags >> 30 ) & 1; //3dnow+
|
||||
cpucaps.has3DNOWInstructionExtensions = ( cpuinfo.x86EFlags >> 31 ) & 1; //3dnow
|
||||
cpucaps.hasStreamingSIMD4ExtensionsA = ( cpuinfo.x86EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT
|
||||
|
||||
cpuinfo.cpuspeed = (u32)(CPUSpeedHz( 1000 ) / 1000000);
|
||||
|
||||
// --> SSE 4.1 detection <--
|
||||
// We don't care about the small subset of CPUs using SSE4 (which is also hard to
|
||||
// detect, in addition to being of limited use due to the abbreviated instruction set).
|
||||
// So we'll just leave it at SSE 4.1. SSE4 cpu detection is ignored.
|
||||
cpuinfo.cpuspeed = (u32)(CPUSpeedHz( 600 ) / 1000000);
|
||||
|
||||
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
|
||||
|
||||
// --> SSSE3 detection <--
|
||||
// --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <--
|
||||
|
||||
cpucaps.hasStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 0 ) & 1; //sse3
|
||||
cpucaps.hasSupplementalStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 9 ) & 1; //ssse3
|
||||
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
|
||||
cpucaps.hasStreamingSIMD4Extensions2 = ( cpuinfo.x86Flags2 >> 20 ) & 1; //sse4.2
|
||||
|
||||
// --> SSE3 detection <--
|
||||
// These instructions may not be recognized by some compilers, or may not have
|
||||
// intrinsic equivalents available. So we use our own ix86 emitter to generate
|
||||
// some code and run it that way. :)
|
||||
// Can the SSE3 / SSE4.1 bits be trusted? Using an instruction test is a very "complete"
|
||||
// approach to ensuring the bit is accurate, and at least one reported case of a Q9550 not
|
||||
// having SSE 4.1 set but still supporting it properly is fixed by this --air
|
||||
|
||||
#ifdef _MSC_VER
|
||||
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
|
||||
if( recSSE != NULL )
|
||||
{
|
||||
x86SetPtr(recSSE);
|
||||
SSE3_MOVSLDUP_XMM_to_XMM(XMM0, XMM0);
|
||||
xSetPtr( recSSE );
|
||||
xMOVSLDUP( xmm1, xmm0 );
|
||||
RET();
|
||||
cpudetectSSE3(recSSE);
|
||||
|
||||
u8* funcSSSE3 = xGetPtr();
|
||||
xPABS.W( xmm0, xmm1 );
|
||||
RET();
|
||||
|
||||
u8* funcSSE41 = xGetPtr();
|
||||
xBLEND.VPD( xmm1, xmm0 );
|
||||
RET();
|
||||
|
||||
bool sse3_result = _test_instruction( recSSE ); // sse3
|
||||
bool ssse3_result = _test_instruction( funcSSSE3 );
|
||||
bool sse41_result = _test_instruction( funcSSE41 );
|
||||
|
||||
HostSys::Munmap( recSSE, 0x1000 );
|
||||
|
||||
// Test for and log any irregularities here.
|
||||
// We take the instruction test result over cpuid since (in theory) it should be a
|
||||
// more reliable gauge of the cpu's actual ability.
|
||||
|
||||
if( sse3_result != cpucaps.hasStreamingSIMD3Extensions )
|
||||
{
|
||||
Console::Notice( "SSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||
params bool_to_char( cpucaps.hasStreamingSIMD3Extensions ), bool_to_char( sse3_result ) );
|
||||
|
||||
cpucaps.hasStreamingSIMD3Extensions = sse3_result;
|
||||
}
|
||||
else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); }
|
||||
|
||||
if( ssse3_result != cpucaps.hasSupplementalStreamingSIMD3Extensions )
|
||||
{
|
||||
Console::Notice( "SSSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||
params bool_to_char( cpucaps.hasSupplementalStreamingSIMD3Extensions ), bool_to_char( ssse3_result ) );
|
||||
|
||||
cpucaps.hasSupplementalStreamingSIMD3Extensions = ssse3_result;
|
||||
}
|
||||
|
||||
if( sse41_result != cpucaps.hasStreamingSIMD4Extensions )
|
||||
{
|
||||
Console::Notice( "SSE4 Detection Inconsistency: cpuid=%s, test_result=%s",
|
||||
params bool_to_char( cpucaps.hasStreamingSIMD4Extensions ), bool_to_char( sse41_result ) );
|
||||
|
||||
cpucaps.hasStreamingSIMD4Extensions = sse41_result;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
Console::Notice(
|
||||
"Notice: Could not allocate memory for SSE3/4 detection.\n"
|
||||
"\tRelying on CPUID results. [this is not an error]"
|
||||
);
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////
|
||||
// Core Counting!
|
||||
|
|
|
@ -54,12 +54,14 @@ struct CAPABILITIES
|
|||
u32 hasStreamingSIMD3Extensions;
|
||||
u32 hasSupplementalStreamingSIMD3Extensions;
|
||||
u32 hasStreamingSIMD4Extensions;
|
||||
u32 hasStreamingSIMD4Extensions2;
|
||||
|
||||
// AMD-specific CPU Features
|
||||
u32 hasMultimediaExtensionsExt;
|
||||
u32 hasAMD64BitArchitecture;
|
||||
u32 has3DNOWInstructionExtensionsExt;
|
||||
u32 has3DNOWInstructionExtensions;
|
||||
u32 hasStreamingSIMD4ExtensionsA;
|
||||
};
|
||||
|
||||
extern CAPABILITIES cpucaps;
|
||||
|
@ -73,6 +75,7 @@ struct CPUINFO
|
|||
u32 x86Flags; // Feature Flags
|
||||
u32 x86Flags2; // More Feature Flags
|
||||
u32 x86EFlags; // Extended Feature Flags
|
||||
u32 x86EFlags2; // Extended Feature Flags pg2
|
||||
|
||||
u32 PhysicalCores;
|
||||
u32 LogicalCores;
|
||||
|
|
Loading…
Reference in New Issue