diff --git a/common/include/x86emitter/tools.h b/common/include/x86emitter/tools.h index c204430938..52cc1d2f21 100644 --- a/common/include/x86emitter/tools.h +++ b/common/include/x86emitter/tools.h @@ -17,82 +17,111 @@ #include "x86emitter.h" -// this is all that needs to be called and will fill up the below structs -extern void cpudetectInit(); - -// Returns the number of available logical CPUs (cores plus hyperthreaded cpus) -extern void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhysicalCPU ); - -// -------------------------------------------------------------------------------------- -// x86CPU_INFO -// -------------------------------------------------------------------------------------- -struct x86CPU_INFO +enum x86VendorType { - u32 FamilyID; // Processor Family - u32 Model; // Processor Model - u32 TypeID; // Processor Type - u32 StepID; // Stepping ID + x86Vendor_Intel=0, + x86Vendor_AMD, + x86Vendor_Unknown, +}; + +// -------------------------------------------------------------------------------------- +// x86capabilities +// -------------------------------------------------------------------------------------- +class x86capabilities +{ +public: + bool isIdentified; + u32 LogicalCoresPerPhysicalCPU; + u32 PhysicalCoresPerPhysicalCPU; + +public: + x86VendorType VendorID; + + uint FamilyID; // Processor Family + uint Model; // Processor Model + uint TypeID; // Processor Type + uint StepID; // Stepping ID + u32 Flags; // Feature Flags u32 Flags2; // More Feature Flags u32 EFlags; // Extended Feature Flags u32 EFlags2; // Extended Feature Flags pg2 - u32 PhysicalCores; - u32 LogicalCores; - char VendorName[16]; // Vendor/Creator ID - char TypeName[20]; // cpu type char FamilyName[50]; // the original cpu name - // Speed - speed of cpu in mhz - // This is a rough "real" measure of the cpu speed, taken at application startup. 
- // Not to be considered totally accurate: Power saving CPUs and SpeedStep can skew - // results considerably. - u32 Speed; - // ---------------------------------------------------------------------------- // x86 CPU Capabilities Section (all boolean flags!) // ---------------------------------------------------------------------------- - u32 hasFloatingPointUnit:1; - u32 hasVirtual8086ModeEnhancements:1; - u32 hasDebuggingExtensions:1; - u32 hasPageSizeExtensions:1; - u32 hasTimeStampCounter:1; - u32 hasModelSpecificRegisters:1; - u32 hasPhysicalAddressExtension:1; - u32 hasCOMPXCHG8BInstruction:1; - u32 hasAdvancedProgrammableInterruptController:1; - u32 hasSEPFastSystemCall:1; - u32 hasMemoryTypeRangeRegisters:1; - u32 hasPTEGlobalFlag:1; - u32 hasMachineCheckArchitecture:1; - u32 hasConditionalMoveAndCompareInstructions:1; - u32 hasFGPageAttributeTable:1; - u32 has36bitPageSizeExtension:1; - u32 hasProcessorSerialNumber:1; - u32 hasCFLUSHInstruction:1; - u32 hasDebugStore:1; - u32 hasACPIThermalMonitorAndClockControl:1; - u32 hasMultimediaExtensions:1; - u32 hasFastStreamingSIMDExtensionsSaveRestore:1; - u32 hasStreamingSIMDExtensions:1; - u32 hasStreamingSIMD2Extensions:1; - u32 hasSelfSnoop:1; - u32 hasMultiThreading:1; // is TRUE for both multi-core and Hyperthreaded CPUs. 
- u32 hasThermalMonitor:1; - u32 hasIntel64BitArchitecture:1; - u32 hasStreamingSIMD3Extensions:1; - u32 hasSupplementalStreamingSIMD3Extensions:1; - u32 hasStreamingSIMD4Extensions:1; - u32 hasStreamingSIMD4Extensions2:1; + u32 hasFloatingPointUnit :1; + u32 hasVirtual8086ModeEnhancements :1; + u32 hasDebuggingExtensions :1; + u32 hasPageSizeExtensions :1; + u32 hasTimeStampCounter :1; + u32 hasModelSpecificRegisters :1; + u32 hasPhysicalAddressExtension :1; + u32 hasCOMPXCHG8BInstruction :1; + u32 hasAdvancedProgrammableInterruptController :1; + u32 hasSEPFastSystemCall :1; + u32 hasMemoryTypeRangeRegisters :1; + u32 hasPTEGlobalFlag :1; + u32 hasMachineCheckArchitecture :1; + u32 hasConditionalMoveAndCompareInstructions :1; + u32 hasFGPageAttributeTable :1; + u32 has36bitPageSizeExtension :1; + u32 hasProcessorSerialNumber :1; + u32 hasCFLUSHInstruction :1; + u32 hasDebugStore :1; + u32 hasACPIThermalMonitorAndClockControl :1; + u32 hasMultimediaExtensions :1; + u32 hasFastStreamingSIMDExtensionsSaveRestore :1; + u32 hasStreamingSIMDExtensions :1; + u32 hasStreamingSIMD2Extensions :1; + u32 hasSelfSnoop :1; - // AMD-specific CPU Features - u32 hasMultimediaExtensionsExt:1; - u32 hasAMD64BitArchitecture:1; - u32 has3DNOWInstructionExtensionsExt:1; - u32 has3DNOWInstructionExtensions:1; - u32 hasStreamingSIMD4ExtensionsA:1; + // is TRUE for both multi-core and Hyperthreaded CPUs. + u32 hasMultiThreading :1; + + u32 hasThermalMonitor :1; + u32 hasIntel64BitArchitecture :1; + u32 hasStreamingSIMD3Extensions :1; + u32 hasSupplementalStreamingSIMD3Extensions :1; + u32 hasStreamingSIMD4Extensions :1; + u32 hasStreamingSIMD4Extensions2 :1; + + // AMD-specific CPU Features + u32 hasMultimediaExtensionsExt :1; + u32 hasAMD64BitArchitecture :1; + u32 has3DNOWInstructionExtensionsExt :1; + u32 has3DNOWInstructionExtensions :1; + u32 hasStreamingSIMD4ExtensionsA :1; + + // Core Counts! 
+ u32 PhysicalCores; + u32 LogicalCores; + +public: + x86capabilities() + { + isIdentified = false; + VendorID = x86Vendor_Unknown; + LogicalCoresPerPhysicalCPU = 1; + PhysicalCoresPerPhysicalCPU = 1; + } + + void Identify(); + void CountCores(); + wxString GetTypeName() const; + + u32 CalculateMHz() const; + + void SIMD_ExceptionTest(); + void SIMD_EstablishMXCSRmask(); + +protected: + s64 _CPUSpeedHz( u64 time ) const; + void CountLogicalCores(); }; enum SSE_RoundMode @@ -168,7 +197,7 @@ extern SSE_MXCSR MXCSR_Mask; ////////////////////////////////////////////////////////////////////////////////////////// -extern __aligned16 x86CPU_INFO x86caps; +extern __aligned16 x86capabilities x86caps; extern bool g_EEFreezeRegs; diff --git a/common/src/x86emitter/LnxCpuDetect.cpp b/common/src/x86emitter/LnxCpuDetect.cpp index aad7076f09..e79cd37685 100644 --- a/common/src/x86emitter/LnxCpuDetect.cpp +++ b/common/src/x86emitter/LnxCpuDetect.cpp @@ -19,20 +19,20 @@ // Note: Apparently this solution is Linux/Solaris only. // FreeBSD/OsX need something far more complicated (apparently) -void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhysicalCPU ) +void x86capabilities::CountLogicalCores() { const uint numCPU = sysconf( _SC_NPROCESSORS_ONLN ); if( numCPU > 0 ) { //isMultiCore = numCPU > 1; - x86caps.LogicalCores = numCPU; - x86caps.PhysicalCores = ( numCPU / LogicalCoresPerPhysicalCPU ) * PhysicalCoresPerPhysicalCPU; + LogicalCores = numCPU; + PhysicalCores = ( numCPU / LogicalCoresPerPhysicalCPU ) * PhysicalCoresPerPhysicalCPU; } else { // Indeterminate? 
- x86caps.LogicalCores = 1; - x86caps.PhysicalCores = 1; + LogicalCores = 1; + PhysicalCores = 1; } } diff --git a/common/src/x86emitter/WinCpuDetect.cpp b/common/src/x86emitter/WinCpuDetect.cpp index bfb85c8c1e..2011275032 100644 --- a/common/src/x86emitter/WinCpuDetect.cpp +++ b/common/src/x86emitter/WinCpuDetect.cpp @@ -17,12 +17,12 @@ #include "PrecompiledHeader.h" #include "cpudetect_internal.h" -void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhysicalCPU ) +void x86capabilities::CountLogicalCores() { DWORD vProcessCPUs; DWORD vSystemCPUs; - x86caps.LogicalCores = 1; + LogicalCores = 1; if( !GetProcessAffinityMask (GetCurrentProcess (), &vProcessCPUs, &vSystemCPUs) ) return; @@ -36,11 +36,11 @@ void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhys CPUs++; } - x86caps.LogicalCores = CPUs; + LogicalCores = CPUs; if( LogicalCoresPerPhysicalCPU > CPUs) // for 1-socket HTT-disabled machines LogicalCoresPerPhysicalCPU = CPUs; - x86caps.PhysicalCores = ( CPUs / LogicalCoresPerPhysicalCPU ) * PhysicalCoresPerPhysicalCPU; + PhysicalCores = ( CPUs / LogicalCoresPerPhysicalCPU ) * PhysicalCoresPerPhysicalCPU; } bool _test_instruction( void* pfnCall ) diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index b5529f17f7..940af429c2 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -19,7 +19,11 @@ using namespace x86Emitter; -__aligned16 x86CPU_INFO x86caps; +__aligned16 x86capabilities x86caps; + +// Recompiled code buffer for SSE and MXCSR feature testing. +static __pagealigned u8 recSSE[__pagesize]; +static __pagealigned u8 targetFXSAVE[512]; #ifdef __LINUX__ # include @@ -31,34 +35,71 @@ static const char* bool_to_char( bool testcond ) return testcond ? "true" : "false"; } -static s64 CPUSpeedHz( u64 time ) +// Warning! We've had problems with the MXCSR detection code causing stack corruption in +// MSVC PGO builds. 
The problem was fixed when I moved the MXCSR code to this function, and +// moved the recSSE[] array to a global static (it was local to cpudetectInit). Commented +// here in case the nutty crash ever re-surfaces. >_< +void x86capabilities::SIMD_EstablishMXCSRmask() +{ + if( !hasStreamingSIMDExtensions ) return; + + MXCSR_Mask.bitmask = 0xFFBF; // MMX/SSE default + + if( hasStreamingSIMD2Extensions ) + { + // This is generally a safe assumption, but FXSAVE is the "correct" way to + // detect MXCSR masking features of the cpu, so we use its result below + // and override this. + + MXCSR_Mask.bitmask = 0xFFFF; // SSE2 features added + } + + if( !CanEmitShit() ) return; + + // the fxsave buffer must be 16-byte aligned to avoid GPF. The image is written to the + // dedicated targetFXSAVE buffer (declared __pagealigned) instead of into recSSE itself. + + HostSys::MemProtectStatic( recSSE, Protect_ReadWrite, true ); + + xSetPtr( recSSE ); + xFXSAVE( targetFXSAVE ); + xRET(); + + HostSys::MemProtectStatic( recSSE, Protect_ReadOnly, true ); + + CallAddress( recSSE ); + + u32 result = (u32&)targetFXSAVE[28]; // bytes 28..31 of the FXSAVE image hold the MXCSR_MASK field. + if( result != 0 ) + MXCSR_Mask.bitmask = result; +} + +// Counts the number of cpu cycles executed over the requested number of PerformanceCounter +// ticks. Returns that exact count. +// For best results you should pick a period of time long enough to get a reading that won't +// be prone to rounding error; but short enough that it'll be highly unlikely to be interrupted +// by the operating system task switches. +s64 x86capabilities::_CPUSpeedHz( u64 time ) const { u64 timeStart, timeStop; s64 startCycle, endCycle; - if( ! x86caps.hasTimeStampCounter ) + if( ! hasTimeStampCounter ) return 0; SingleCoreAffinity affinity_lock; // Align the cpu execution to a cpuTick boundary. 
- do { timeStart = GetCPUTicks(); + do { + timeStart = GetCPUTicks(); + startCycle = __rdtsc(); } while( GetCPUTicks() == timeStart ); - do - { - timeStop = GetCPUTicks(); - startCycle = __rdtsc(); - } while( ( timeStop - timeStart ) == 0 ); - - timeStart = timeStop; - do - { + do { timeStop = GetCPUTicks(); endCycle = __rdtsc(); - } - while( ( timeStop - timeStart ) < time ); + } while( ( timeStop - timeStart ) < time ); s64 cycleCount = endCycle - startCycle; s64 timeCount = timeStop - timeStart; @@ -73,92 +114,111 @@ static s64 CPUSpeedHz( u64 time ) return (s64)newCycleCount; } -// Recompiled code buffer for SSE and MXCSR feature testing. -static __pagealigned u8 recSSE[__pagesize]; - -// Warning! We've had problems with the MXCSR detection code causing stack corruption in -// MSVC PGO builds. The problem was fixed when I moved the MXCSR code to this function, and -// moved the recSSE[] array to a global static (it was local to cpudetectInit). Commented -// here in case the nutty crash ever re-surfaces. >_< - -void EstablishMXCSRmask() +wxString x86capabilities::GetTypeName() const { - if( !x86caps.hasStreamingSIMDExtensions ) return; - - MXCSR_Mask.bitmask = 0xFFBF; // MMX/SSE default - - if( x86caps.hasStreamingSIMD2Extensions ) + switch( TypeID ) { - // This is generally safe assumption, but FXSAVE is the "correct" way to - // detect MXCSR masking features of the cpu, so we use it's result below - // and override this. - - MXCSR_Mask.bitmask = 0xFFFF; // SSE2 features added + case 0: return L"Standard OEM"; + case 1: return L"Overdrive"; + case 2: return L"Dual"; + case 3: return L"Reserved"; + default: return L"Unknown"; } - - if( !CanEmitShit() ) return; - - // the fxsave buffer must be 16-byte aligned to avoid GPF. I just save it to an - // unused portion of recSSE, since it has plenty of room to spare. 
- - xSetPtr( recSSE ); - xFXSAVE( recSSE + 1024 ); - xRET(); - - CallAddress( recSSE ); - - u32 result = (u32&)recSSE[1024+28]; // bytes 28->32 are the MXCSR_Mask. - if( result != 0 ) - MXCSR_Mask.bitmask = result; } -void cpudetectInit() +void x86capabilities::CountCores() { + Identify(); + s32 regs[ 4 ]; u32 cmds; + + LogicalCoresPerPhysicalCPU = 0; + PhysicalCoresPerPhysicalCPU = 1; + + // detect multicore for Intel cpu + __cpuid( regs, 0 ); // leaf 0: EAX = highest supported standard leaf (EBX/ECX/EDX = vendor string) + cmds = regs[ 0 ]; + if( cmds >= 0x00000001 ) + { + __cpuid( regs, 0x00000001 ); // leaf 1 must be queried first: EBX[23:16] = logical processors per package + LogicalCoresPerPhysicalCPU = ( regs[1] >> 16 ) & 0xff; + } + if ((cmds >= 0x00000004) && (VendorID == x86Vendor_Intel)) + { + __cpuid( regs, 0x00000004 ); + PhysicalCoresPerPhysicalCPU += ( regs[0] >> 26) & 0x3f; + } + + __cpuid( regs, 0x80000000 ); + cmds = regs[ 0 ]; + + // detect multicore for AMD cpu + + if ((cmds >= 0x80000008) && (VendorID == x86Vendor_AMD) ) + { + __cpuid( regs, 0x80000008 ); + PhysicalCoresPerPhysicalCPU += ( regs[2] ) & 0xff; + } + + if( !hasMultiThreading || LogicalCoresPerPhysicalCPU == 0 ) + LogicalCoresPerPhysicalCPU = 1; + + // This will assign values into LogicalCores and PhysicalCores + CountLogicalCores(); +} + +static const char* tbl_x86vendors[] = // entries are ordered to match x86VendorType +{ + "GenuineIntel", + "AuthenticAMD", + "Unknown ", +}; + +// Performs all _cpuid-related activity. This fills *most* of the x86caps structure, except for +// the cpuSpeed and the mxcsr masks. Those must be completed manually. 
+void x86capabilities::Identify() +{ + if( isIdentified ) return; + isIdentified = true; + + s32 regs[ 4 ]; + u32 cmds; + //AMD 64 STUFF u32 x86_64_8BITBRANDID; u32 x86_64_12BITBRANDID; - memzero( x86caps.VendorName ); - x86caps.FamilyID = 0; - x86caps.Model = 0; - x86caps.TypeID = 0; - x86caps.StepID = 0; - x86caps.Flags = 0; - x86caps.EFlags = 0; - + memzero( VendorName ); __cpuid( regs, 0 ); cmds = regs[ 0 ]; - ((u32*)x86caps.VendorName)[ 0 ] = regs[ 1 ]; - ((u32*)x86caps.VendorName)[ 1 ] = regs[ 3 ]; - ((u32*)x86caps.VendorName)[ 2 ] = regs[ 2 ]; + ((u32*)VendorName)[ 0 ] = regs[ 1 ]; + ((u32*)VendorName)[ 1 ] = regs[ 3 ]; + ((u32*)VendorName)[ 2 ] = regs[ 2 ]; - u32 LogicalCoresPerPhysicalCPU = 0; - u32 PhysicalCoresPerPhysicalCPU = 1; + // Determine Vendor Specifics! + // It's really not recommended that we base much (if anything) on CPU vendor names, + // however it's currently necessary in order to gain a (pseudo)reliable count of cores + // and threads used by the CPU (AMD and Intel can't agree on how to make this info available). 
+ + int& vid = (int&)VendorID; + for( vid=0; vid= 0x00000001 ) { __cpuid( regs, 0x00000001 ); - x86caps.StepID = regs[ 0 ] & 0xf; - x86caps.Model = (regs[ 0 ] >> 4) & 0xf; - x86caps.FamilyID = (regs[ 0 ] >> 8) & 0xf; - x86caps.TypeID = (regs[ 0 ] >> 12) & 0x3; + StepID = regs[ 0 ] & 0xf; + Model = (regs[ 0 ] >> 4) & 0xf; + FamilyID = (regs[ 0 ] >> 8) & 0xf; + TypeID = (regs[ 0 ] >> 12) & 0x3; x86_64_8BITBRANDID = regs[ 1 ] & 0xff; - x86caps.Flags = regs[ 3 ]; - x86caps.Flags2 = regs[ 2 ]; - - LogicalCoresPerPhysicalCPU = ( regs[1] >> 16 ) & 0xff; - } - - // detect multicore for Intel cpu - - if ((cmds >= 0x00000004) && !strcmp("GenuineIntel",x86caps.VendorName)) - { - __cpuid( regs, 0x00000004 ); - PhysicalCoresPerPhysicalCPU += ( regs[0] >> 26) & 0x3f; + Flags = regs[ 3 ]; + Flags2 = regs[ 2 ]; } __cpuid( regs, 0x80000000 ); @@ -168,111 +228,82 @@ void cpudetectInit() __cpuid( regs, 0x80000001 ); x86_64_12BITBRANDID = regs[1] & 0xfff; - x86caps.EFlags2 = regs[ 2 ]; - x86caps.EFlags = regs[ 3 ]; + EFlags2 = regs[ 2 ]; + EFlags = regs[ 3 ]; } - // detect multicore for AMD cpu + memzero( FamilyName ); + __cpuid( (int*)FamilyName, 0x80000002); + __cpuid( (int*)(FamilyName+16), 0x80000003); + __cpuid( (int*)(FamilyName+32), 0x80000004); - if ((cmds >= 0x80000008) && !strcmp("AuthenticAMD",x86caps.VendorName)) - { - __cpuid( regs, 0x80000008 ); - PhysicalCoresPerPhysicalCPU += ( regs[2] ) & 0xff; - } + hasFloatingPointUnit = ( Flags >> 0 ) & 1; + hasVirtual8086ModeEnhancements = ( Flags >> 1 ) & 1; + hasDebuggingExtensions = ( Flags >> 2 ) & 1; + hasPageSizeExtensions = ( Flags >> 3 ) & 1; + hasTimeStampCounter = ( Flags >> 4 ) & 1; + hasModelSpecificRegisters = ( Flags >> 5 ) & 1; + hasPhysicalAddressExtension = ( Flags >> 6 ) & 1; + hasMachineCheckArchitecture = ( Flags >> 7 ) & 1; + hasCOMPXCHG8BInstruction = ( Flags >> 8 ) & 1; + hasAdvancedProgrammableInterruptController = ( Flags >> 9 ) & 1; + hasSEPFastSystemCall = ( Flags >> 11 ) & 1; + 
hasMemoryTypeRangeRegisters = ( Flags >> 12 ) & 1; + hasPTEGlobalFlag = ( Flags >> 13 ) & 1; + hasMachineCheckArchitecture = ( Flags >> 14 ) & 1; + hasConditionalMoveAndCompareInstructions = ( Flags >> 15 ) & 1; + hasFGPageAttributeTable = ( Flags >> 16 ) & 1; + has36bitPageSizeExtension = ( Flags >> 17 ) & 1; + hasProcessorSerialNumber = ( Flags >> 18 ) & 1; + hasCFLUSHInstruction = ( Flags >> 19 ) & 1; + hasDebugStore = ( Flags >> 21 ) & 1; + hasACPIThermalMonitorAndClockControl = ( Flags >> 22 ) & 1; + hasMultimediaExtensions = ( Flags >> 23 ) & 1; //mmx + hasFastStreamingSIMDExtensionsSaveRestore = ( Flags >> 24 ) & 1; + hasStreamingSIMDExtensions = ( Flags >> 25 ) & 1; //sse + hasStreamingSIMD2Extensions = ( Flags >> 26 ) & 1; //sse2 + hasSelfSnoop = ( Flags >> 27 ) & 1; + hasMultiThreading = ( Flags >> 28 ) & 1; + hasThermalMonitor = ( Flags >> 29 ) & 1; + hasIntel64BitArchitecture = ( Flags >> 30 ) & 1; - switch(x86caps.TypeID) - { - case 0: - strcpy( x86caps.TypeName, "Standard OEM"); - break; - case 1: - strcpy( x86caps.TypeName, "Overdrive"); - break; - case 2: - strcpy( x86caps.TypeName, "Dual"); - break; - case 3: - strcpy( x86caps.TypeName, "Reserved"); - break; - default: - strcpy( x86caps.TypeName, "Unknown"); - break; - } + // ------------------------------------------------- + // --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <-- + // ------------------------------------------------- - #if 0 - // vendor identification, currently unneeded. - // It's really not recommended that we base much (if anything) on CPU vendor names. - // But the code is left in as an ifdef, for possible future reference. 
+ hasStreamingSIMD3Extensions = ( Flags2 >> 0 ) & 1; //sse3 + hasSupplementalStreamingSIMD3Extensions = ( Flags2 >> 9 ) & 1; //ssse3 + hasStreamingSIMD4Extensions = ( Flags2 >> 19 ) & 1; //sse4.1 + hasStreamingSIMD4Extensions2 = ( Flags2 >> 20 ) & 1; //sse4.2 - int cputype=0; // Cpu type - static const char* Vendor_Intel = "GenuineIntel"; - static const char* Vendor_AMD = "AuthenticAMD"; + // Ones only for AMDs: + hasMultimediaExtensionsExt = ( EFlags >> 22 ) & 1; //mmx2 + hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu + has3DNOWInstructionExtensionsExt = ( EFlags >> 30 ) & 1; //3dnow+ + has3DNOWInstructionExtensions = ( EFlags >> 31 ) & 1; //3dnow + hasStreamingSIMD4ExtensionsA = ( EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT - if( memcmp( x86caps.VendorName, Vendor_Intel, 12 ) == 0 ) { cputype = 0; } else - if( memcmp( x86caps.VendorName, Vendor_AMD, 12 ) == 0 ) { cputype = 1; } - - if ( x86caps.VendorName[ 0 ] == 'G' ) { cputype = 0; } - if ( x86caps.VendorName[ 0 ] == 'A' ) { cputype = 1; } - #endif - - memzero( x86caps.FamilyName ); - __cpuid( (int*)x86caps.FamilyName, 0x80000002); - __cpuid( (int*)(x86caps.FamilyName+16), 0x80000003); - __cpuid( (int*)(x86caps.FamilyName+32), 0x80000004); - - //capabilities - x86caps.hasFloatingPointUnit = ( x86caps.Flags >> 0 ) & 1; - x86caps.hasVirtual8086ModeEnhancements = ( x86caps.Flags >> 1 ) & 1; - x86caps.hasDebuggingExtensions = ( x86caps.Flags >> 2 ) & 1; - x86caps.hasPageSizeExtensions = ( x86caps.Flags >> 3 ) & 1; - x86caps.hasTimeStampCounter = ( x86caps.Flags >> 4 ) & 1; - x86caps.hasModelSpecificRegisters = ( x86caps.Flags >> 5 ) & 1; - x86caps.hasPhysicalAddressExtension = ( x86caps.Flags >> 6 ) & 1; - x86caps.hasMachineCheckArchitecture = ( x86caps.Flags >> 7 ) & 1; - x86caps.hasCOMPXCHG8BInstruction = ( x86caps.Flags >> 8 ) & 1; - x86caps.hasAdvancedProgrammableInterruptController = ( x86caps.Flags >> 9 ) & 1; - x86caps.hasSEPFastSystemCall = ( x86caps.Flags >> 11 ) & 1; - 
x86caps.hasMemoryTypeRangeRegisters = ( x86caps.Flags >> 12 ) & 1; - x86caps.hasPTEGlobalFlag = ( x86caps.Flags >> 13 ) & 1; - x86caps.hasMachineCheckArchitecture = ( x86caps.Flags >> 14 ) & 1; - x86caps.hasConditionalMoveAndCompareInstructions = ( x86caps.Flags >> 15 ) & 1; - x86caps.hasFGPageAttributeTable = ( x86caps.Flags >> 16 ) & 1; - x86caps.has36bitPageSizeExtension = ( x86caps.Flags >> 17 ) & 1; - x86caps.hasProcessorSerialNumber = ( x86caps.Flags >> 18 ) & 1; - x86caps.hasCFLUSHInstruction = ( x86caps.Flags >> 19 ) & 1; - x86caps.hasDebugStore = ( x86caps.Flags >> 21 ) & 1; - x86caps.hasACPIThermalMonitorAndClockControl = ( x86caps.Flags >> 22 ) & 1; - x86caps.hasMultimediaExtensions = ( x86caps.Flags >> 23 ) & 1; //mmx - x86caps.hasFastStreamingSIMDExtensionsSaveRestore = ( x86caps.Flags >> 24 ) & 1; - x86caps.hasStreamingSIMDExtensions = ( x86caps.Flags >> 25 ) & 1; //sse - x86caps.hasStreamingSIMD2Extensions = ( x86caps.Flags >> 26 ) & 1; //sse2 - x86caps.hasSelfSnoop = ( x86caps.Flags >> 27 ) & 1; - x86caps.hasMultiThreading = ( x86caps.Flags >> 28 ) & 1; - x86caps.hasThermalMonitor = ( x86caps.Flags >> 29 ) & 1; - x86caps.hasIntel64BitArchitecture = ( x86caps.Flags >> 30 ) & 1; - - //that is only for AMDs - x86caps.hasMultimediaExtensionsExt = ( x86caps.EFlags >> 22 ) & 1; //mmx2 - x86caps.hasAMD64BitArchitecture = ( x86caps.EFlags >> 29 ) & 1; //64bit cpu - x86caps.has3DNOWInstructionExtensionsExt = ( x86caps.EFlags >> 30 ) & 1; //3dnow+ - x86caps.has3DNOWInstructionExtensions = ( x86caps.EFlags >> 31 ) & 1; //3dnow - x86caps.hasStreamingSIMD4ExtensionsA = ( x86caps.EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT + isIdentified = true; +} +u32 x86capabilities::CalculateMHz() const +{ InitCPUTicks(); u64 span = GetTickFrequency(); if( (span % 1000) < 400 ) // helps minimize rounding errors - x86caps.Speed = (u32)( CPUSpeedHz( span / 1000 ) / 1000 ); + return (u32)( _CPUSpeedHz( span / 1000 ) / 1000 ); else - x86caps.Speed = (u32)( CPUSpeedHz( span / 
500 ) / 2000 ); - - // --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <-- - - x86caps.hasStreamingSIMD3Extensions = ( x86caps.Flags2 >> 0 ) & 1; //sse3 - x86caps.hasSupplementalStreamingSIMD3Extensions = ( x86caps.Flags2 >> 9 ) & 1; //ssse3 - x86caps.hasStreamingSIMD4Extensions = ( x86caps.Flags2 >> 19 ) & 1; //sse4.1 - x86caps.hasStreamingSIMD4Extensions2 = ( x86caps.Flags2 >> 20 ) & 1; //sse4.2 + return (u32)( _CPUSpeedHz( span / 500 ) / 2000 ); +} +// Special extended version of SIMD testing, which uses exceptions to double-check the presence +// of SSE2/3/4 instructions. Useful if you don't trust cpuid (at least one case of an invalid +// cpuid has been reported on a Core2 Quad -- the user fixed it by clearing his CMOS). +// +// Results of the CPU tests override the cpuid-derived flags; any mismatch is logged as a warning. +void x86capabilities::SIMD_ExceptionTest() +{ HostSys::MemProtectStatic( recSSE, Protect_ReadWrite, true ); ////////////////////////////////////////////////////////////////////////////////////////// @@ -299,6 +330,8 @@ void cpudetectInit() xMOVDQU( xmm1, ptr[ecx] ); xRET(); + HostSys::MemProtectStatic( recSSE, Protect_ReadOnly, true ); + bool sse3_result = _test_instruction( recSSE ); // sse3 bool ssse3_result = _test_instruction( funcSSSE3 ); bool sse41_result = _test_instruction( funcSSE41 ); @@ -308,52 +341,32 @@ void cpudetectInit() // more reliable gauge of the cpu's actual ability. But since a difference in bit // and actual ability may represent a cmos/bios problem, we report it to the user. 
- if( sse3_result != !!x86caps.hasStreamingSIMD3Extensions ) + if( sse3_result != !!hasStreamingSIMD3Extensions ) { Console.Warning( "SSE3 Detection Inconsistency: cpuid=%s, test_result=%s", - bool_to_char( !!x86caps.hasStreamingSIMD3Extensions ), bool_to_char( sse3_result ) ); + bool_to_char( !!hasStreamingSIMD3Extensions ), bool_to_char( sse3_result ) ); - x86caps.hasStreamingSIMD3Extensions = sse3_result; + hasStreamingSIMD3Extensions = sse3_result; } - if( ssse3_result != !!x86caps.hasSupplementalStreamingSIMD3Extensions ) + if( ssse3_result != !!hasSupplementalStreamingSIMD3Extensions ) { Console.Warning( "SSSE3 Detection Inconsistency: cpuid=%s, test_result=%s", - bool_to_char( !!x86caps.hasSupplementalStreamingSIMD3Extensions ), bool_to_char( ssse3_result ) ); + bool_to_char( !!hasSupplementalStreamingSIMD3Extensions ), bool_to_char( ssse3_result ) ); - x86caps.hasSupplementalStreamingSIMD3Extensions = ssse3_result; + hasSupplementalStreamingSIMD3Extensions = ssse3_result; } - if( sse41_result != !!x86caps.hasStreamingSIMD4Extensions ) + if( sse41_result != !!hasStreamingSIMD4Extensions ) { Console.Warning( "SSE4 Detection Inconsistency: cpuid=%s, test_result=%s", - bool_to_char( !!x86caps.hasStreamingSIMD4Extensions ), bool_to_char( sse41_result ) ); + bool_to_char( !!hasStreamingSIMD4Extensions ), bool_to_char( sse41_result ) ); - x86caps.hasStreamingSIMD4Extensions = sse41_result; + hasStreamingSIMD4Extensions = sse41_result; } } - //////////////////////////////////////////////////////////////////////////////////////////// - // Establish MXCSR Mask... - - // HACK! For some reason the "proper" fxsave code below causes some kind of stackframe - // corruption in MSVC PGO builds. The culprit appears to be execution of FXSAVE itself, - // since only by not executing FXSAVE is the crash avoided. (note: crash happens later - // in SysDetect). Using a #pragma optimize("",off) also fixes it. 
- // - // Workaround: We assume the MXCSR mask from the settings of the CPU. SSE2 CPUs have - // a full mask available. SSE and earlier CPUs have a few bits reserved (must be zero). - - EstablishMXCSRmask(); - - //////////////////////////////////////////////////////////////////////////////////////////// - // Core Counting! - - if( !x86caps.hasMultiThreading || LogicalCoresPerPhysicalCPU == 0 ) - LogicalCoresPerPhysicalCPU = 1; - - // This will assign values into x86caps.LogicalCores and PhysicalCores - CountLogicalCores( LogicalCoresPerPhysicalCPU, PhysicalCoresPerPhysicalCPU ); + SIMD_EstablishMXCSRmask(); } diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index 56f760adeb..8522c3b6fa 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -116,6 +116,8 @@ void SysLogMachineCaps() Console.WriteLn( Color_StrongBlack, "x86-32 Init:" ); + u32 speed = x86caps.CalculateMHz(); + Console.Indent().WriteLn( L"CPU vendor name = %s\n" L"FamilyID = %x\n" @@ -127,9 +129,9 @@ void SysLogMachineCaps() L"x86EFlags = %8.8x", fromUTF8( x86caps.VendorName ).c_str(), x86caps.StepID, fromUTF8( x86caps.FamilyName ).Trim().Trim(false).c_str(), - x86caps.Speed / 1000, x86caps.Speed % 1000, + speed / 1000, speed % 1000, x86caps.PhysicalCores, x86caps.LogicalCores, - fromUTF8( x86caps.TypeName ).c_str(), + x86caps.GetTypeName().c_str(), x86caps.Flags, x86caps.Flags2, x86caps.EFlags ); diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index c652cb2695..307372c482 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -189,9 +189,9 @@ void Pcsx2App::ReadUserModeSettings() void Pcsx2App::DetectCpuAndUserMode() { - cpudetectInit(); - //x86caps.Identify(); - //x86caps.SIMD_EstablishMXCSRmask(); + x86caps.Identify(); + x86caps.CountCores(); + x86caps.SIMD_EstablishMXCSRmask(); if( !x86caps.hasMultimediaExtensions ) { @@ -413,6 +413,8 @@ typedef void (wxEvtHandler::*pxStuckThreadEventHandler)(pxMessageBoxEvent&); bool Pcsx2App::OnInit() { + InitCPUTicks(); + #define 
pxAppMethodEventHandler(func) \ (wxObjectEventFunction)(wxEventFunction)wxStaticCastEvent(pxInvokeAppMethodEventFunction, &func )