/*  Cpudetection lib
 *  Copyright (C) 2002-2010  PCSX2 Dev Team
 *
 *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU Lesser General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with PCSX2.
 *  If not, see <http://www.gnu.org/licenses/>.
 */

#include "PrecompiledHeader.h"
#include "cpudetect_internal.h"
#include "internal.h"

using namespace x86Emitter;

__aligned16 x86capabilities x86caps;

// Destination buffer for the FXSAVE image that SIMD_EstablishMXCSRmask() reads the
// MXCSR mask from.
static __pagealigned u8 targetFXSAVE[512];

// Warning!  We've had problems with the MXCSR detection code causing stack corruption in
// MSVC PGO builds.  The problem was fixed when I moved the MXCSR code to this function, and
// moved the recSSE[] array to a global static (it was local to cpudetectInit).  Commented
// here in case the nutty crash ever re-surfaces. >_<
// Note: recSSE was deleted
//
// Establishes MXCSR_Mask.bitmask.  Does nothing when SSE is not flagged.  Otherwise a
// default mask is chosen from the SSE/SSE2 feature flags, and then overridden by the mask
// the cpu itself reports via FXSAVE, when FXSAVE is available and reports a non-zero mask.
// The feature flags read here are the ones assigned by Identify().
void x86capabilities::SIMD_EstablishMXCSRmask()
{
	if( !hasStreamingSIMDExtensions ) return;

	MXCSR_Mask.bitmask = 0xFFBF;		// MMX/SSE default

	if( hasStreamingSIMD2Extensions )
	{
		// This is a generally safe assumption, but FXSAVE is the "correct" way to
		// detect MXCSR masking features of the cpu, so we use its result below
		// and override this.

		MXCSR_Mask.bitmask = 0xFFFF;	// SSE2 features added
	}

	// FXSAVE is only a valid instruction when the FXSR feature bit is set; without it
	// the defaults chosen above are the best information available.
	if( !hasFastStreamingSIMDExtensionsSaveRestore ) return;

	// Work for recent enough GCC/CLANG/MSVC 2012
	_fxsave(&targetFXSAVE);

	// Bytes 28->32 are the MXCSR_Mask.  Assemble the little-endian dword byte by byte
	// instead of type-punning the u8 buffer through a u32 reference.
	const u32 result =
		  (u32)targetFXSAVE[28]
		| ((u32)targetFXSAVE[29] << 8)
		| ((u32)targetFXSAVE[30] << 16)
		| ((u32)targetFXSAVE[31] << 24);

	// A zero mask means the cpu did not report one; keep the default in that case.
	if( result != 0 )
		MXCSR_Mask.bitmask = result;
}

// Counts the number of cpu cycles executed over the requested number of PerformanceCounter
// ticks. Returns that exact count.
// For best results you should pick a period of time long enough to get a reading that won't // be prone to rounding error; but short enough that it'll be highly unlikely to be interrupted // by the operating system task switches. s64 x86capabilities::_CPUSpeedHz( u64 time ) const { u64 timeStart, timeStop; s64 startCycle, endCycle; if( ! hasTimeStampCounter ) return 0; SingleCoreAffinity affinity_lock; // Align the cpu execution to a cpuTick boundary. // GCC 4.8 has __rdtsc but apparently it causes a crash. Only known working on MSVC do { timeStart = GetCPUTicks(); #ifdef _MSC_VER startCycle = __rdtsc(); #elif defined(_M_X86_64) unsigned long long low, high; __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high)); startCycle = low | (high << 32); #else __asm__ __volatile__("rdtsc" : "=A"(startCycle)); #endif } while( GetCPUTicks() == timeStart ); do { timeStop = GetCPUTicks(); #ifdef _MSC_VER endCycle = __rdtsc(); #elif defined(_M_X86_64) unsigned long long low, high; __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high)); endCycle = low | (high << 32); #else __asm__ __volatile__("rdtsc" : "=A"(endCycle)); #endif } while( ( timeStop - timeStart ) < time ); s64 cycleCount = endCycle - startCycle; s64 timeCount = timeStop - timeStart; s64 overrun = timeCount - time; if( !overrun ) return cycleCount; // interference could cause us to overshoot the target time, compensate: double cyclesPerTick = (double)cycleCount / (double)timeCount; double newCycleCount = (double)cycleCount - (cyclesPerTick * overrun); return (s64)newCycleCount; } wxString x86capabilities::GetTypeName() const { switch( TypeID ) { case 0: return L"Standard OEM"; case 1: return L"Overdrive"; case 2: return L"Dual"; case 3: return L"Reserved"; default: return L"Unknown"; } } void x86capabilities::CountCores() { Identify(); s32 regs[ 4 ]; u32 cmds; __cpuid( regs, 0x80000000 ); cmds = regs[ 0 ]; // detect multicore for AMD cpu if ((cmds >= 0x80000008) && (VendorID == x86Vendor_AMD) ) { // AMD note: they 
don't support hyperthreading, but they like to flag this true // anyway. Let's force-unflag it until we come up with a better solution. // (note: seems to affect some Phenom II's only? -- Athlon X2's and PhenomI's do // not seem to do this) --air hasMultiThreading = 0; } // This will assign values into LogicalCores and PhysicalCores CountLogicalCores(); } static const char* tbl_x86vendors[] = { "GenuineIntel", "AuthenticAMD", "Unknown ", }; // Performs all _cpuid-related activity. This fills *most* of the x86caps structure, except for // the cpuSpeed and the mxcsr masks. Those must be completed manually. void x86capabilities::Identify() { if( isIdentified ) return; isIdentified = true; s32 regs[ 4 ]; u32 cmds; //AMD 64 STUFF #ifdef __x86_64__ u32 x86_64_8BITBRANDID; u32 x86_64_12BITBRANDID; #endif memzero( VendorName ); __cpuid( regs, 0 ); cmds = regs[ 0 ]; ((u32*)VendorName)[ 0 ] = regs[ 1 ]; ((u32*)VendorName)[ 1 ] = regs[ 3 ]; ((u32*)VendorName)[ 2 ] = regs[ 2 ]; // Determine Vendor Specifics! // It's really not recommended that we base much (if anything) on CPU vendor names, // however it's currently necessary in order to gain a (pseudo)reliable count of cores // and threads used by the CPU (AMD and Intel can't agree on how to make this info available). int& vid = (int&)VendorID; for( vid=0; vid= 0x00000001 ) { __cpuid( regs, 0x00000001 ); StepID = regs[ 0 ] & 0xf; Model = (regs[ 0 ] >> 4) & 0xf; FamilyID = (regs[ 0 ] >> 8) & 0xf; TypeID = (regs[ 0 ] >> 12) & 0x3; #ifdef __x86_64__ x86_64_8BITBRANDID = regs[ 1 ] & 0xff; #endif Flags = regs[ 3 ]; Flags2 = regs[ 2 ]; } if ( cmds >= 0x00000007 ) { // Note: ECX must be 0 for AVX2 detection. 
__cpuidex( regs, 0x00000007, 0 ); SEFlag = regs[ 1 ]; } __cpuid( regs, 0x80000000 ); cmds = regs[ 0 ]; if ( cmds >= 0x80000001 ) { __cpuid( regs, 0x80000001 ); #ifdef __x86_64__ x86_64_12BITBRANDID = regs[1] & 0xfff; #endif EFlags2 = regs[ 2 ]; EFlags = regs[ 3 ]; } memzero( FamilyName ); __cpuid( (int*)FamilyName, 0x80000002); __cpuid( (int*)(FamilyName+16), 0x80000003); __cpuid( (int*)(FamilyName+32), 0x80000004); hasFloatingPointUnit = ( Flags >> 0 ) & 1; hasVirtual8086ModeEnhancements = ( Flags >> 1 ) & 1; hasDebuggingExtensions = ( Flags >> 2 ) & 1; hasPageSizeExtensions = ( Flags >> 3 ) & 1; hasTimeStampCounter = ( Flags >> 4 ) & 1; hasModelSpecificRegisters = ( Flags >> 5 ) & 1; hasPhysicalAddressExtension = ( Flags >> 6 ) & 1; hasMachineCheckArchitecture = ( Flags >> 7 ) & 1; hasCOMPXCHG8BInstruction = ( Flags >> 8 ) & 1; hasAdvancedProgrammableInterruptController = ( Flags >> 9 ) & 1; hasSEPFastSystemCall = ( Flags >> 11 ) & 1; hasMemoryTypeRangeRegisters = ( Flags >> 12 ) & 1; hasPTEGlobalFlag = ( Flags >> 13 ) & 1; hasMachineCheckArchitecture = ( Flags >> 14 ) & 1; hasConditionalMoveAndCompareInstructions = ( Flags >> 15 ) & 1; hasFGPageAttributeTable = ( Flags >> 16 ) & 1; has36bitPageSizeExtension = ( Flags >> 17 ) & 1; hasProcessorSerialNumber = ( Flags >> 18 ) & 1; hasCFLUSHInstruction = ( Flags >> 19 ) & 1; hasDebugStore = ( Flags >> 21 ) & 1; hasACPIThermalMonitorAndClockControl = ( Flags >> 22 ) & 1; hasFastStreamingSIMDExtensionsSaveRestore = ( Flags >> 24 ) & 1; hasStreamingSIMDExtensions = ( Flags >> 25 ) & 1; //sse hasStreamingSIMD2Extensions = ( Flags >> 26 ) & 1; //sse2 hasSelfSnoop = ( Flags >> 27 ) & 1; hasMultiThreading = ( Flags >> 28 ) & 1; hasThermalMonitor = ( Flags >> 29 ) & 1; hasIntel64BitArchitecture = ( Flags >> 30 ) & 1; // ------------------------------------------------- // --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <-- // ------------------------------------------------- hasStreamingSIMD3Extensions = ( Flags2 >> 0 ) & 1; 
//sse3 hasSupplementalStreamingSIMD3Extensions = ( Flags2 >> 9 ) & 1; //ssse3 hasStreamingSIMD4Extensions = ( Flags2 >> 19 ) & 1; //sse4.1 hasStreamingSIMD4Extensions2 = ( Flags2 >> 20 ) & 1; //sse4.2 if((Flags2 >> 27) & 1) // OSXSAVE { if((_xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS). { hasAVX = ( Flags2 >> 28 ) & 1; //avx hasFMA = ( Flags2 >> 12 ) & 1; //fma hasAVX2 = ( SEFlag >> 5 ) & 1; //avx2 } } hasBMI1 = ( SEFlag >> 3 ) & 1; hasBMI2 = ( SEFlag >> 8 ) & 1; // Ones only for AMDs: hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu hasStreamingSIMD4ExtensionsA = ( EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT isIdentified = true; } u32 x86capabilities::CalculateMHz() const { InitCPUTicks(); u64 span = GetTickFrequency(); if( (span % 1000) < 400 ) // helps minimize rounding errors return (u32)( _CPUSpeedHz( span / 1000 ) / 1000 ); else return (u32)( _CPUSpeedHz( span / 500 ) / 2000 ); }