2009-07-03 00:49:40 +00:00
|
|
|
/* Cpudetection lib
|
2010-05-03 14:08:02 +00:00
|
|
|
* Copyright (C) 2002-2010 PCSX2 Dev Team
|
2009-10-05 11:05:11 +00:00
|
|
|
*
|
2009-09-08 12:08:10 +00:00
|
|
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
|
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
2009-07-03 00:49:40 +00:00
|
|
|
*
|
2009-09-08 12:08:10 +00:00
|
|
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE. See the GNU General Public License for more details.
|
2009-07-03 20:12:33 +00:00
|
|
|
*
|
2009-09-08 12:08:10 +00:00
|
|
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
2009-07-03 00:49:40 +00:00
|
|
|
*/
|
2009-10-05 11:05:11 +00:00
|
|
|
|
2009-07-03 00:49:40 +00:00
|
|
|
#include "PrecompiledHeader.h"
|
2009-11-16 13:54:32 +00:00
|
|
|
#include "cpudetect_internal.h"
|
2009-10-07 19:20:11 +00:00
|
|
|
#include "internal.h"
|
2016-04-18 17:27:06 +00:00
|
|
|
#include "x86_intrin.h"
|
2009-10-07 19:20:11 +00:00
|
|
|
|
2009-07-03 00:49:40 +00:00
|
|
|
using namespace x86Emitter;
|
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
__aligned16 x86capabilities x86caps;
|
|
|
|
|
|
|
|
// Recompiled code buffer for SSE and MXCSR feature testing.
|
|
|
|
static __pagealigned u8 targetFXSAVE[512];
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2016-03-28 12:33:21 +00:00
|
|
|
// Default constructor.
// Leaves the object in its "not yet identified" state: isIdentified is false, the vendor is
// x86Vendor_Unknown, every id/flag/core-count field is zero, and both name buffers are cleared.
x86capabilities::x86capabilities()
	: isIdentified( false )
	, VendorID( x86Vendor_Unknown )
	, FamilyID( 0 )
	, Model( 0 )
	, TypeID( 0 )
	, StepID( 0 )
	, Flags( 0 )
	, Flags2( 0 )
	, EFlags( 0 )
	, EFlags2( 0 )
	, SEFlag( 0 )
	, AllCapabilities( 0 )
	, PhysicalCores( 0 )
	, LogicalCores( 0 )
{
	// The name buffers are arrays, so they are cleared here rather than in the initialiser list.
	memzero( FamilyName );
	memzero( VendorName );
}
|
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// Warning! We've had problems with the MXCSR detection code causing stack corruption in
|
|
|
|
// MSVC PGO builds. The problem was fixed when I moved the MXCSR code to this function, and
|
|
|
|
// moved the recSSE[] array to a global static (it was local to cpudetectInit). Commented
|
|
|
|
// here in case the nutty crash ever re-surfaces. >_<
|
2015-11-28 21:09:33 +00:00
|
|
|
// Note: recSSE was deleted
|
2010-06-01 03:33:51 +00:00
|
|
|
void x86capabilities::SIMD_EstablishMXCSRmask()
|
|
|
|
{
|
|
|
|
if( !hasStreamingSIMDExtensions ) return;
|
|
|
|
|
|
|
|
MXCSR_Mask.bitmask = 0xFFBF; // MMX/SSE default
|
|
|
|
|
|
|
|
if( hasStreamingSIMD2Extensions )
|
|
|
|
{
|
|
|
|
// This is generally safe assumption, but FXSAVE is the "correct" way to
|
|
|
|
// detect MXCSR masking features of the cpu, so we use it's result below
|
|
|
|
// and override this.
|
|
|
|
|
|
|
|
MXCSR_Mask.bitmask = 0xFFFF; // SSE2 features added
|
|
|
|
}
|
|
|
|
|
2015-11-28 21:09:33 +00:00
|
|
|
// Work for recent enough GCC/CLANG/MSVC 2012
|
|
|
|
_fxsave(&targetFXSAVE);
|
2010-06-01 03:33:51 +00:00
|
|
|
|
|
|
|
u32 result = (u32&)targetFXSAVE[28]; // bytes 28->32 are the MXCSR_Mask.
|
|
|
|
if( result != 0 )
|
|
|
|
MXCSR_Mask.bitmask = result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Counts the number of cpu cycles executed over the requested number of PerformanceCounter
|
|
|
|
// ticks. Returns that exact count.
|
|
|
|
// For best results you should pick a period of time long enough to get a reading that won't
|
|
|
|
// be prone to rounding error; but short enough that it'll be highly unlikely to be interrupted
|
|
|
|
// by the operating system task switches.
|
|
|
|
s64 x86capabilities::_CPUSpeedHz( u64 time ) const
|
2009-07-03 00:49:40 +00:00
|
|
|
{
|
|
|
|
u64 timeStart, timeStop;
|
2009-12-20 01:28:23 +00:00
|
|
|
s64 startCycle, endCycle;
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
if( ! hasTimeStampCounter )
|
2009-11-16 13:54:32 +00:00
|
|
|
return 0;
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2009-11-16 13:54:32 +00:00
|
|
|
SingleCoreAffinity affinity_lock;
|
2009-07-03 00:49:40 +00:00
|
|
|
|
|
|
|
// Align the cpu execution to a cpuTick boundary.
|
|
|
|
|
2014-08-27 08:49:44 +00:00
|
|
|
// GCC 4.8 has __rdtsc but apparently it causes a crash. Only known working on MSVC
|
2010-06-01 03:33:51 +00:00
|
|
|
do {
|
|
|
|
timeStart = GetCPUTicks();
|
2014-08-27 08:49:44 +00:00
|
|
|
#ifdef _MSC_VER
|
2014-02-23 16:00:55 +00:00
|
|
|
startCycle = __rdtsc();
|
2014-08-27 08:49:44 +00:00
|
|
|
#elif defined(_M_X86_64)
|
|
|
|
unsigned long long low, high;
|
|
|
|
__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
|
|
|
|
startCycle = low | (high << 32);
|
|
|
|
#else
|
|
|
|
__asm__ __volatile__("rdtsc" : "=A"(startCycle));
|
2014-02-23 16:00:55 +00:00
|
|
|
#endif
|
2010-06-01 03:33:51 +00:00
|
|
|
} while( GetCPUTicks() == timeStart );
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
do {
|
2009-07-03 00:49:40 +00:00
|
|
|
timeStop = GetCPUTicks();
|
2014-08-27 08:49:44 +00:00
|
|
|
#ifdef _MSC_VER
|
2014-02-23 16:00:55 +00:00
|
|
|
endCycle = __rdtsc();
|
2014-08-27 08:49:44 +00:00
|
|
|
#elif defined(_M_X86_64)
|
|
|
|
unsigned long long low, high;
|
|
|
|
__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
|
|
|
|
endCycle = low | (high << 32);
|
|
|
|
#else
|
|
|
|
__asm__ __volatile__("rdtsc" : "=A"(endCycle));
|
2014-02-23 16:00:55 +00:00
|
|
|
#endif
|
2010-06-01 03:33:51 +00:00
|
|
|
} while( ( timeStop - timeStart ) < time );
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2009-12-20 01:28:23 +00:00
|
|
|
s64 cycleCount = endCycle - startCycle;
|
|
|
|
s64 timeCount = timeStop - timeStart;
|
|
|
|
s64 overrun = timeCount - time;
|
|
|
|
if( !overrun ) return cycleCount;
|
|
|
|
|
|
|
|
// interference could cause us to overshoot the target time, compensate:
|
2010-04-25 00:31:27 +00:00
|
|
|
|
2009-12-20 01:28:23 +00:00
|
|
|
double cyclesPerTick = (double)cycleCount / (double)timeCount;
|
|
|
|
double newCycleCount = (double)cycleCount - (cyclesPerTick * overrun);
|
|
|
|
|
|
|
|
return (s64)newCycleCount;
|
2009-07-03 00:49:40 +00:00
|
|
|
}
|
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// Returns a display name for the current TypeID value.
// Values 0 through 3 have dedicated names; anything else is reported as "Unknown".
wxString x86capabilities::GetTypeName() const
{
	if( TypeID == 0 ) return L"Standard OEM";
	if( TypeID == 1 ) return L"Overdrive";
	if( TypeID == 2 ) return L"Dual";
	if( TypeID == 3 ) return L"Reserved";

	return L"Unknown";
}
|
2009-12-08 02:38:13 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
void x86capabilities::CountCores()
|
2009-12-08 02:38:13 +00:00
|
|
|
{
|
2010-06-01 03:33:51 +00:00
|
|
|
Identify();
|
2009-12-08 02:38:13 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
s32 regs[ 4 ];
|
|
|
|
u32 cmds;
|
2009-12-08 02:38:13 +00:00
|
|
|
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( regs, 0x80000000 );
|
2010-06-01 03:33:51 +00:00
|
|
|
cmds = regs[ 0 ];
|
2009-12-24 22:22:34 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// detect multicore for AMD cpu
|
2009-12-08 02:38:13 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
if ((cmds >= 0x80000008) && (VendorID == x86Vendor_AMD) )
|
|
|
|
{
|
2010-06-08 12:09:28 +00:00
|
|
|
// AMD note: they don't support hyperthreading, but they like to flag this true
|
|
|
|
// anyway. Let's force-unflag it until we come up with a better solution.
|
|
|
|
// (note: seems to affect some Phenom II's only? -- Athlon X2's and PhenomI's do
|
|
|
|
// not seem to do this) --air
|
|
|
|
hasMultiThreading = 0;
|
2010-06-01 03:33:51 +00:00
|
|
|
}
|
2009-12-08 02:38:13 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// This will assign values into LogicalCores and PhysicalCores
|
|
|
|
CountLogicalCores();
|
2009-12-08 02:38:13 +00:00
|
|
|
}
|
|
|
|
|
2016-03-28 12:33:21 +00:00
|
|
|
// CPUID vendor strings, looked up by vendor index in x86capabilities::Identify(), which
// compares the first 12 bytes of each candidate entry against the string reported by the cpu.
// Both the characters and the pointers are const: this is read-only lookup data.
//
// NOTE(review): the last entry is shorter than 12 bytes.  The lookup loop in Identify() stops
// before x86Vendor_Unknown, so this entry is presumably never handed to the 12-byte memcmp --
// confirm against the enum ordering before relying on that.
static const char* const tbl_x86vendors[] =
{
	"GenuineIntel",
	"AuthenticAMD",
	"Unknown ",
};
|
|
|
|
|
|
|
|
// Performs all _cpuid-related activity. This fills *most* of the x86caps structure, except for
|
|
|
|
// the cpuSpeed and the mxcsr masks. Those must be completed manually.
|
|
|
|
void x86capabilities::Identify()
|
|
|
|
{
|
|
|
|
if( isIdentified ) return;
|
|
|
|
isIdentified = true;
|
|
|
|
|
2009-12-08 02:38:13 +00:00
|
|
|
s32 regs[ 4 ];
|
2009-12-07 22:43:16 +00:00
|
|
|
u32 cmds;
|
2010-06-01 03:33:51 +00:00
|
|
|
|
2009-12-07 22:43:16 +00:00
|
|
|
//AMD 64 STUFF
|
2015-11-06 22:18:03 +00:00
|
|
|
#ifdef __x86_64__
|
2009-12-07 22:43:16 +00:00
|
|
|
u32 x86_64_8BITBRANDID;
|
|
|
|
u32 x86_64_12BITBRANDID;
|
2015-11-06 22:18:03 +00:00
|
|
|
#endif
|
2009-12-07 22:43:16 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
memzero( VendorName );
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( regs, 0 );
|
2009-12-07 22:43:16 +00:00
|
|
|
|
|
|
|
cmds = regs[ 0 ];
|
2010-06-01 03:33:51 +00:00
|
|
|
((u32*)VendorName)[ 0 ] = regs[ 1 ];
|
|
|
|
((u32*)VendorName)[ 1 ] = regs[ 3 ];
|
|
|
|
((u32*)VendorName)[ 2 ] = regs[ 2 ];
|
2009-12-07 22:43:16 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// Determine Vendor Specifics!
|
|
|
|
// It's really not recommended that we base much (if anything) on CPU vendor names,
|
|
|
|
// however it's currently necessary in order to gain a (pseudo)reliable count of cores
|
|
|
|
// and threads used by the CPU (AMD and Intel can't agree on how to make this info available).
|
|
|
|
|
|
|
|
int& vid = (int&)VendorID;
|
|
|
|
for( vid=0; vid<x86Vendor_Unknown; ++vid )
|
|
|
|
{
|
|
|
|
if( memcmp( VendorName, tbl_x86vendors[vid], 12 ) == 0 ) break;
|
|
|
|
}
|
2009-12-07 22:43:16 +00:00
|
|
|
|
|
|
|
if ( cmds >= 0x00000001 )
|
|
|
|
{
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( regs, 0x00000001 );
|
2009-12-07 22:43:16 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
StepID = regs[ 0 ] & 0xf;
|
|
|
|
Model = (regs[ 0 ] >> 4) & 0xf;
|
|
|
|
FamilyID = (regs[ 0 ] >> 8) & 0xf;
|
|
|
|
TypeID = (regs[ 0 ] >> 12) & 0x3;
|
2015-11-06 22:18:03 +00:00
|
|
|
#ifdef __x86_64__
|
2009-12-07 22:43:16 +00:00
|
|
|
x86_64_8BITBRANDID = regs[ 1 ] & 0xff;
|
2015-11-06 22:18:03 +00:00
|
|
|
#endif
|
2010-06-01 03:33:51 +00:00
|
|
|
Flags = regs[ 3 ];
|
|
|
|
Flags2 = regs[ 2 ];
|
2009-12-07 22:43:16 +00:00
|
|
|
}
|
|
|
|
|
2015-11-28 22:39:06 +00:00
|
|
|
if ( cmds >= 0x00000007 )
|
|
|
|
{
|
2015-11-29 10:52:50 +00:00
|
|
|
// Note: ECX must be 0 for AVX2 detection.
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuidex( regs, 0x00000007, 0 );
|
2015-11-28 22:39:06 +00:00
|
|
|
|
|
|
|
SEFlag = regs[ 1 ];
|
|
|
|
}
|
|
|
|
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( regs, 0x80000000 );
|
2009-12-07 22:43:16 +00:00
|
|
|
cmds = regs[ 0 ];
|
|
|
|
if ( cmds >= 0x80000001 )
|
|
|
|
{
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( regs, 0x80000001 );
|
2009-12-07 22:43:16 +00:00
|
|
|
|
2015-11-06 22:18:03 +00:00
|
|
|
#ifdef __x86_64__
|
2009-12-07 22:43:16 +00:00
|
|
|
x86_64_12BITBRANDID = regs[1] & 0xfff;
|
2015-11-06 22:18:03 +00:00
|
|
|
#endif
|
2010-06-01 03:33:51 +00:00
|
|
|
EFlags2 = regs[ 2 ];
|
|
|
|
EFlags = regs[ 3 ];
|
2009-12-07 22:43:16 +00:00
|
|
|
}
|
2010-04-25 00:31:27 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
memzero( FamilyName );
|
2016-07-16 14:40:29 +00:00
|
|
|
cpuid( (int*)FamilyName, 0x80000002);
|
|
|
|
cpuid( (int*)(FamilyName+16), 0x80000003);
|
|
|
|
cpuid( (int*)(FamilyName+32), 0x80000004);
|
2010-06-01 03:33:51 +00:00
|
|
|
|
|
|
|
hasFloatingPointUnit = ( Flags >> 0 ) & 1;
|
|
|
|
hasVirtual8086ModeEnhancements = ( Flags >> 1 ) & 1;
|
|
|
|
hasDebuggingExtensions = ( Flags >> 2 ) & 1;
|
|
|
|
hasPageSizeExtensions = ( Flags >> 3 ) & 1;
|
|
|
|
hasTimeStampCounter = ( Flags >> 4 ) & 1;
|
|
|
|
hasModelSpecificRegisters = ( Flags >> 5 ) & 1;
|
|
|
|
hasPhysicalAddressExtension = ( Flags >> 6 ) & 1;
|
|
|
|
hasMachineCheckArchitecture = ( Flags >> 7 ) & 1;
|
|
|
|
hasCOMPXCHG8BInstruction = ( Flags >> 8 ) & 1;
|
|
|
|
hasAdvancedProgrammableInterruptController = ( Flags >> 9 ) & 1;
|
|
|
|
hasSEPFastSystemCall = ( Flags >> 11 ) & 1;
|
|
|
|
hasMemoryTypeRangeRegisters = ( Flags >> 12 ) & 1;
|
|
|
|
hasPTEGlobalFlag = ( Flags >> 13 ) & 1;
|
|
|
|
hasMachineCheckArchitecture = ( Flags >> 14 ) & 1;
|
|
|
|
hasConditionalMoveAndCompareInstructions = ( Flags >> 15 ) & 1;
|
|
|
|
hasFGPageAttributeTable = ( Flags >> 16 ) & 1;
|
|
|
|
has36bitPageSizeExtension = ( Flags >> 17 ) & 1;
|
|
|
|
hasProcessorSerialNumber = ( Flags >> 18 ) & 1;
|
|
|
|
hasCFLUSHInstruction = ( Flags >> 19 ) & 1;
|
|
|
|
hasDebugStore = ( Flags >> 21 ) & 1;
|
|
|
|
hasACPIThermalMonitorAndClockControl = ( Flags >> 22 ) & 1;
|
|
|
|
hasFastStreamingSIMDExtensionsSaveRestore = ( Flags >> 24 ) & 1;
|
|
|
|
hasStreamingSIMDExtensions = ( Flags >> 25 ) & 1; //sse
|
|
|
|
hasStreamingSIMD2Extensions = ( Flags >> 26 ) & 1; //sse2
|
|
|
|
hasSelfSnoop = ( Flags >> 27 ) & 1;
|
|
|
|
hasMultiThreading = ( Flags >> 28 ) & 1;
|
|
|
|
hasThermalMonitor = ( Flags >> 29 ) & 1;
|
|
|
|
hasIntel64BitArchitecture = ( Flags >> 30 ) & 1;
|
|
|
|
|
|
|
|
// -------------------------------------------------
|
|
|
|
// --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <--
|
|
|
|
// -------------------------------------------------
|
2009-12-07 22:43:16 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
hasStreamingSIMD3Extensions = ( Flags2 >> 0 ) & 1; //sse3
|
|
|
|
hasSupplementalStreamingSIMD3Extensions = ( Flags2 >> 9 ) & 1; //ssse3
|
|
|
|
hasStreamingSIMD4Extensions = ( Flags2 >> 19 ) & 1; //sse4.1
|
|
|
|
hasStreamingSIMD4Extensions2 = ( Flags2 >> 20 ) & 1; //sse4.2
|
2016-03-28 12:33:21 +00:00
|
|
|
|
2011-02-12 21:45:16 +00:00
|
|
|
if((Flags2 >> 27) & 1) // OSXSAVE
|
|
|
|
{
|
2015-11-29 10:52:50 +00:00
|
|
|
if((_xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS).
|
2011-02-12 21:45:16 +00:00
|
|
|
{
|
|
|
|
hasAVX = ( Flags2 >> 28 ) & 1; //avx
|
|
|
|
hasFMA = ( Flags2 >> 12 ) & 1; //fma
|
2015-11-28 22:39:06 +00:00
|
|
|
hasAVX2 = ( SEFlag >> 5 ) & 1; //avx2
|
2011-02-12 21:45:16 +00:00
|
|
|
}
|
|
|
|
}
|
2009-07-03 20:12:33 +00:00
|
|
|
|
2016-01-09 21:31:49 +00:00
|
|
|
hasBMI1 = ( SEFlag >> 3 ) & 1;
|
|
|
|
hasBMI2 = ( SEFlag >> 8 ) & 1;
|
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// Ones only for AMDs:
|
|
|
|
hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu
|
|
|
|
hasStreamingSIMD4ExtensionsA = ( EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT
|
2009-11-02 07:00:59 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
isIdentified = true;
|
|
|
|
}
|
2009-07-03 00:49:40 +00:00
|
|
|
|
2010-06-01 03:33:51 +00:00
|
|
|
// Measures the cpu speed with _CPUSpeedHz() and returns the result truncated to u32.
//
// The measurement window is either 1/1000th or 1/500th of the value returned by
// GetTickFrequency(); the cycle count is divided by 1000 or 2000 respectively, so both paths
// yield the same unit.
u32 x86capabilities::CalculateMHz() const
{
	InitCPUTicks();
	const u64 tickFreq = GetTickFrequency();

	// helps minimize rounding errors: the shorter window is only used when dividing the tick
	// frequency by 1000 leaves a small remainder.
	const bool useShortWindow = (tickFreq % 1000) < 400;

	if( useShortWindow )
	{
		const s64 cycles = _CPUSpeedHz( tickFreq / 1000 );
		return (u32)( cycles / 1000 );
	}

	const s64 cycles = _CPUSpeedHz( tickFreq / 500 );
	return (u32)( cycles / 2000 );
}
|