From 4d9708999004b73d3bb9cf015ab0d4d4deb05264 Mon Sep 17 00:00:00 2001 From: Jonathan Li Date: Sun, 29 Nov 2015 10:52:50 +0000 Subject: [PATCH] x86emitter: Fix Windows AVX2 detection Technically it did work, but it was undefined behaviour. Use __cpuidex, which allows setting ecx as well. Also fix the _xgetbv intrinsic - I have no idea how PCSX2 even compiled on Windows. --- common/include/Pcsx2Defs.h | 1 - common/include/intrin_x86.h | 10 +++++++--- common/src/x86emitter/cpudetect.cpp | 6 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index 1402e19a97..528521b6e0 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -27,7 +27,6 @@ #ifdef _MSC_VER # include -extern "C" unsigned __int64 __xgetbv(int); #else # include #endif diff --git a/common/include/intrin_x86.h b/common/include/intrin_x86.h index 076ae49dfb..9665dd141c 100644 --- a/common/include/intrin_x86.h +++ b/common/include/intrin_x86.h @@ -96,11 +96,15 @@ static __inline__ __attribute__((always_inline)) s32 _InterlockedIncrement(volat /*** System information ***/ static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType) { - // ECX allow to select the leaf. Leaf 0 is the one that you want to get, so I just xor the register - __asm__ __volatile__("xor %%ecx, %%ecx\n" "cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType)); + __asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType)); } -static __inline__ __attribute__((always_inline)) unsigned long long __xgetbv(unsigned int index) +static __inline__ __attribute__((always_inline)) void __cpuidex(int CPUInfo[], const int level, const int count) +{ + __asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (level), "c" (count)); +} + +static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsigned int index) { unsigned int eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index f6140a6675..2faebbc34d 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -210,8 +210,8 @@ void x86capabilities::Identify() if ( cmds >= 0x00000007 ) { - // Note: ECX must be 0. I did it directly in the __cpuid asm instrinsic - __cpuid( regs, 0x00000007 ); + // Note: ECX must be 0 for AVX2 detection. + __cpuidex( regs, 0x00000007, 0 ); SEFlag = regs[ 1 ]; } @@ -275,7 +275,7 @@ void x86capabilities::Identify() if((Flags2 >> 27) & 1) // OSXSAVE { - if((__xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS). + if((_xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS). { hasAVX = ( Flags2 >> 28 ) & 1; //avx hasFMA = ( Flags2 >> 12 ) & 1; //fma