mirror of https://github.com/PCSX2/pcsx2.git
x86emitter: Fix Windows AVX2 detection
Technically it did work, but it was undefined behaviour. Use __cpuidex, which allows setting ecx as well. Also fix the _xgetbv intrinsic - I have no idea how PCSX2 even compiled on Windows.
This commit is contained in:
parent
7055e29670
commit
4d97089990
|
@ -27,7 +27,6 @@
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# include <intrin.h>
|
# include <intrin.h>
|
||||||
extern "C" unsigned __int64 __xgetbv(int);
|
|
||||||
#else
|
#else
|
||||||
# include <intrin_x86.h>
|
# include <intrin_x86.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -96,11 +96,15 @@ static __inline__ __attribute__((always_inline)) s32 _InterlockedIncrement(volat
|
||||||
/*** System information ***/
|
/*** System information ***/
|
||||||
static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType)
|
static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType)
|
||||||
{
|
{
|
||||||
// ECX selects the leaf. Leaf 0 is the one that we want, so I just xor the register
|
__asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
|
||||||
__asm__ __volatile__("xor %%ecx, %%ecx\n" "cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __attribute__((always_inline)) unsigned long long __xgetbv(unsigned int index)
|
static __inline__ __attribute__((always_inline)) void __cpuidex(int CPUInfo[], const int level, const int count)
|
||||||
|
{
|
||||||
|
__asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (level), "c" (count));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsigned int index)
|
||||||
{
|
{
|
||||||
unsigned int eax, edx;
|
unsigned int eax, edx;
|
||||||
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||||
|
|
|
@ -210,8 +210,8 @@ void x86capabilities::Identify()
|
||||||
|
|
||||||
if ( cmds >= 0x00000007 )
|
if ( cmds >= 0x00000007 )
|
||||||
{
|
{
|
||||||
// Note: ECX must be 0. I did it directly in the __cpuid asm intrinsic
|
// Note: ECX must be 0 for AVX2 detection.
|
||||||
__cpuid( regs, 0x00000007 );
|
__cpuidex( regs, 0x00000007, 0 );
|
||||||
|
|
||||||
SEFlag = regs[ 1 ];
|
SEFlag = regs[ 1 ];
|
||||||
}
|
}
|
||||||
|
@ -275,7 +275,7 @@ void x86capabilities::Identify()
|
||||||
|
|
||||||
if((Flags2 >> 27) & 1) // OSXSAVE
|
if((Flags2 >> 27) & 1) // OSXSAVE
|
||||||
{
|
{
|
||||||
if((__xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS).
|
if((_xgetbv(0) & 6) == 6) // XFEATURE_ENABLED_MASK[2:1] = '11b' (XMM state and YMM state are enabled by OS).
|
||||||
{
|
{
|
||||||
hasAVX = ( Flags2 >> 28 ) & 1; //avx
|
hasAVX = ( Flags2 >> 28 ) & 1; //avx
|
||||||
hasFMA = ( Flags2 >> 12 ) & 1; //fma
|
hasFMA = ( Flags2 >> 12 ) & 1; //fma
|
||||||
|
|
Loading…
Reference in New Issue