Interpreter: support non-IEEE mode emulation

v2: fix FXSAVE on Visual Studio, thanks to @rodolfo for this patch
This commit is contained in:
Tillmann Karras 2013-10-24 13:52:22 +02:00
parent 710a0ff435
commit 466a7afde3
6 changed files with 53 additions and 19 deletions

View File

@@ -43,6 +43,9 @@ struct CPUInfo
bool bAVX; bool bAVX;
bool bFMA; bool bFMA;
bool bAES; bool bAES;
// FXSAVE/FXRSTOR
bool bFXSR;
bool bDAZ;
bool bLAHFSAHF64; bool bLAHFSAHF64;
bool bLongMode; bool bLongMode;

View File

@@ -36,7 +36,7 @@ namespace FPURoundMode
void SetPrecisionMode(u32 mode); void SetPrecisionMode(u32 mode);
void SetSIMDMode(u32 mode); void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode);
/* /*
* There are two different flavors of float to int conversion: * There are two different flavors of float to int conversion:

View File

@@ -26,7 +26,7 @@ namespace FPURoundMode
// Stub variant of SetPrecisionMode: the body is empty, so `mode` is ignored.
void SetPrecisionMode(u32 mode) {}
// Stub variant of SetSIMDMode: the body is empty, so both arguments are ignored.
// Parameter names follow the declaration: SetSIMDMode(u32 roundingMode, u32 nonIEEEMode).
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
{
}
void SaveSIMDState() void SaveSIMDState()

View File

@@ -162,6 +162,28 @@ void CPUInfo::Detect()
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true; if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
if ((cpu_id[2] >> 25) & 1) bAES = true; if ((cpu_id[2] >> 25) & 1) bAES = true;
if ((cpu_id[3] >> 24) & 1)
{
// We can use FXSAVE.
bFXSR = true;
GC_ALIGNED16(u8 fx_state[512]);
memset(fx_state, 0, sizeof(fx_state));
#ifdef _WIN32
#ifdef _M_IX86
_fxsave(fx_state);
#elif defined (_M_X64)
_fxsave64(fx_state);
#endif
#else
__asm__("fxsave %0" : "=m" (fx_state));
#endif
// lowest byte of MXCSR_MASK
if ((fx_state[0x1C] >> 6) & 1)
bDAZ = true;
}
// AVX support requires 3 separate checks: // AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID? // - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID? // - Is the XSAVE bit set in CPUID?

View File

@@ -4,6 +4,7 @@
#include "Common.h" #include "Common.h"
#include "FPURoundMode.h" #include "FPURoundMode.h"
#include "CPUDetect.h"
#ifndef _WIN32 #ifndef _WIN32
static const unsigned short FPU_ROUND_NEAR = 0 << 10; static const unsigned short FPU_ROUND_NEAR = 0 << 10;
@@ -14,8 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include <xmmintrin.h> #include <xmmintrin.h>
#endif #endif
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
const u32 FTZ = 0x8000;
namespace FPURoundMode namespace FPURoundMode
@@ -79,16 +83,28 @@ namespace FPURoundMode
//but still - set any useful sse options here //but still - set any useful sse options here
#endif #endif
} }
// Builds an MXCSR value from the emulated rounding / non-IEEE settings and
// loads it with _mm_setcsr.
//   roundingMode - index into the rounding-mode table below (the caller passes
//                  FPSCR.RN); only the low two bits are used.
//   nonIEEEMode  - non-zero enables denormal flushing (the caller passes FPSCR.NI).
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
{
	// lookup table for FPSCR.RN-to-MXCSR.RC translation;
	// every entry also masks all FPU exceptions
	static const u32 roundingModeLUT[4] =
	{
		(0 << 13) | EXCEPTION_MASK, // nearest
		(3 << 13) | EXCEPTION_MASK, // -inf
		(2 << 13) | EXCEPTION_MASK, // +inf
		(1 << 13) | EXCEPTION_MASK, // zero
	};

	// The table has exactly four entries; mask the index so an out-of-range
	// argument can never read past the end of it.
	u32 csr = roundingModeLUT[roundingMode & 3];

	if (nonIEEEMode)
	{
		// Flush-to-zero is always requested; denormals-are-zero is added only
		// when CPU detection reported it (cpu_info.bDAZ), since it may not be supported.
		csr |= FTZ;
		if (cpu_info.bDAZ)
			csr |= DAZ;
	}

	_mm_setcsr(csr);
}

View File

@@ -48,15 +48,8 @@ static void FPSCRtoFPUSettings(UReg_FPSCR fp)
// Pokemon Colosseum does this. Gah. // Pokemon Colosseum does this. Gah.
} }
// Also corresponding SSE rounding mode setting // Set SSE rounding mode and denormal handling
if (FPSCR.NI) FPURoundMode::SetSIMDMode(FPSCR.RN, FPSCR.NI);
{
// Either one of these two breaks Beyond Good & Evil.
// if (cpu_info.bSSSE3)
// csr |= DAZ;
// csr |= FTZ;
}
FPURoundMode::SetSIMDMode(FPSCR.RN);
} }
void Interpreter::mtfsb0x(UGeckoInstruction _inst) void Interpreter::mtfsb0x(UGeckoInstruction _inst)