Interpreter: support non-IEEE mode emulation

v2: fix FXSAVE on Visual Studio; thanks to @ rodolfo for this patch
2013-10-24 13:52:22 +02:00 · 2013-10-24 13:52:22 +02:00 · 466a7afde3
parent 710a0ff435
commit 466a7afde3
6 changed files with 53 additions and 19 deletions
--- a/Source/Core/Common/Src/CPUDetect.h
+++ b/Source/Core/Common/Src/CPUDetect.h
@@ -43,6 +43,9 @@ struct CPUInfo
 	bool bAVX;
 	bool bFMA;
 	bool bAES;
+	// FXSAVE/FXRSTOR
+	bool bFXSR;
+	bool bDAZ;
 	bool bLAHFSAHF64;
 	bool bLongMode;
--- a/Source/Core/Common/Src/FPURoundMode.h
+++ b/Source/Core/Common/Src/FPURoundMode.h
@@ -36,7 +36,7 @@ namespace FPURoundMode
 	void SetPrecisionMode(u32 mode);
-	void SetSIMDMode(u32 mode);
+	void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode);
 /*
 * There are two different flavors of float to int conversion:
--- a/Source/Core/Common/Src/GenericFPURoundMode.cpp
+++ b/Source/Core/Common/Src/GenericFPURoundMode.cpp
@@ -26,7 +26,7 @@ namespace FPURoundMode
 	void SetPrecisionMode(u32 mode)
 	{
 	}
-	void SetSIMDMode(u32 mode)
+	void SetSIMDMode(u32 mode, u32 nonIEEEMode)
 	{
 	}
 	void SaveSIMDState()
--- a/Source/Core/Common/Src/x64CPUDetect.cpp
+++ b/Source/Core/Common/Src/x64CPUDetect.cpp
@@ -162,6 +162,28 @@ void CPUInfo::Detect()
 		if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
 		if ((cpu_id[2] >> 25) & 1) bAES = true;
+		if ((cpu_id[3] >> 24) & 1)
+		{
+			// We can use FXSAVE.
+			bFXSR = true;
+			GC_ALIGNED16(u8 fx_state[512]);
+			memset(fx_state, 0, sizeof(fx_state));
+#ifdef _WIN32
+#ifdef _M_IX86
+			_fxsave(fx_state);
+#elif defined (_M_X64)
+			_fxsave64(fx_state);
+#endif
+#else
+			__asm__("fxsave %0" : "=m" (fx_state));
+#endif
+			// lowest byte of MXCSR_MASK
+			if ((fx_state[0x1C] >> 6) & 1)
+				bDAZ = true;
+		}
 		// AVX support requires 3 separate checks:
 		//  - Is the AVX bit set in CPUID?
 		//  - Is the XSAVE bit set in CPUID?
--- a/Source/Core/Common/Src/x64FPURoundMode.cpp
+++ b/Source/Core/Common/Src/x64FPURoundMode.cpp
@@ -4,6 +4,7 @@
 #include "Common.h"
 #include "FPURoundMode.h"
+#include "CPUDetect.h"
 #ifndef _WIN32
 static const unsigned short FPU_ROUND_NEAR = 0 << 10;
@@ -14,8 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #include <xmmintrin.h>
 #endif
-const u32 MASKS = 0x1F80;  // mask away the interrupts.
+// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
+const u32 EXCEPTION_MASK = 0x1F80;
+// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
 const u32 DAZ = 0x40;
+// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
 const u32 FTZ = 0x8000;
 namespace FPURoundMode
@@ -79,16 +83,28 @@ namespace FPURoundMode
 			//but still - set any useful sse options here
 		#endif
 	}
-	void SetSIMDMode(u32 mode)
+
+	void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
 	{
-		static const u32 ssetable[4] =
+		// lookup table for FPSCR.RN-to-MXCSR.RC translation
+		static const u32 roundingModeLUT[4] =
 		{
-			(0 << 13) | MASKS,
+			(0 << 13) | EXCEPTION_MASK, // nearest
-			(3 << 13) | MASKS,
+			(3 << 13) | EXCEPTION_MASK, // -inf
-			(2 << 13) | MASKS,
+			(2 << 13) | EXCEPTION_MASK, // +inf
-			(1 << 13) | MASKS,
+			(1 << 13) | EXCEPTION_MASK, // zero
 		};
-		u32 csr = ssetable[mode];
+		u32 csr = roundingModeLUT[roundingMode];
+		static const u32 denormalLUT[2] =
+		{
+			FTZ,       // flush-to-zero only
+			FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
+		};
+		if (nonIEEEMode)
+		{
+			csr |= denormalLUT[cpu_info.bDAZ];
+		}
 		_mm_setcsr(csr);
 	}
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -48,15 +48,8 @@ static void FPSCRtoFPUSettings(UReg_FPSCR fp)
 		// Pokemon Colosseum does this. Gah.
 	}
-	// Also corresponding SSE rounding mode setting
+	// Set SSE rounding mode and denormal handling
-	if (FPSCR.NI)
+	FPURoundMode::SetSIMDMode(FPSCR.RN, FPSCR.NI);
-	{
-		// Either one of these two breaks Beyond Good & Evil.
-		// if (cpu_info.bSSSE3)
-		//     csr |= DAZ;
-		// csr |= FTZ;
-	}
-	FPURoundMode::SetSIMDMode(FPSCR.RN);
 }
 void Interpreter::mtfsb0x(UGeckoInstruction _inst)