Fix PPC_FP on non-SSE4.1 code paths.

The Invalid Operation flag in the x87 FPU status word is sticky, so once a single NaN goes through the old code on CPUs without SSE4.1, all subsequent floats are mutilated. Patch to emulate PTEST by Fiora. Fixes issue 7237 and issue 7510.
2014-07-27 20:55:47 +12:00 · 2014-07-27 20:55:47 +12:00 · 8c857b45f8
parent 96cfbd1bb0
commit 8c857b45f8
2 changed files with 18 additions and 5 deletions
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@ -551,6 +551,8 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
 static u32 GC_ALIGNED16(temp32);
 static u64 GC_ALIGNED16(temp64);

+static const float GC_ALIGNED16(m_zero[]) = { 0.0f, 0.0f, 0.0f, 0.0f };
+
 #if _M_X86_64
 static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000);
 static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000);
@ -669,8 +671,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
 		PTEST(XMM1, M((void *)&double_exponent));
 		cond = CC_NC;
 	} else {
-		FNSTSW_AX();
-		TEST(16, R(AX), Imm16(x87_InvalidOperation));
+		// Emulate PTEST; checking the x87 status word is incorrect because its
+		// Invalid Operation flag is sticky (it persists between instructions).
+		MOVSD(XMM0, M((void *)&double_exponent));
+		PAND(XMM0, R(XMM1));
+		PCMPEQB(XMM0, M((void *)&m_zero));
+		PMOVMSKB(EAX, R(XMM0));
+		CMP(32, R(EAX), Imm32(0xffff));
 		cond = CC_Z;
 	}
 	FSTP(32, M(&temp32));
@ -706,8 +713,13 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
 		PTEST(XMM1, M((void *)&single_exponent));
 		cond = CC_NC;
 	} else {
-		FNSTSW_AX();
-		TEST(16, R(AX), Imm16(x87_InvalidOperation));
+		// Emulate PTEST; checking the x87 status word is incorrect because its
+		// Invalid Operation flag is sticky (it persists between instructions).
+		MOVSS(XMM0, M((void *)&single_exponent));
+		PAND(XMM0, R(XMM1));
+		PCMPEQB(XMM0, M((void *)&m_zero));
+		PMOVMSKB(EAX, R(XMM0));
+		CMP(32, R(EAX), Imm32(0xffff));
 		cond = CC_Z;
 	}
 	FSTP(64, M(&temp64));
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@ -57,8 +57,9 @@ public:
 	void ForceSinglePrecisionS(Gen::X64Reg xmm);
 	void ForceSinglePrecisionP(Gen::X64Reg xmm);

-	// AX might get trashed
+	// EAX might get trashed
 	void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
+	// EAX might get trashed
 	void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
 protected:
 	std::unordered_map<u8 *, u32> registersInUseAtLoc;