From 2795376b61371069ef2949eb31dbbddd7d9e209c Mon Sep 17 00:00:00 2001
From: JMC47 <JMC4789@gmail.com>
Date: Thu, 31 May 2018 08:41:47 -0400
Subject: [PATCH] Enable Accurate Double to Single Conversion

---
 .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 76 ++-----------------
 1 file changed, 6 insertions(+), 70 deletions(-)
diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index 8c437ac5f6..18c78e872d 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -864,20 +864,15 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
   }
 }
 
-// Since the following float conversion functions are used in non-arithmetic PPC float instructions,
-// they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs.
-// This means we can't use CVTSS2SD/CVTSD2SS :(
-// The x87 FPU doesn't even support flush-to-zero so we can use FLD+FSTP even on denormals.
+// Since the following float conversion functions are used in non-arithmetic PPC float
+// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
+// into QNaNs. This means we can't use CVTSS2SD/CVTSD2SS. The x87 FPU doesn't even support
+// flush-to-zero so we can use FLD+FSTP even on denormals.
 // If the number is a NaN, make sure to set the QNaN bit back to its original value.
 
 // Another problem is that officially, converting doubles to single format results in undefined
-// behavior.
-// Relying on undefined behavior is a bug so no software should ever do this.
-// In case it does happen, phire's more accurate implementation of ConvertDoubleToSingle() is
-// reproduced below.
-
-//#define MORE_ACCURATE_DOUBLETOSINGLE
-#ifdef MORE_ACCURATE_DOUBLETOSINGLE
+// behavior.  Relying on undefined behavior is a bug so no software should ever do this.
+// Super Mario 64 (on Wii VC) accidentally relies on this behavior.  See issue #11173
 
 alignas(16) static const __m128i double_exponent = _mm_set_epi64x(0, 0x7ff0000000000000);
 alignas(16) static const __m128i double_fraction = _mm_set_epi64x(0, 0x000fffffffffffff);
@@ -954,65 +949,6 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
   MOVDDUP(dst, R(XMM1));
 }
 
-#else   // MORE_ACCURATE_DOUBLETOSINGLE
-
-alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0xffffffffffffffff,
-                                                                  0x7fffffffffffffff);
-alignas(16) static const __m128i single_qnan_bit = _mm_set_epi64x(0xffffffffffffffff,
-                                                                  0xffffffffffbfffff);
-alignas(16) static const __m128i double_qnan_bit = _mm_set_epi64x(0xffffffffffffffff,
-                                                                  0xfff7ffffffffffff);
-
-// Smallest positive double that results in a normalized single.
-alignas(16) static const double min_norm_single = std::numeric_limits<float>::min();
-
-void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
-{
-  // Most games have flush-to-zero enabled, which causes the single -> double -> single process here
-  // to be lossy.
-  // This is a problem when games use float operations to copy non-float data.
-  // Changing the FPU mode is very expensive, so we can't do that.
-  // Here, check to see if the source is small enough that it will result in a denormal, and pass it
-  // to the x87 unit
-  // if it is.
-  avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), MConst(double_sign_bit), true, true);
-  UCOMISD(XMM0, MConst(min_norm_single));
-  FixupBranch nanConversion = J_CC(CC_P, true);
-  FixupBranch denormalConversion = J_CC(CC_B, true);
-  CVTSD2SS(dst, R(src));
-
-  SwitchToFarCode();
-  SetJumpTarget(nanConversion);
-  MOVQ_xmm(R(RSCRATCH), src);
-  // Put the quiet bit into CF.
-  BT(64, R(RSCRATCH), Imm8(51));
-  CVTSD2SS(dst, R(src));
-  FixupBranch continue1 = J_CC(CC_C, true);
-  // Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by
-  // conversion.
-  ANDPS(dst, MConst(single_qnan_bit));
-  FixupBranch continue2 = J(true);
-
-  SetJumpTarget(denormalConversion);
-  // We're using 8 bytes on the stack
-  SUB(64, R(RSP), Imm8(8));
-  MOVSD(MatR(RSP), src);
-  FLD(64, MatR(RSP));
-  FSTP(32, MatR(RSP));
-  MOVSS(dst, MatR(RSP));
-  ADD(64, R(RSP), Imm8(8));
-  FixupBranch continue3 = J(true);
-  SwitchToNearCode();
-
-  SetJumpTarget(continue1);
-  SetJumpTarget(continue2);
-  SetJumpTarget(continue3);
-  // We'd normally need to MOVDDUP here to put the single in the top half of the output register
-  // too, but
-  // this function is only used to go directly to a following store, so we omit the MOVDDUP here.
-}
-#endif  // MORE_ACCURATE_DOUBLETOSINGLE
-
 // Converting single->double is a bit easier because all single denormals are double normals.
 void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr)
 {