From 00e7de9297f9353aafd08ecfa99d4c5efb340c47 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Wed, 16 Feb 2022 00:51:24 +0100 Subject: [PATCH] [CPU] Improve vrsqrtefp accuracy --- src/xenia/cpu/backend/x64/x64_sequences.cc | 30 +++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 08badaa9c..34cef4f7d 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -1,4 +1,4 @@ -/** +/** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** @@ -2354,21 +2354,39 @@ EMITTER_OPCODE_TABLE(OPCODE_SQRT, SQRT_F32, SQRT_F64, SQRT_V128); // ============================================================================ // OPCODE_RSQRT // ============================================================================ +// Altivec guarantees an error of < 1/4096 for vrsqrtefp while AVX only gives +// < 1.5*2^-12 ≈ 1/2730 for vrsqrtps. 
struct RSQRT_F32 : Sequence<RSQRT_F32, I<OPCODE_RSQRT, F32Op, F32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vrsqrtss(i.dest, i.src1);
+    if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {  // AVX-512 estimate path
+      e.vrsqrt14ss(i.dest, i.src1, i.src1);
+    } else {  // fallback: dest = XMMOne / sqrt(src1), scalar single
+      e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));  // xmm0 = XMMOne
+      e.vsqrtss(e.xmm1, i.src1, i.src1);            // xmm1 = sqrt(src1)
+      e.vdivss(i.dest, e.xmm0, e.xmm1);             // dest = xmm0 / xmm1
+    }
   }
 };
 struct RSQRT_F64 : Sequence<RSQRT_F64, I<OPCODE_RSQRT, F64Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vcvtsd2ss(i.dest, i.src1);
-    e.vrsqrtss(i.dest, i.dest);
-    e.vcvtss2sd(i.dest, i.dest);
+    if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {  // AVX-512 estimate path
+      e.vrsqrt14sd(i.dest, i.src1, i.src1);
+    } else {  // fallback: dest = XMMOnePD / sqrt(src1), scalar double
+      e.vmovapd(e.xmm0, e.GetXmmConstPtr(XMMOnePD));  // xmm0 = XMMOnePD
+      e.vsqrtsd(e.xmm1, i.src1, i.src1);              // xmm1 = sqrt(src1)
+      e.vdivsd(i.dest, e.xmm0, e.xmm1);               // dest = xmm0 / xmm1
+    }
   }
 };
 struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vrsqrtps(i.dest, i.src1);
+    if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {  // AVX-512 estimate path
+      e.vrsqrt14ps(i.dest, i.src1);
+    } else {  // fallback: dest = XMMOne / sqrt(src1), packed single
+      e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));  // xmm0 = XMMOne
+      e.vsqrtps(e.xmm1, i.src1);                    // xmm1 = sqrt(src1)
+      e.vdivps(i.dest, e.xmm0, e.xmm1);             // dest = xmm0 / xmm1
+    }
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128);