From d64848245d324a50835dc5cd96cb00513a8249b5 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Wed, 16 Feb 2022 00:17:28 +0100 Subject: [PATCH] [CPU] Improve vrefp accuracy --- src/xenia/cpu/backend/x64/x64_emitter.cc | 3 ++- src/xenia/cpu/backend/x64/x64_emitter.h | 3 ++- src/xenia/cpu/backend/x64/x64_sequences.cc | 28 +++++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 5cd6780be..97b14e03e 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -653,6 +653,7 @@ void X64Emitter::MovMem64(const Xbyak::RegExp& addr, uint64_t v) { static const vec128_t xmm_consts[] = { /* XMMZero */ vec128f(0.0f), /* XMMOne */ vec128f(1.0f), + /* XMMOnePD */ vec128d(1.0), /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), /* XMMFFFF */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu), diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 840e355fc..9117beea3 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2019 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. 
* * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -49,6 +49,7 @@ enum RegisterFlags { enum XmmConst { XMMZero = 0, XMMOne, + XMMOnePD, XMMNegativeOne, XMMFFFF, XMMMaskX16Y16, diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 62f6e9a3f..08badaa9c 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -2376,21 +2376,37 @@ EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128); // ============================================================================ // OPCODE_RECIP // ============================================================================ +// Altivec guarantees an error of < 1/4096 for vrefp while AVX only gives +// < 1.5*2^-12 ≈ 1/2730 for rcpps. 
This breaks camp, horse and random event +// spawning and cactus collision, and causes flickering grass, in 5454082B struct RECIP_F32 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vrcpss(i.dest, i.src1); + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vrcp14ss(i.dest, i.src1, i.src1); + } else { + e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne)); + e.vdivss(i.dest, e.xmm0, i.src1); + } } }; struct RECIP_F64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vcvtsd2ss(i.dest, i.src1); - e.vrcpss(i.dest, i.dest); - e.vcvtss2sd(i.dest, i.dest); + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vrcp14sd(i.dest, i.src1, i.src1); + } else { + e.vmovapd(e.xmm0, e.GetXmmConstPtr(XMMOnePD)); + e.vdivsd(i.dest, e.xmm0, i.src1); + } } }; struct RECIP_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vrcpps(i.dest, i.src1); + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vrcp14ps(i.dest, i.src1); + } else { + e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne)); + e.vdivps(i.dest, e.xmm0, i.src1); + } } }; EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);