[CPU] Improve vrefp accuracy

This commit is contained in:
Joel Linn 2022-02-16 00:17:28 +01:00 committed by Rick Gibbed
parent 294c76f7c4
commit d64848245d
3 changed files with 26 additions and 8 deletions

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -653,6 +653,7 @@ void X64Emitter::MovMem64(const Xbyak::RegExp& addr, uint64_t v) {
static const vec128_t xmm_consts[] = { static const vec128_t xmm_consts[] = {
/* XMMZero */ vec128f(0.0f), /* XMMZero */ vec128f(0.0f),
/* XMMOne */ vec128f(1.0f), /* XMMOne */ vec128f(1.0f),
/* XMMOnePD */ vec128d(1.0),
/* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
/* XMMFFFF */ /* XMMFFFF */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu), vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2019 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -49,6 +49,7 @@ enum RegisterFlags {
enum XmmConst { enum XmmConst {
XMMZero = 0, XMMZero = 0,
XMMOne, XMMOne,
XMMOnePD,
XMMNegativeOne, XMMNegativeOne,
XMMFFFF, XMMFFFF,
XMMMaskX16Y16, XMMMaskX16Y16,

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -2376,21 +2376,37 @@ EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128);
// ============================================================================ // ============================================================================
// OPCODE_RECIP // OPCODE_RECIP
// ============================================================================ // ============================================================================
// Altivec guarantees a relative error of < 1/4096 for vrefp, while AVX only
// guarantees < 1.5*2^-12 ≈ 1/2730 for rcpps. The lower precision breaks camp,
// horse and random event spawning and cactus collision, and causes flickering
// grass in 5454082B.
struct RECIP_F32 : Sequence<RECIP_F32, I<OPCODE_RECIP, F32Op, F32Op>> { struct RECIP_F32 : Sequence<RECIP_F32, I<OPCODE_RECIP, F32Op, F32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vrcpss(i.dest, i.src1); if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
e.vrcp14ss(i.dest, i.src1, i.src1);
} else {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));
e.vdivss(i.dest, e.xmm0, i.src1);
}
} }
}; };
struct RECIP_F64 : Sequence<RECIP_F64, I<OPCODE_RECIP, F64Op, F64Op>> { struct RECIP_F64 : Sequence<RECIP_F64, I<OPCODE_RECIP, F64Op, F64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vcvtsd2ss(i.dest, i.src1); if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
e.vrcpss(i.dest, i.dest); e.vrcp14sd(i.dest, i.src1, i.src1);
e.vcvtss2sd(i.dest, i.dest); } else {
e.vmovapd(e.xmm0, e.GetXmmConstPtr(XMMOnePD));
e.vdivsd(i.dest, e.xmm0, i.src1);
}
} }
}; };
struct RECIP_V128 : Sequence<RECIP_V128, I<OPCODE_RECIP, V128Op, V128Op>> { struct RECIP_V128 : Sequence<RECIP_V128, I<OPCODE_RECIP, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vrcpps(i.dest, i.src1); if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
e.vrcp14ps(i.dest, i.src1);
} else {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));
e.vdivps(i.dest, e.xmm0, i.src1);
}
} }
}; };
EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128); EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);