[CPU] Improve vrsqrtefp accuracy
This commit is contained in:
parent
d64848245d
commit
00e7de9297
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/**
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
|
@ -2354,21 +2354,39 @@ EMITTER_OPCODE_TABLE(OPCODE_SQRT, SQRT_F32, SQRT_F64, SQRT_V128);
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// OPCODE_RSQRT
|
// OPCODE_RSQRT
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
// AltiVec bounds vrsqrtefp's relative error by 1/4096, while AVX only gives
|
||||||
|
// < 1.5*2^-12 ≈ 1/2730 for vrsqrtps; use vrsqrt14* (< 2^-14) or 1/sqrt instead.
|
||||||
struct RSQRT_F32 : Sequence<RSQRT_F32, I<OPCODE_RSQRT, F32Op, F32Op>> {
|
struct RSQRT_F32 : Sequence<RSQRT_F32, I<OPCODE_RSQRT, F32Op, F32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vrsqrtss(i.dest, i.src1);
|
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||||
|
e.vrsqrt14ss(i.dest, i.src1, i.src1);
|
||||||
|
} else {
|
||||||
|
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));
|
||||||
|
e.vsqrtss(e.xmm1, i.src1, i.src1);
|
||||||
|
e.vdivss(i.dest, e.xmm0, e.xmm1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct RSQRT_F64 : Sequence<RSQRT_F64, I<OPCODE_RSQRT, F64Op, F64Op>> {
|
struct RSQRT_F64 : Sequence<RSQRT_F64, I<OPCODE_RSQRT, F64Op, F64Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vcvtsd2ss(i.dest, i.src1);
|
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||||
e.vrsqrtss(i.dest, i.dest);
|
e.vrsqrt14sd(i.dest, i.src1, i.src1);
|
||||||
e.vcvtss2sd(i.dest, i.dest);
|
} else {
|
||||||
|
e.vmovapd(e.xmm0, e.GetXmmConstPtr(XMMOnePD));
|
||||||
|
e.vsqrtsd(e.xmm1, i.src1, i.src1);
|
||||||
|
e.vdivsd(i.dest, e.xmm0, e.xmm1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
|
struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vrsqrtps(i.dest, i.src1);
|
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||||
|
e.vrsqrt14ps(i.dest, i.src1);
|
||||||
|
} else {
|
||||||
|
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMOne));
|
||||||
|
e.vsqrtps(e.xmm1, i.src1);
|
||||||
|
e.vdivps(i.dest, e.xmm0, e.xmm1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128);
|
EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128);
|
||||||
|
|
Loading…
Reference in New Issue