Jit64: Turn SNaN into QNaN in HandleNaNs

Improves accuracy but isn't known to affect any games.

This turned out to be fairly convenient to implement; ORing with the
PPC default NaN will quieten SNaNs and do nothing to QNaNs.
This commit is contained in:
JosJuice 2022-10-08 17:45:17 +02:00
parent 6cf99195c6
commit c361f9155b
1 changed file with 31 additions and 7 deletions

View File

@ -116,13 +116,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
if (std::find(inputs.begin(), inputs.end(), i) == inputs.end()) if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
inputs.push_back(i); inputs.push_back(i);
} }
if (inst.OPCD != 4) if (inst.OPCD != 4)
{ {
// not paired-single // not paired-single
UCOMISD(xmm, R(xmm)); UCOMISD(xmm, R(xmm));
FixupBranch handle_nan = J_CC(CC_P, true); FixupBranch handle_nan = J_CC(CC_P, true);
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
// If any inputs are NaNs, pick the first NaN of them
std::vector<FixupBranch> fixups; std::vector<FixupBranch> fixups;
for (u32 x : inputs) for (u32 x : inputs)
{ {
@ -132,9 +136,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
UCOMISD(xmm, R(xmm)); UCOMISD(xmm, R(xmm));
fixups.push_back(J_CC(CC_P)); fixups.push_back(J_CC(CC_P));
} }
MOVDDUP(xmm, MConst(psGeneratedQNaN));
// Otherwise, pick the PPC default NaN (will be finished below)
XORPD(xmm, R(xmm));
// Turn SNaNs into QNaNs (or finish writing the PPC default NaN)
for (FixupBranch fixup : fixups) for (FixupBranch fixup : fixups)
SetJumpTarget(fixup); SetJumpTarget(fixup);
ORPD(xmm, MConst(psGeneratedQNaN));
FixupBranch done = J(true); FixupBranch done = J(true);
SwitchToNearCode(); SwitchToNearCode();
SetJumpTarget(done); SetJumpTarget(done);
@ -142,7 +152,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
else else
{ {
// paired-single // paired-single
std::reverse(inputs.begin(), inputs.end()); std::reverse(inputs.begin(), inputs.end());
if (cpu_info.bSSE4_1) if (cpu_info.bSSE4_1)
{ {
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD); avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
@ -150,8 +162,12 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
FixupBranch handle_nan = J_CC(CC_NZ, true); FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
// Replace NaNs with PPC default NaN
ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0"); ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
BLENDVPD(xmm, MConst(psGeneratedQNaN)); BLENDVPD(xmm, MConst(psGeneratedQNaN));
// If any inputs are NaNs, use those instead
for (u32 x : inputs) for (u32 x : inputs)
{ {
RCOpArg Rx = fpr.Use(x, RCMode::Read); RCOpArg Rx = fpr.Use(x, RCMode::Read);
@ -159,13 +175,11 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD); avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
BLENDVPD(xmm, Rx); BLENDVPD(xmm, Rx);
} }
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
} }
else else
{ {
// SSE2 fallback // SSE2 fallback
RCX64Reg tmp = fpr.Scratch(); RCX64Reg tmp = fpr.Scratch();
RegCache::Realize(tmp); RegCache::Realize(tmp);
MOVAPD(clobber, R(xmm)); MOVAPD(clobber, R(xmm));
@ -175,11 +189,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
FixupBranch handle_nan = J_CC(CC_NZ, true); FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
// Replace NaNs with PPC default NaN
MOVAPD(tmp, R(clobber)); MOVAPD(tmp, R(clobber));
ANDNPD(clobber, R(xmm)); ANDNPD(clobber, R(xmm));
ANDPD(tmp, MConst(psGeneratedQNaN)); ANDPD(tmp, MConst(psGeneratedQNaN));
ORPD(tmp, R(clobber)); ORPD(tmp, R(clobber));
MOVAPD(xmm, tmp); MOVAPD(xmm, tmp);
// If any inputs are NaNs, use those instead
for (u32 x : inputs) for (u32 x : inputs)
{ {
RCOpArg Rx = fpr.Use(x, RCMode::Read); RCOpArg Rx = fpr.Use(x, RCMode::Read);
@ -191,11 +209,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
ANDPD(xmm, tmp); ANDPD(xmm, tmp);
ORPD(xmm, R(clobber)); ORPD(xmm, R(clobber));
} }
}
// Turn SNaNs into QNaNs
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
ANDPD(clobber, MConst(psGeneratedQNaN));
ORPD(xmm, R(clobber));
FixupBranch done = J(true); FixupBranch done = J(true);
SwitchToNearCode(); SwitchToNearCode();
SetJumpTarget(done); SetJumpTarget(done);
} }
}
} }
void Jit64::fp_arith(UGeckoInstruction inst) void Jit64::fp_arith(UGeckoInstruction inst)