Jit64: Turn SNaN into QNaN in HandleNaNs

Improves accuracy but isn't known to affect any games.

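This turned out to be fairly convenient to implement: ORing with the
PPC default NaN sets the quiet bit of an SNaN, turning it into a QNaN,
and does nothing to QNaNs, since every bit that is set in the default
NaN is already set in any QNaN.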
This commit is contained in:
JosJuice 2022-10-08 17:45:17 +02:00
parent 6cf99195c6
commit c361f9155b
1 changed file with 31 additions and 7 deletions

View File

@ -116,13 +116,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
inputs.push_back(i);
}
if (inst.OPCD != 4)
{
// not paired-single
UCOMISD(xmm, R(xmm));
FixupBranch handle_nan = J_CC(CC_P, true);
SwitchToFarCode();
SetJumpTarget(handle_nan);
// If any inputs are NaNs, pick the first NaN of them
std::vector<FixupBranch> fixups;
for (u32 x : inputs)
{
@ -132,9 +136,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
UCOMISD(xmm, R(xmm));
fixups.push_back(J_CC(CC_P));
}
MOVDDUP(xmm, MConst(psGeneratedQNaN));
// Otherwise, pick the PPC default NaN (will be finished below)
XORPD(xmm, R(xmm));
// Turn SNaNs into QNaNs (or finish writing the PPC default NaN)
for (FixupBranch fixup : fixups)
SetJumpTarget(fixup);
ORPD(xmm, MConst(psGeneratedQNaN));
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
@ -142,7 +152,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
else
{
// paired-single
std::reverse(inputs.begin(), inputs.end());
if (cpu_info.bSSE4_1)
{
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
@ -150,8 +162,12 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode();
SetJumpTarget(handle_nan);
// Replace NaNs with PPC default NaN
ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
BLENDVPD(xmm, MConst(psGeneratedQNaN));
// If any inputs are NaNs, use those instead
for (u32 x : inputs)
{
RCOpArg Rx = fpr.Use(x, RCMode::Read);
@ -159,13 +175,11 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
BLENDVPD(xmm, Rx);
}
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
}
else
{
// SSE2 fallback
RCX64Reg tmp = fpr.Scratch();
RegCache::Realize(tmp);
MOVAPD(clobber, R(xmm));
@ -175,11 +189,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode();
SetJumpTarget(handle_nan);
// Replace NaNs with PPC default NaN
MOVAPD(tmp, R(clobber));
ANDNPD(clobber, R(xmm));
ANDPD(tmp, MConst(psGeneratedQNaN));
ORPD(tmp, R(clobber));
MOVAPD(xmm, tmp);
// If any inputs are NaNs, use those instead
for (u32 x : inputs)
{
RCOpArg Rx = fpr.Use(x, RCMode::Read);
@ -191,12 +209,18 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
ANDPD(xmm, tmp);
ORPD(xmm, R(clobber));
}
}
// Turn SNaNs into QNaNs
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
ANDPD(clobber, MConst(psGeneratedQNaN));
ORPD(xmm, R(clobber));
FixupBranch done = J(true);
SwitchToNearCode();
SetJumpTarget(done);
}
}
}
void Jit64::fp_arith(UGeckoInstruction inst)
{