Jit64: Turn SNaN into QNaN in HandleNaNs
Improves accuracy, but isn't known to affect any games. This turned out to be fairly convenient to implement: ORing a NaN with the PPC default NaN sets its quiet bit, which quietens SNaNs and does nothing to QNaNs.
This commit is contained in:
parent
6cf99195c6
commit
c361f9155b
|
@ -116,13 +116,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
|
if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
|
||||||
inputs.push_back(i);
|
inputs.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst.OPCD != 4)
|
if (inst.OPCD != 4)
|
||||||
{
|
{
|
||||||
// not paired-single
|
// not paired-single
|
||||||
|
|
||||||
UCOMISD(xmm, R(xmm));
|
UCOMISD(xmm, R(xmm));
|
||||||
FixupBranch handle_nan = J_CC(CC_P, true);
|
FixupBranch handle_nan = J_CC(CC_P, true);
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(handle_nan);
|
SetJumpTarget(handle_nan);
|
||||||
|
|
||||||
|
// If any inputs are NaNs, pick the first NaN of them
|
||||||
std::vector<FixupBranch> fixups;
|
std::vector<FixupBranch> fixups;
|
||||||
for (u32 x : inputs)
|
for (u32 x : inputs)
|
||||||
{
|
{
|
||||||
|
@ -132,9 +136,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
UCOMISD(xmm, R(xmm));
|
UCOMISD(xmm, R(xmm));
|
||||||
fixups.push_back(J_CC(CC_P));
|
fixups.push_back(J_CC(CC_P));
|
||||||
}
|
}
|
||||||
MOVDDUP(xmm, MConst(psGeneratedQNaN));
|
|
||||||
|
// Otherwise, pick the PPC default NaN (will be finished below)
|
||||||
|
XORPD(xmm, R(xmm));
|
||||||
|
|
||||||
|
// Turn SNaNs into QNaNs (or finish writing the PPC default NaN)
|
||||||
for (FixupBranch fixup : fixups)
|
for (FixupBranch fixup : fixups)
|
||||||
SetJumpTarget(fixup);
|
SetJumpTarget(fixup);
|
||||||
|
ORPD(xmm, MConst(psGeneratedQNaN));
|
||||||
|
|
||||||
FixupBranch done = J(true);
|
FixupBranch done = J(true);
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(done);
|
SetJumpTarget(done);
|
||||||
|
@ -142,7 +152,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// paired-single
|
// paired-single
|
||||||
|
|
||||||
std::reverse(inputs.begin(), inputs.end());
|
std::reverse(inputs.begin(), inputs.end());
|
||||||
|
|
||||||
if (cpu_info.bSSE4_1)
|
if (cpu_info.bSSE4_1)
|
||||||
{
|
{
|
||||||
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
|
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
|
||||||
|
@ -150,8 +162,12 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
FixupBranch handle_nan = J_CC(CC_NZ, true);
|
FixupBranch handle_nan = J_CC(CC_NZ, true);
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(handle_nan);
|
SetJumpTarget(handle_nan);
|
||||||
|
|
||||||
|
// Replace NaNs with PPC default NaN
|
||||||
ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
|
ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
|
||||||
BLENDVPD(xmm, MConst(psGeneratedQNaN));
|
BLENDVPD(xmm, MConst(psGeneratedQNaN));
|
||||||
|
|
||||||
|
// If any inputs are NaNs, use those instead
|
||||||
for (u32 x : inputs)
|
for (u32 x : inputs)
|
||||||
{
|
{
|
||||||
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
||||||
|
@ -159,13 +175,11 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
|
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
|
||||||
BLENDVPD(xmm, Rx);
|
BLENDVPD(xmm, Rx);
|
||||||
}
|
}
|
||||||
FixupBranch done = J(true);
|
|
||||||
SwitchToNearCode();
|
|
||||||
SetJumpTarget(done);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// SSE2 fallback
|
// SSE2 fallback
|
||||||
|
|
||||||
RCX64Reg tmp = fpr.Scratch();
|
RCX64Reg tmp = fpr.Scratch();
|
||||||
RegCache::Realize(tmp);
|
RegCache::Realize(tmp);
|
||||||
MOVAPD(clobber, R(xmm));
|
MOVAPD(clobber, R(xmm));
|
||||||
|
@ -175,11 +189,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
FixupBranch handle_nan = J_CC(CC_NZ, true);
|
FixupBranch handle_nan = J_CC(CC_NZ, true);
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(handle_nan);
|
SetJumpTarget(handle_nan);
|
||||||
|
|
||||||
|
// Replace NaNs with PPC default NaN
|
||||||
MOVAPD(tmp, R(clobber));
|
MOVAPD(tmp, R(clobber));
|
||||||
ANDNPD(clobber, R(xmm));
|
ANDNPD(clobber, R(xmm));
|
||||||
ANDPD(tmp, MConst(psGeneratedQNaN));
|
ANDPD(tmp, MConst(psGeneratedQNaN));
|
||||||
ORPD(tmp, R(clobber));
|
ORPD(tmp, R(clobber));
|
||||||
MOVAPD(xmm, tmp);
|
MOVAPD(xmm, tmp);
|
||||||
|
|
||||||
|
// If any inputs are NaNs, use those instead
|
||||||
for (u32 x : inputs)
|
for (u32 x : inputs)
|
||||||
{
|
{
|
||||||
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
RCOpArg Rx = fpr.Use(x, RCMode::Read);
|
||||||
|
@ -191,11 +209,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
|
||||||
ANDPD(xmm, tmp);
|
ANDPD(xmm, tmp);
|
||||||
ORPD(xmm, R(clobber));
|
ORPD(xmm, R(clobber));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn SNaNs into QNaNs
|
||||||
|
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
|
||||||
|
ANDPD(clobber, MConst(psGeneratedQNaN));
|
||||||
|
ORPD(xmm, R(clobber));
|
||||||
|
|
||||||
FixupBranch done = J(true);
|
FixupBranch done = J(true);
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(done);
|
SetJumpTarget(done);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::fp_arith(UGeckoInstruction inst)
|
void Jit64::fp_arith(UGeckoInstruction inst)
|
||||||
|
|
Loading…
Reference in New Issue