Merge pull request #2652 from Tilka/ps_sum1

Jit64: fix pre-SSE4.1 fallback of ps_sum1 with AccurateNaNs=True
This commit is contained in:
Jules Blok 2015-06-24 18:28:33 +02:00
commit 519a7c4a29
3 changed files with 20 additions and 18 deletions

View File

@@ -135,7 +135,8 @@ public:
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
void SetFPRFIfNeeded(Gen::X64Reg xmm); void SetFPRFIfNeeded(Gen::X64Reg xmm);
void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in); void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in,
Gen::X64Reg clobber = Gen::XMM0);
void MultiplyImmediate(u32 imm, int a, int d, bool overflow); void MultiplyImmediate(u32 imm, int a, int d, bool overflow);

View File

@@ -55,7 +55,7 @@ void Jit64::SetFPRFIfNeeded(X64Reg xmm)
SetFPRF(xmm); SetFPRF(xmm);
} }
void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm) void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Reg clobber)
{ {
// | PowerPC | x86 // | PowerPC | x86
// ---------------------+----------+--------- // ---------------------+----------+---------
@@ -72,7 +72,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
return; return;
} }
_assert_(xmm != XMM0); _assert_(xmm != clobber);
std::vector<u32> inputs; std::vector<u32> inputs;
u32 a = inst.FA, b = inst.FB, c = inst.FC; u32 a = inst.FA, b = inst.FB, c = inst.FC;
@@ -110,15 +110,16 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
std::reverse(inputs.begin(), inputs.end()); std::reverse(inputs.begin(), inputs.end());
if (cpu_info.bSSE4_1) if (cpu_info.bSSE4_1)
{ {
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, XMM0, R(xmm), R(xmm), CMP_UNORD); avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
PTEST(XMM0, R(XMM0)); PTEST(clobber, R(clobber));
FixupBranch handle_nan = J_CC(CC_NZ, true); FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
_assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
BLENDVPD(xmm, M(psGeneratedQNaN)); BLENDVPD(xmm, M(psGeneratedQNaN));
for (u32 x : inputs) for (u32 x : inputs)
{ {
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, XMM0, fpr.R(x), fpr.R(x), CMP_UNORD); avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
BLENDVPD(xmm, fpr.R(x)); BLENDVPD(xmm, fpr.R(x));
} }
FixupBranch done = J(true); FixupBranch done = J(true);
@@ -130,26 +131,26 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
// SSE2 fallback // SSE2 fallback
X64Reg tmp = fpr.GetFreeXReg(); X64Reg tmp = fpr.GetFreeXReg();
fpr.FlushLockX(tmp); fpr.FlushLockX(tmp);
MOVAPD(XMM0, R(xmm)); MOVAPD(clobber, R(xmm));
CMPPD(XMM0, R(XMM0), CMP_UNORD); CMPPD(clobber, R(clobber), CMP_UNORD);
MOVMSKPD(RSCRATCH, R(XMM0)); MOVMSKPD(RSCRATCH, R(clobber));
TEST(32, R(RSCRATCH), R(RSCRATCH)); TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch handle_nan = J_CC(CC_NZ, true); FixupBranch handle_nan = J_CC(CC_NZ, true);
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
MOVAPD(tmp, R(XMM0)); MOVAPD(tmp, R(clobber));
PANDN(XMM0, R(xmm)); PANDN(clobber, R(xmm));
PAND(tmp, M(psGeneratedQNaN)); PAND(tmp, M(psGeneratedQNaN));
POR(tmp, R(XMM0)); POR(tmp, R(clobber));
MOVAPD(xmm, R(tmp)); MOVAPD(xmm, R(tmp));
for (u32 x : inputs) for (u32 x : inputs)
{ {
MOVAPD(XMM0, fpr.R(x)); MOVAPD(clobber, fpr.R(x));
CMPPD(XMM0, R(XMM0), CMP_ORD); CMPPD(clobber, R(clobber), CMP_ORD);
MOVAPD(tmp, R(XMM0)); MOVAPD(tmp, R(clobber));
PANDN(XMM0, fpr.R(x)); PANDN(clobber, fpr.R(x));
PAND(xmm, R(tmp)); PAND(xmm, R(tmp));
POR(xmm, R(XMM0)); POR(xmm, R(clobber));
} }
FixupBranch done = J(true); FixupBranch done = J(true);
SwitchToNearCode(); SwitchToNearCode();

View File

@@ -68,7 +68,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
default: default:
PanicAlert("ps_sum WTF!!!"); PanicAlert("ps_sum WTF!!!");
} }
HandleNaNs(inst, fpr.RX(d), tmp); HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d)); ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();