Jit64: use domain-appropriate instructions

Original commit:
commit 62d3423ccf
Author: Tillmann Karras <tilkax@gmail.com>
Date:   Wed Nov 18 20:32:20 2015 +0100
This commit is contained in:
Shawn Hoffman 2016-10-03 07:15:26 -07:00
parent cc66f0336f
commit f000bb7590
2 changed files with 16 additions and 16 deletions

View File

@@ -149,18 +149,18 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
MOVAPD(tmp, R(clobber)); MOVAPD(tmp, R(clobber));
PANDN(clobber, R(xmm)); ANDNPD(clobber, R(xmm));
PAND(tmp, M(psGeneratedQNaN)); ANDPD(tmp, M(psGeneratedQNaN));
POR(tmp, R(clobber)); ORPD(tmp, R(clobber));
MOVAPD(xmm, R(tmp)); MOVAPD(xmm, R(tmp));
for (u32 x : inputs) for (u32 x : inputs)
{ {
MOVAPD(clobber, fpr.R(x)); MOVAPD(clobber, fpr.R(x));
CMPPD(clobber, R(clobber), CMP_ORD); CMPPD(clobber, R(clobber), CMP_ORD);
MOVAPD(tmp, R(clobber)); MOVAPD(tmp, R(clobber));
PANDN(clobber, fpr.R(x)); ANDNPD(clobber, fpr.R(x));
PAND(xmm, R(tmp)); ANDPD(xmm, R(tmp));
POR(xmm, R(clobber)); ORPD(xmm, R(clobber));
} }
FixupBranch done = J(true); FixupBranch done = J(true);
SwitchToNearCode(); SwitchToNearCode();
@@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
ADDSD(XMM1, fpr.R(b)); ADDSD(XMM1, fpr.R(b));
} }
if (inst.SUBOP5 == 31) // nmadd if (inst.SUBOP5 == 31) // nmadd
PXOR(XMM1, M(packed ? psSignBits2 : psSignBits)); XORPD(XMM1, M(packed ? psSignBits2 : psSignBits));
} }
fpr.BindToRegister(d, !single); fpr.BindToRegister(d, !single);
if (single) if (single)
@@ -384,15 +384,15 @@ void Jit64::fsign(UGeckoInstruction inst)
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 40: // neg case 40: // neg
avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
packed); M(packed ? psSignBits2 : psSignBits), packed);
break; break;
case 136: // nabs case 136: // nabs
avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits),
packed); packed);
break; break;
case 264: // abs case 264: // abs
avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask),
packed); packed);
break; break;
default: default:
@@ -416,7 +416,7 @@ void Jit64::fselx(UGeckoInstruction inst)
bool packed = inst.OPCD == 4; // ps_sel bool packed = inst.OPCD == 4; // ps_sel
fpr.Lock(a, b, c, d); fpr.Lock(a, b, c, d);
PXOR(XMM0, R(XMM0)); XORPD(XMM0, R(XMM0));
// This condition is very tricky; there's only one right way to handle both the case of // This condition is very tricky; there's only one right way to handle both the case of
// negative/positive zero and NaN properly. // negative/positive zero and NaN properly.
// (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE. // (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE.
@@ -433,9 +433,9 @@ void Jit64::fselx(UGeckoInstruction inst)
else else
{ {
MOVAPD(XMM1, R(XMM0)); MOVAPD(XMM1, R(XMM0));
PAND(XMM0, fpr.R(b)); ANDPD(XMM0, fpr.R(b));
PANDN(XMM1, fpr.R(c)); ANDNPD(XMM1, fpr.R(c));
POR(XMM1, R(XMM0)); ORPD(XMM1, R(XMM0));
} }
fpr.BindToRegister(d, !packed); fpr.BindToRegister(d, !packed);

View File

@@ -362,7 +362,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
FixupBranch slow = J_CC(CC_Z, true); FixupBranch slow = J_CC(CC_Z, true);
// Fast path: compute full address, then zero out 32 bytes of memory. // Fast path: compute full address, then zero out 32 bytes of memory.
PXOR(XMM0, R(XMM0)); XORPS(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);