Jit64: use integer instructions for boolean logic
They are semantically equivalent and, according to Agner Fog, they can run on more execution ports than their floating-point counterparts (at least on Intel CPUs).
This commit is contained in:
parent
12054e9f16
commit
688644dd18
|
@ -156,12 +156,12 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
case 30: //nmsub
|
||||
MULSD(XMM0, fpr.R(c));
|
||||
SUBSD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULSD(XMM0, fpr.R(c));
|
||||
ADDSD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
}
|
||||
fpr.BindToRegister(d, false);
|
||||
|
@ -197,13 +197,13 @@ void Jit64::fsign(UGeckoInstruction inst)
|
|||
MOVSD(XMM0, fpr.R(b));
|
||||
switch (inst.SUBOP10) {
|
||||
case 40: // fnegx
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
case 264: // fabsx
|
||||
ANDPD(XMM0, M((void*)&psAbsMask2));
|
||||
PAND(XMM0, M((void*)&psAbsMask2));
|
||||
break;
|
||||
case 136: // fnabs
|
||||
ORPD(XMM0, M((void*)&psSignBits2));
|
||||
POR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
default:
|
||||
PanicAlert("fsign bleh");
|
||||
|
|
|
@ -253,7 +253,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
PXOR(XMM0, R(XMM0));
|
||||
#if _M_X86_64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
|
|
|
@ -58,7 +58,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
#endif
|
||||
if (inst.W) {
|
||||
// One value
|
||||
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||
PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||
} else {
|
||||
|
|
|
@ -59,13 +59,13 @@ void Jit64::ps_sel(UGeckoInstruction inst)
|
|||
|
||||
fpr.Lock(a, b, c, d);
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
XORPD(XMM1, R(XMM1));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
// XMM0 = XMM0 < 0 ? all 1s : all 0s
|
||||
CMPPD(XMM0, R(XMM1), LT);
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
ANDPD(XMM0, fpr.R(b));
|
||||
ANDNPD(XMM1, fpr.R(c));
|
||||
ORPD(XMM0, R(XMM1));
|
||||
PAND(XMM0, fpr.R(b));
|
||||
PANDN(XMM1, fpr.R(c));
|
||||
POR(XMM0, R(XMM1));
|
||||
fpr.BindToRegister(d, false);
|
||||
MOVAPD(fpr.RX(d), R(XMM0));
|
||||
fpr.UnlockAll();
|
||||
|
@ -99,13 +99,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
|||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: //neg
|
||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
PXOR(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 136: //nabs
|
||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
POR(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 264: //abs
|
||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
||||
PAND(fpr.RX(d), M((void*)&psAbsMask));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -391,12 +391,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
|
|
|
@ -146,7 +146,7 @@ void JitILBase::dcbz(UGeckoInstruction inst)
|
|||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
PXOR(XMM0, R(XMM0));
|
||||
#if _M_X86_64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
|
|
Loading…
Reference in New Issue