Merge pull request #312 from Tilka/small_jit_optimizations
Small JIT optimizations
This commit is contained in:
commit
a093b41231
|
@ -156,12 +156,12 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
case 30: //nmsub
|
||||
MULSD(XMM0, fpr.R(c));
|
||||
SUBSD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULSD(XMM0, fpr.R(c));
|
||||
ADDSD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
}
|
||||
fpr.BindToRegister(d, false);
|
||||
|
@ -197,13 +197,13 @@ void Jit64::fsign(UGeckoInstruction inst)
|
|||
MOVSD(XMM0, fpr.R(b));
|
||||
switch (inst.SUBOP10) {
|
||||
case 40: // fnegx
|
||||
XORPD(XMM0, M((void*)&psSignBits2));
|
||||
PXOR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
case 264: // fabsx
|
||||
ANDPD(XMM0, M((void*)&psAbsMask2));
|
||||
PAND(XMM0, M((void*)&psAbsMask2));
|
||||
break;
|
||||
case 136: // fnabs
|
||||
ORPD(XMM0, M((void*)&psSignBits2));
|
||||
POR(XMM0, M((void*)&psSignBits2));
|
||||
break;
|
||||
default:
|
||||
PanicAlert("fsign bleh");
|
||||
|
|
|
@ -253,7 +253,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
PXOR(XMM0, R(XMM0));
|
||||
#if _M_X86_64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
|
|
|
@ -58,7 +58,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
#endif
|
||||
if (inst.W) {
|
||||
// One value
|
||||
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||
PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||
} else {
|
||||
|
|
|
@ -59,13 +59,13 @@ void Jit64::ps_sel(UGeckoInstruction inst)
|
|||
|
||||
fpr.Lock(a, b, c, d);
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
XORPD(XMM1, R(XMM1));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
// XMM0 = XMM0 < 0 ? all 1s : all 0s
|
||||
CMPPD(XMM0, R(XMM1), LT);
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
ANDPD(XMM0, fpr.R(b));
|
||||
ANDNPD(XMM1, fpr.R(c));
|
||||
ORPD(XMM0, R(XMM1));
|
||||
PAND(XMM0, fpr.R(b));
|
||||
PANDN(XMM1, fpr.R(c));
|
||||
POR(XMM0, R(XMM1));
|
||||
fpr.BindToRegister(d, false);
|
||||
MOVAPD(fpr.RX(d), R(XMM0));
|
||||
fpr.UnlockAll();
|
||||
|
@ -99,13 +99,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
|||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: //neg
|
||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
PXOR(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 136: //nabs
|
||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
POR(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 264: //abs
|
||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
||||
PAND(fpr.RX(d), M((void*)&psAbsMask));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -391,12 +391,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
|
|
|
@ -114,11 +114,11 @@ static GekkoOPTemplate table4[] =
|
|||
{ //SUBOP10
|
||||
{0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
||||
{32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
||||
{40, &JitIL::ps_sign}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||
{136, &JitIL::ps_sign}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{264, &JitIL::ps_sign}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{40, &JitIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||
{136, &JitIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{264, &JitIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
||||
{72, &JitIL::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||
{72, &JitIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||
{96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
||||
{528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
||||
{560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
||||
|
@ -139,17 +139,16 @@ static GekkoOPTemplate table4_2[] =
|
|||
{18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}},
|
||||
{20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||
{21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||
{23, &JitIL::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
||||
{23, &JitIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}},
|
||||
{24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
||||
{25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||
{26, &JitIL::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||
{26, &JitIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||
{28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||
{29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||
{30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||
{31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}},
|
||||
};
|
||||
|
||||
|
||||
static GekkoOPTemplate table4_3[] =
|
||||
{
|
||||
{6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
|
||||
|
|
|
@ -146,7 +146,7 @@ void JitILBase::dcbz(UGeckoInstruction inst)
|
|||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
PXOR(XMM0, R(XMM0));
|
||||
#if _M_X86_64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
|
|
|
@ -5,30 +5,6 @@
|
|||
#include "Common/Common.h"
|
||||
#include "Core/PowerPC/JitILCommon/JitILBase.h"
|
||||
|
||||
void JitILBase::ps_mr(UGeckoInstruction inst)
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
void JitILBase::ps_sel(UGeckoInstruction inst)
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
void JitILBase::ps_sign(UGeckoInstruction inst)
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
void JitILBase::ps_rsqrte(UGeckoInstruction inst)
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
void JitILBase::ps_arith(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
|
|
Loading…
Reference in New Issue