Merge pull request #312 from Tilka/small_jit_optimizations
Small JIT optimizations
This commit is contained in:
commit
a093b41231
|
@ -156,12 +156,12 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
case 30: //nmsub
|
case 30: //nmsub
|
||||||
MULSD(XMM0, fpr.R(c));
|
MULSD(XMM0, fpr.R(c));
|
||||||
SUBSD(XMM0, fpr.R(b));
|
SUBSD(XMM0, fpr.R(b));
|
||||||
XORPD(XMM0, M((void*)&psSignBits2));
|
PXOR(XMM0, M((void*)&psSignBits2));
|
||||||
break;
|
break;
|
||||||
case 31: //nmadd
|
case 31: //nmadd
|
||||||
MULSD(XMM0, fpr.R(c));
|
MULSD(XMM0, fpr.R(c));
|
||||||
ADDSD(XMM0, fpr.R(b));
|
ADDSD(XMM0, fpr.R(b));
|
||||||
XORPD(XMM0, M((void*)&psSignBits2));
|
PXOR(XMM0, M((void*)&psSignBits2));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
|
@ -197,13 +197,13 @@ void Jit64::fsign(UGeckoInstruction inst)
|
||||||
MOVSD(XMM0, fpr.R(b));
|
MOVSD(XMM0, fpr.R(b));
|
||||||
switch (inst.SUBOP10) {
|
switch (inst.SUBOP10) {
|
||||||
case 40: // fnegx
|
case 40: // fnegx
|
||||||
XORPD(XMM0, M((void*)&psSignBits2));
|
PXOR(XMM0, M((void*)&psSignBits2));
|
||||||
break;
|
break;
|
||||||
case 264: // fabsx
|
case 264: // fabsx
|
||||||
ANDPD(XMM0, M((void*)&psAbsMask2));
|
PAND(XMM0, M((void*)&psAbsMask2));
|
||||||
break;
|
break;
|
||||||
case 136: // fnabs
|
case 136: // fnabs
|
||||||
ORPD(XMM0, M((void*)&psSignBits2));
|
POR(XMM0, M((void*)&psSignBits2));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicAlert("fsign bleh");
|
PanicAlert("fsign bleh");
|
||||||
|
|
|
@ -253,7 +253,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
AND(32, R(EAX), Imm32(~31));
|
AND(32, R(EAX), Imm32(~31));
|
||||||
XORPD(XMM0, R(XMM0));
|
PXOR(XMM0, R(XMM0));
|
||||||
#if _M_X86_64
|
#if _M_X86_64
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||||
|
|
|
@ -58,7 +58,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
#endif
|
#endif
|
||||||
if (inst.W) {
|
if (inst.W) {
|
||||||
// One value
|
// One value
|
||||||
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
CVTSD2SS(XMM0, fpr.R(s));
|
||||||
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
|
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -59,13 +59,13 @@ void Jit64::ps_sel(UGeckoInstruction inst)
|
||||||
|
|
||||||
fpr.Lock(a, b, c, d);
|
fpr.Lock(a, b, c, d);
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
XORPD(XMM1, R(XMM1));
|
PXOR(XMM1, R(XMM1));
|
||||||
// XMM0 = XMM0 < 0 ? all 1s : all 0s
|
// XMM0 = XMM0 < 0 ? all 1s : all 0s
|
||||||
CMPPD(XMM0, R(XMM1), LT);
|
CMPPD(XMM0, R(XMM1), LT);
|
||||||
MOVAPD(XMM1, R(XMM0));
|
MOVAPD(XMM1, R(XMM0));
|
||||||
ANDPD(XMM0, fpr.R(b));
|
PAND(XMM0, fpr.R(b));
|
||||||
ANDNPD(XMM1, fpr.R(c));
|
PANDN(XMM1, fpr.R(c));
|
||||||
ORPD(XMM0, R(XMM1));
|
POR(XMM0, R(XMM1));
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
MOVAPD(fpr.RX(d), R(XMM0));
|
MOVAPD(fpr.RX(d), R(XMM0));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
|
@ -99,13 +99,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
||||||
switch (inst.SUBOP10)
|
switch (inst.SUBOP10)
|
||||||
{
|
{
|
||||||
case 40: //neg
|
case 40: //neg
|
||||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
PXOR(fpr.RX(d), M((void*)&psSignBits));
|
||||||
break;
|
break;
|
||||||
case 136: //nabs
|
case 136: //nabs
|
||||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
POR(fpr.RX(d), M((void*)&psSignBits));
|
||||||
break;
|
break;
|
||||||
case 264: //abs
|
case 264: //abs
|
||||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
PAND(fpr.RX(d), M((void*)&psAbsMask));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -391,12 +391,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||||
case 30: //nmsub
|
case 30: //nmsub
|
||||||
MULPD(XMM0, fpr.R(c));
|
MULPD(XMM0, fpr.R(c));
|
||||||
SUBPD(XMM0, fpr.R(b));
|
SUBPD(XMM0, fpr.R(b));
|
||||||
XORPD(XMM0, M((void*)&psSignBits));
|
PXOR(XMM0, M((void*)&psSignBits));
|
||||||
break;
|
break;
|
||||||
case 31: //nmadd
|
case 31: //nmadd
|
||||||
MULPD(XMM0, fpr.R(c));
|
MULPD(XMM0, fpr.R(c));
|
||||||
ADDPD(XMM0, fpr.R(b));
|
ADDPD(XMM0, fpr.R(b));
|
||||||
XORPD(XMM0, M((void*)&psSignBits));
|
PXOR(XMM0, M((void*)&psSignBits));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||||
|
|
|
@ -114,11 +114,11 @@ static GekkoOPTemplate table4[] =
|
||||||
{ //SUBOP10
|
{ //SUBOP10
|
||||||
{0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
{0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
||||||
{32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
{32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
||||||
{40, &JitIL::ps_sign}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
{40, &JitIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{136, &JitIL::ps_sign}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
{136, &JitIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{264, &JitIL::ps_sign}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
{264, &JitIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
{64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{72, &JitIL::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
{72, &JitIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
{96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
{528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
{560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
||||||
|
@ -139,17 +139,16 @@ static GekkoOPTemplate table4_2[] =
|
||||||
{18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}},
|
{18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}},
|
||||||
{20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
{20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||||
{21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
{21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||||
{23, &JitIL::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
{23, &JitIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}},
|
||||||
{24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
{24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
||||||
{25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
{25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||||
{26, &JitIL::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
{26, &JitIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||||
{28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
{28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||||
{29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
{29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||||
{30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
{30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||||
{31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}},
|
{31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static GekkoOPTemplate table4_3[] =
|
static GekkoOPTemplate table4_3[] =
|
||||||
{
|
{
|
||||||
{6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
|
{6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
|
||||||
|
|
|
@ -146,7 +146,7 @@ void JitILBase::dcbz(UGeckoInstruction inst)
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
AND(32, R(EAX), Imm32(~31));
|
AND(32, R(EAX), Imm32(~31));
|
||||||
XORPD(XMM0, R(XMM0));
|
PXOR(XMM0, R(XMM0));
|
||||||
#if _M_X86_64
|
#if _M_X86_64
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||||
|
|
|
@ -5,30 +5,6 @@
|
||||||
#include "Common/Common.h"
|
#include "Common/Common.h"
|
||||||
#include "Core/PowerPC/JitILCommon/JitILBase.h"
|
#include "Core/PowerPC/JitILCommon/JitILBase.h"
|
||||||
|
|
||||||
void JitILBase::ps_mr(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
FallBackToInterpreter(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitILBase::ps_sel(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
FallBackToInterpreter(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitILBase::ps_sign(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
FallBackToInterpreter(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitILBase::ps_rsqrte(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
FallBackToInterpreter(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitILBase::ps_arith(UGeckoInstruction inst)
|
void JitILBase::ps_arith(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
Loading…
Reference in New Issue