Merge pull request #312 from Tilka/small_jit_optimizations

Small JIT optimizations
This commit is contained in:
Pierre Bourdon 2014-04-27 11:51:21 +02:00
commit a093b41231
7 changed files with 23 additions and 48 deletions

View File

@@ -156,12 +156,12 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
case 30: //nmsub case 30: //nmsub
MULSD(XMM0, fpr.R(c)); MULSD(XMM0, fpr.R(c));
SUBSD(XMM0, fpr.R(b)); SUBSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2)); PXOR(XMM0, M((void*)&psSignBits2));
break; break;
case 31: //nmadd case 31: //nmadd
MULSD(XMM0, fpr.R(c)); MULSD(XMM0, fpr.R(c));
ADDSD(XMM0, fpr.R(b)); ADDSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2)); PXOR(XMM0, M((void*)&psSignBits2));
break; break;
} }
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
@@ -197,13 +197,13 @@ void Jit64::fsign(UGeckoInstruction inst)
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));
switch (inst.SUBOP10) { switch (inst.SUBOP10) {
case 40: // fnegx case 40: // fnegx
XORPD(XMM0, M((void*)&psSignBits2)); PXOR(XMM0, M((void*)&psSignBits2));
break; break;
case 264: // fabsx case 264: // fabsx
ANDPD(XMM0, M((void*)&psAbsMask2)); PAND(XMM0, M((void*)&psAbsMask2));
break; break;
case 136: // fnabs case 136: // fnabs
ORPD(XMM0, M((void*)&psSignBits2)); POR(XMM0, M((void*)&psSignBits2));
break; break;
default: default:
PanicAlert("fsign bleh"); PanicAlert("fsign bleh");

View File

@@ -253,7 +253,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31)); AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0)); PXOR(XMM0, R(XMM0));
#if _M_X86_64 #if _M_X86_64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);

View File

@@ -58,7 +58,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
#endif #endif
if (inst.W) { if (inst.W) {
// One value // One value
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized)); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
} else { } else {

View File

@@ -59,13 +59,13 @@ void Jit64::ps_sel(UGeckoInstruction inst)
fpr.Lock(a, b, c, d); fpr.Lock(a, b, c, d);
MOVAPD(XMM0, fpr.R(a)); MOVAPD(XMM0, fpr.R(a));
XORPD(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
// XMM0 = XMM0 < 0 ? all 1s : all 0s // XMM0 = XMM0 < 0 ? all 1s : all 0s
CMPPD(XMM0, R(XMM1), LT); CMPPD(XMM0, R(XMM1), LT);
MOVAPD(XMM1, R(XMM0)); MOVAPD(XMM1, R(XMM0));
ANDPD(XMM0, fpr.R(b)); PAND(XMM0, fpr.R(b));
ANDNPD(XMM1, fpr.R(c)); PANDN(XMM1, fpr.R(c));
ORPD(XMM0, R(XMM1)); POR(XMM0, R(XMM1));
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), R(XMM0)); MOVAPD(fpr.RX(d), R(XMM0));
fpr.UnlockAll(); fpr.UnlockAll();
@@ -99,13 +99,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 40: //neg case 40: //neg
XORPD(fpr.RX(d), M((void*)&psSignBits)); PXOR(fpr.RX(d), M((void*)&psSignBits));
break; break;
case 136: //nabs case 136: //nabs
ORPD(fpr.RX(d), M((void*)&psSignBits)); POR(fpr.RX(d), M((void*)&psSignBits));
break; break;
case 264: //abs case 264: //abs
ANDPD(fpr.RX(d), M((void*)&psAbsMask)); PAND(fpr.RX(d), M((void*)&psAbsMask));
break; break;
} }
@@ -391,12 +391,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
case 30: //nmsub case 30: //nmsub
MULPD(XMM0, fpr.R(c)); MULPD(XMM0, fpr.R(c));
SUBPD(XMM0, fpr.R(b)); SUBPD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits)); PXOR(XMM0, M((void*)&psSignBits));
break; break;
case 31: //nmadd case 31: //nmadd
MULPD(XMM0, fpr.R(c)); MULPD(XMM0, fpr.R(c));
ADDPD(XMM0, fpr.R(b)); ADDPD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits)); PXOR(XMM0, M((void*)&psSignBits));
break; break;
default: default:
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!"); _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");

View File

@@ -114,11 +114,11 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10 { //SUBOP10
{0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, {0, &JitIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {32, &JitIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitIL::ps_sign}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {40, &JitIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitIL::ps_sign}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {136, &JitIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitIL::ps_sign}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {64, &JitIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitIL::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {72, &JitIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {96, &JitIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {528, &JitIL::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {560, &JitIL::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
@@ -139,17 +139,16 @@ static GekkoOPTemplate table4_2[] =
{18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}}, {18, &JitIL::ps_arith}, //"ps_div", OPTYPE_PS, 0, 16}},
{20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}}, {20, &JitIL::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
{21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}}, {21, &JitIL::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
{23, &JitIL::ps_sel}, //"ps_sel", OPTYPE_PS, 0}}, {23, &JitIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}},
{24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, {24, &JitIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
{25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}}, {25, &JitIL::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
{26, &JitIL::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, {26, &JitIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}}, {28, &JitIL::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
{29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}}, {29, &JitIL::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
{30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}}, {30, &JitIL::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
{31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}}, {31, &JitIL::ps_maddXX}, //"ps_nmadd", OPTYPE_PS, 0}},
}; };
static GekkoOPTemplate table4_3[] = static GekkoOPTemplate table4_3[] =
{ {
{6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, {6, &JitIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},

View File

@@ -146,7 +146,7 @@ void JitILBase::dcbz(UGeckoInstruction inst)
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31)); AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0)); PXOR(XMM0, R(XMM0));
#if _M_X86_64 #if _M_X86_64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);

View File

@@ -5,30 +5,6 @@
#include "Common/Common.h" #include "Common/Common.h"
#include "Core/PowerPC/JitILCommon/JitILBase.h" #include "Core/PowerPC/JitILCommon/JitILBase.h"
void JitILBase::ps_mr(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
return;
}
void JitILBase::ps_sel(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
return;
}
void JitILBase::ps_sign(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
return;
}
void JitILBase::ps_rsqrte(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
return;
}
void JitILBase::ps_arith(UGeckoInstruction inst) void JitILBase::ps_arith(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START