JitIL: Add JIT for addze, ps_sum0. Also, some cleanup.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2168 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
4d3c0b8783
commit
5c35e95c8f
|
@ -310,6 +310,9 @@ namespace IREmitter {
|
||||||
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
|
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(ICmpUgt, op1, op2);
|
return FoldBiOp(ICmpUgt, op1, op2);
|
||||||
}
|
}
|
||||||
|
// Builds an ICmpUlt node over (op1, op2) by handing it to FoldBiOp, in the
// same way as the neighbouring EmitICmpUgt / EmitICmpSgt helpers.
// NOTE(review): ICmpUlt is presumably "integer compare, unsigned less-than";
// confirm against the opcode enum, which is not visible here.
InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) {
|
||||||
|
return FoldBiOp(ICmpUlt, op1, op2);
|
||||||
|
}
|
||||||
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
|
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(ICmpSgt, op1, op2);
|
return FoldBiOp(ICmpSgt, op1, op2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -209,6 +209,7 @@ public:
|
||||||
void srawix(UGeckoInstruction inst);
|
void srawix(UGeckoInstruction inst);
|
||||||
void srawx(UGeckoInstruction inst);
|
void srawx(UGeckoInstruction inst);
|
||||||
void addex(UGeckoInstruction inst);
|
void addex(UGeckoInstruction inst);
|
||||||
|
void addzex(UGeckoInstruction inst);
|
||||||
|
|
||||||
void extsbx(UGeckoInstruction inst);
|
void extsbx(UGeckoInstruction inst);
|
||||||
void extshx(UGeckoInstruction inst);
|
void extshx(UGeckoInstruction inst);
|
||||||
|
|
|
@ -339,6 +339,18 @@
|
||||||
ComputeRC(ibuild, val);
|
ComputeRC(ibuild, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// addzex: emits IR that stores (GPR[RA] + carry) into GPR[RD], writes the
// carry-out of that addition back to the carry flag, and, when the Rc bit is
// set, passes the result to ComputeRC.
void Jit64::addzex(UGeckoInstruction inst)
{
	INSTRUCTION_START

	// Source operand and the incoming carry are read before anything is stored.
	IREmitter::InstLoc ra = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc sum = ibuild.EmitAdd(ra, ibuild.EmitLoadCarry());
	ibuild.EmitStoreGReg(sum, inst.RD);

	// The new carry is the result of the ICmpUlt(sum, ra) comparison, i.e.
	// whether the addition wrapped around below its original operand.
	IREmitter::InstLoc carryOut = ibuild.EmitICmpUlt(sum, ra);
	ibuild.EmitStoreCarry(carryOut);

	if (inst.Rc)
		ComputeRC(ibuild, sum);
}
|
||||||
// This can be optimized
|
// This can be optimized
|
||||||
void Jit64::addex(UGeckoInstruction inst)
|
void Jit64::addex(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
|
|
@ -27,138 +27,26 @@
|
||||||
#include "JitCache.h"
|
#include "JitCache.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
// TODO
|
|
||||||
// ps_madds0
|
|
||||||
// ps_muls0
|
|
||||||
// ps_madds1
|
|
||||||
// ps_sel
|
|
||||||
// cmppd, andpd, andnpd, or
|
|
||||||
// lfsx, ps_merge01 etc
|
|
||||||
|
|
||||||
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
|
||||||
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
|
||||||
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
|
||||||
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
|
|
||||||
|
|
||||||
void Jit64::ps_mr(UGeckoInstruction inst)
|
void Jit64::ps_mr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
Default(inst); return;
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
if (d == b)
|
|
||||||
return;
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ps_sel(UGeckoInstruction inst)
|
void Jit64::ps_sel(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
Default(inst); return;
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
|
|
||||||
int d = inst.FD;
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int c = inst.FC;
|
|
||||||
fpr.FlushLockX(XMM7);
|
|
||||||
fpr.FlushLockX(XMM6);
|
|
||||||
fpr.Lock(a, b, c, d);
|
|
||||||
fpr.LoadToX64(a, true, false);
|
|
||||||
fpr.LoadToX64(d, false, true);
|
|
||||||
// BLENDPD would have been nice...
|
|
||||||
MOVAPD(XMM7, fpr.R(a));
|
|
||||||
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
|
|
||||||
MOVAPD(XMM6, R(XMM7));
|
|
||||||
ANDPD(XMM7, fpr.R(d));
|
|
||||||
ANDNPD(XMM6, fpr.R(c));
|
|
||||||
MOVAPD(fpr.RX(d), R(XMM7));
|
|
||||||
ORPD(fpr.RX(d), R(XMM6));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
fpr.UnlockAllX();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ps_sign(UGeckoInstruction inst)
|
void Jit64::ps_sign(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
Default(inst); return;
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
|
|
||||||
fpr.Lock(d, b);
|
|
||||||
if (d != b)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (inst.SUBOP10)
|
|
||||||
{
|
|
||||||
case 40: //neg
|
|
||||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
case 136: //nabs
|
|
||||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
case 264: //abs
|
|
||||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
Default(inst); return;
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
fpr.Lock(d, b);
|
|
||||||
SQRTPD(XMM0, fpr.R(b));
|
|
||||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
|
||||||
DIVPD(XMM1, R(XMM0));
|
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//add a, b, c
|
|
||||||
|
|
||||||
//mov a, b
|
|
||||||
//add a, c
|
|
||||||
//we need:
|
|
||||||
/*
|
|
||||||
psq_l
|
|
||||||
psq_stu
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
add a,b,a
|
|
||||||
*/
|
|
||||||
|
|
||||||
void Jit64::ps_arith(UGeckoInstruction inst)
|
void Jit64::ps_arith(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) {
|
if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) {
|
||||||
|
@ -187,44 +75,22 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ps_sum(UGeckoInstruction inst)
|
void Jit64::ps_sum(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
// FIXME: This operation strikes me as a bit strange...
|
||||||
{Default(inst); return;} // turn off from debugger
|
// perhaps we can optimize it depending on the users?
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
if (inst.Rc) {
|
if (inst.Rc || inst.SUBOP5 != 10) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
int d = inst.FD;
|
IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), temp;
|
||||||
int a = inst.FA;
|
val = ibuild.EmitCompactMRegToPacked(val);
|
||||||
int b = inst.FB;
|
val = ibuild.EmitFPDup0(val);
|
||||||
int c = inst.FC;
|
temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
|
||||||
fpr.Lock(a,b,c,d);
|
val = ibuild.EmitFPAdd(val, temp);
|
||||||
fpr.LoadToX64(d, d == a || d == b || d == c, true);
|
temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
|
||||||
switch (inst.SUBOP5)
|
val = ibuild.EmitFPMerge11(val, temp);
|
||||||
{
|
val = ibuild.EmitExpandPackedToMReg(val);
|
||||||
case 10:
|
ibuild.EmitStoreFReg(val, inst.FD);
|
||||||
// Do the sum in upper subregisters, merge uppers
|
|
||||||
MOVDDUP(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(b));
|
|
||||||
ADDPD(XMM0, R(XMM1));
|
|
||||||
UNPCKHPD(XMM0, fpr.R(c)); //merge
|
|
||||||
MOVAPD(fpr.R(d), XMM0);
|
|
||||||
break;
|
|
||||||
case 11:
|
|
||||||
// Do the sum in lower subregisters, merge lowers
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(b));
|
|
||||||
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
|
|
||||||
ADDPD(XMM0, R(XMM1)); // sum lowers
|
|
||||||
MOVAPD(XMM1, fpr.R(c));
|
|
||||||
UNPCKLPD(XMM1, R(XMM0)); // merge
|
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
PanicAlert("ps_sum WTF!!!");
|
|
||||||
}
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -425,7 +425,11 @@ static GekkoOPTemplate table31_2[] =
|
||||||
{10, Interpreter::addcx, &Jit64::Default, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
{10, Interpreter::addcx, &Jit64::Default, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||||
{138, Interpreter::addex, &Jit64::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
{138, Interpreter::addex, &Jit64::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||||
{234, Interpreter::addmex, &Jit64::Default, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
{234, Interpreter::addmex, &Jit64::Default, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||||
|
#if JITTEST
|
||||||
|
{202, Interpreter::addzex, &Jit64::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||||
|
#else
|
||||||
{202, Interpreter::addzex, &Jit64::Default, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
{202, Interpreter::addzex, &Jit64::Default, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||||
|
#endif
|
||||||
{491, Interpreter::divwx, &Jit64::Default, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
{491, Interpreter::divwx, &Jit64::Default, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
||||||
{459, Interpreter::divwux, &Jit64::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
{459, Interpreter::divwux, &Jit64::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
||||||
{75, Interpreter::mulhwx, &Jit64::Default, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
|
{75, Interpreter::mulhwx, &Jit64::Default, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
|
||||||
|
|
Loading…
Reference in New Issue