JitIL: Add JIT for addze, ps_sum0. Also, some cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2168 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-02-09 01:31:38 +00:00
parent 4d3c0b8783
commit 5c35e95c8f
5 changed files with 37 additions and 151 deletions

View File

@ -310,6 +310,9 @@ namespace IREmitter {
// Builds an ICmpUgt node comparing op1 against op2. The request is routed
// through FoldBiOp rather than emitted directly.
// NOTE(review): ICmpUgt is presumably "unsigned greater-than" - confirm against the opcode list.
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
	InstLoc const folded = FoldBiOp(ICmpUgt, op1, op2);
	return folded;
}
// Builds an ICmpUlt node comparing op1 against op2. The request is routed
// through FoldBiOp rather than emitted directly.
// NOTE(review): ICmpUlt is presumably "unsigned less-than" - confirm against the opcode list.
InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) {
	InstLoc const folded = FoldBiOp(ICmpUlt, op1, op2);
	return folded;
}
// Builds an ICmpSgt node comparing op1 against op2. The request is routed
// through FoldBiOp rather than emitted directly.
// NOTE(review): ICmpSgt is presumably "signed greater-than" - confirm against the opcode list.
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
	InstLoc const folded = FoldBiOp(ICmpSgt, op1, op2);
	return folded;
}

View File

@ -209,6 +209,7 @@ public:
// Per-instruction handlers; each receives the decoded instruction word.
void srawix(UGeckoInstruction inst);
void srawx(UGeckoInstruction inst);
void addex(UGeckoInstruction inst);
// Handler for addze: adds the carry flag to RA and stores the result in RD.
void addzex(UGeckoInstruction inst);
void extsbx(UGeckoInstruction inst);
void extshx(UGeckoInstruction inst);

View File

@ -339,6 +339,18 @@
ComputeRC(ibuild, val);
}
// IR builder for addze: RD = RA + carry-in.
// The carry-out is recomputed from the result and stored back, and the
// record-form update (ComputeRC) is applied when inst.Rc is set.
void Jit64::addzex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	IREmitter::InstLoc ra = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc carryIn = ibuild.EmitLoadCarry();
	IREmitter::InstLoc sum = ibuild.EmitAdd(ra, carryIn);
	ibuild.EmitStoreGReg(sum, inst.RD);

	// The new carry is the ICmpUlt of the sum against the original RA value,
	// i.e. it is set when the addition wrapped around.
	IREmitter::InstLoc carryOut = ibuild.EmitICmpUlt(sum, ra);
	ibuild.EmitStoreCarry(carryOut);

	if (inst.Rc)
	{
		ComputeRC(ibuild, sum);
	}
}
// This can be optimized
void Jit64::addex(UGeckoInstruction inst)
{

View File

@ -27,138 +27,26 @@
#include "JitCache.h"
#include "JitRegCache.h"
// TODO
// ps_madds0
// ps_muls0
// ps_madds1
// ps_sel
// cmppd, andpd, andnpd, or
// lfsx, ps_merge01 etc
// Two-lane 64-bit constants used as memory operands by the paired-single handlers below.
// NOTE(review): GC_ALIGNED16 presumably forces 16-byte alignment for the SSE memory operands - confirm.
// Top bit of each lane: XORed in for "neg" and ORed in for "nabs" by ps_sign.
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
// Every bit except the top one of each lane: ANDed in for "abs" by ps_sign.
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
// {1.0, 1.0}: the dividend loaded by ps_rsqrte.
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
// {0.0, 0.0}: the comparison operand used by ps_sel.
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
// Handler for ps_mr: copies paired register FB into FD.
// Falls back to Default(inst) when the JIT or the paired-single JIT is
// switched off, or when the record bit (inst.Rc) is set.
// NOTE(review): the emitted MOVAPD is followed by an unconditional
// Default(inst). This listing may interleave removed and added diff lines -
// confirm which path is live in the actual file.
void Jit64::ps_mr(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
// Source and destination are the same register: nothing to emit.
if (d == b)
return;
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), fpr.R(b));
Default(inst); return;
}
// Handler for ps_sel.
// As listed, the function always defers to Default(inst): after the
// debugger-off check and INSTRUCTION_START it calls Default(inst) and
// returns, so every statement after that early return is unreachable.
// NOTE(review): this listing may interleave removed and added diff lines -
// confirm against the actual file.
void Jit64::ps_sel(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
// Unconditional fallback; the SSE sequence below is never reached.
Default(inst);
return;
if (inst.Rc) {
Default(inst); return;
}
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
// XMM6 and XMM7 are used as scratch registers for the mask below.
fpr.FlushLockX(XMM7);
fpr.FlushLockX(XMM6);
fpr.Lock(a, b, c, d);
fpr.LoadToX64(a, true, false);
fpr.LoadToX64(d, false, true);
// BLENDPD would have been nice...
// Build a per-lane mask from (a < 0.0), then combine (mask & d) with
// (~mask & c) into d.
// NOTE(review): b is locked but never read in this sequence - verify intent.
MOVAPD(XMM7, fpr.R(a));
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
MOVAPD(XMM6, R(XMM7));
ANDPD(XMM7, fpr.R(d));
ANDNPD(XMM6, fpr.R(c));
MOVAPD(fpr.RX(d), R(XMM7));
ORPD(fpr.RX(d), R(XMM6));
fpr.UnlockAll();
fpr.UnlockAllX();
Default(inst); return;
}
// Handler for the paired-single sign operations, selected by inst.SUBOP10
// (neg, nabs, abs). Copies FB into FD if they differ, then applies a
// sign-bit mask to FD.
// Falls back to Default(inst) when the JIT or the paired-single JIT is
// switched off, or when inst.Rc is set.
// NOTE(review): the emitted sequence is followed by an unconditional
// Default(inst). This listing may interleave removed and added diff lines -
// confirm which path is live in the actual file.
void Jit64::ps_sign(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
fpr.Lock(d, b);
if (d != b)
{
// Destination differs from source: bring d into a register and copy b over it.
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), fpr.R(b));
}
else
{
// Operating in place on d.
fpr.LoadToX64(d, true);
}
switch (inst.SUBOP10)
{
case 40: //neg
// Flip the sign bit of both lanes.
XORPD(fpr.RX(d), M((void*)&psSignBits));
break;
case 136: //nabs
// Force the sign bit on in both lanes.
ORPD(fpr.RX(d), M((void*)&psSignBits));
break;
case 264: //abs
// Clear the sign bit in both lanes.
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
break;
}
fpr.UnlockAll();
Default(inst); return;
}
// Handler for ps_rsqrte: emits psOneOne / sqrt(FB) per lane and writes the
// result to FD, using XMM0 and XMM1 as scratch registers.
// Falls back to Default(inst) when the JIT or the paired-single JIT is
// switched off, or when inst.Rc is set.
// NOTE(review): the emitted sequence is followed by an unconditional
// Default(inst). This listing may interleave removed and added diff lines -
// confirm which path is live in the actual file.
void Jit64::ps_rsqrte(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
fpr.Lock(d, b);
// XMM0 = sqrt(b); XMM1 = {1.0, 1.0} / XMM0.
SQRTPD(XMM0, fpr.R(b));
MOVAPD(XMM1, M((void*)&psOneOne));
DIVPD(XMM1, R(XMM0));
MOVAPD(fpr.R(d), XMM1);
fpr.UnlockAll();
Default(inst); return;
}
//add a, b, c
//mov a, b
//add a, c
//we need:
/*
psq_l
psq_stu
*/
/*
add a,b,a
*/
void Jit64::ps_arith(UGeckoInstruction inst)
{
if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) {
@ -188,43 +76,21 @@
// Handler for the ps_sum family (SUBOP5 10 and 11 appear in the switch below).
// NOTE(review): this listing contains two consecutive `if (inst.Rc ...)`
// openers, plus both a direct SSE emitter path and an IR-builder path. It
// appears to interleave removed and added diff lines; confirm against the
// actual file before relying on any single path described here.
void Jit64::ps_sum(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
// FIXME: This operation strikes me as a bit strange...
// perhaps we can optimize it depending on the users?
INSTRUCTION_START;
if (inst.Rc) {
// This variant of the guard also defers any sub-opcode other than 10 to Default(inst).
if (inst.Rc || inst.SUBOP5 != 10) {
Default(inst); return;
}
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.Lock(a,b,c,d);
// Load d's current value only when it is also one of the source registers.
fpr.LoadToX64(d, d == a || d == b || d == c, true);
switch (inst.SUBOP5)
{
case 10:
// Do the sum in upper subregisters, merge uppers
MOVDDUP(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
ADDPD(XMM0, R(XMM1));
UNPCKHPD(XMM0, fpr.R(c)); //merge
MOVAPD(fpr.R(d), XMM0);
break;
case 11:
// Do the sum in lower subregisters, merge lowers
MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
ADDPD(XMM0, R(XMM1)); // sum lowers
MOVAPD(XMM1, fpr.R(c));
UNPCKLPD(XMM1, R(XMM0)); // merge
MOVAPD(fpr.R(d), XMM1);
break;
default:
PanicAlert("ps_sum WTF!!!");
}
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
// IR-builder path: load FA, compact it to packed form and apply FPDup0;
// add the compacted FB; FPMerge11 the sum with the compacted FC; expand the
// result and store it to FD.
// NOTE(review): exact lane semantics of FPDup0 / FPMerge11 are defined by the IR - confirm there.
IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), temp;
val = ibuild.EmitCompactMRegToPacked(val);
val = ibuild.EmitFPDup0(val);
temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
val = ibuild.EmitFPAdd(val, temp);
temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
val = ibuild.EmitFPMerge11(val, temp);
val = ibuild.EmitExpandPackedToMReg(val);
ibuild.EmitStoreFReg(val, inst.FD);
}

View File

@ -425,7 +425,11 @@ static GekkoOPTemplate table31_2[] =
{10, Interpreter::addcx, &Jit64::Default, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, Interpreter::addex, &Jit64::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, Interpreter::addmex, &Jit64::Default, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
#if JITTEST
{202, Interpreter::addzex, &Jit64::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
#else
{202, Interpreter::addzex, &Jit64::Default, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
#endif
{491, Interpreter::divwx, &Jit64::Default, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, Interpreter::divwux, &Jit64::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{75, Interpreter::mulhwx, &Jit64::Default, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},