From 5c35e95c8f134ff9a883f5a6ce35b6ba1b4343cd Mon Sep 17 00:00:00 2001 From: magumagu9 Date: Mon, 9 Feb 2009 01:31:38 +0000 Subject: [PATCH] JitIL: Add JIT for addze, ps_sum0. Also, some cleanup. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2168 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64IL/IR.h | 3 + Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h | 1 + .../Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp | 12 ++ .../Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp | 168 ++---------------- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 4 + 5 files changed, 37 insertions(+), 151 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index 8ad18b16eb..6a33d5d270 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -310,6 +310,9 @@ namespace IREmitter { InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpUgt, op1, op2); } + InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) { + return FoldBiOp(ICmpUlt, op1, op2); + } InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpSgt, op1, op2); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h index dd3dc844a1..e98922b83a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h @@ -209,6 +209,7 @@ public: void srawix(UGeckoInstruction inst); void srawx(UGeckoInstruction inst); void addex(UGeckoInstruction inst); + void addzex(UGeckoInstruction inst); void extsbx(UGeckoInstruction inst); void extshx(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp index 5c040c1d17..c49187eff0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp @@ -339,6 +339,18 @@ ComputeRC(ibuild, val); } + void Jit64::addzex(UGeckoInstruction inst) + { + INSTRUCTION_START + IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA), + val, newcarry; + val = ibuild.EmitAdd(lhs, ibuild.EmitLoadCarry()); + ibuild.EmitStoreGReg(val, inst.RD); + newcarry = ibuild.EmitICmpUlt(val, lhs); + ibuild.EmitStoreCarry(newcarry); + if (inst.Rc) + ComputeRC(ibuild, val); + } // This can be optimized void Jit64::addex(UGeckoInstruction inst) { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp index 19fc349381..ddc6a5b53b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp @@ -27,138 +27,26 @@ #include "JitCache.h" #include "JitRegCache.h" -// TODO -// ps_madds0 -// ps_muls0 -// ps_madds1 -// ps_sel -// cmppd, andpd, andnpd, or -// lfsx, ps_merge01 etc - - const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; - const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; - const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; - const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0}; - void Jit64::ps_mr(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - if (d == b) - return; - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), fpr.R(b)); + Default(inst); return; } void Jit64::ps_sel(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - Default(inst); - return; - - if (inst.Rc) { - Default(inst); return; - } - // GRR can't get this to work 100%. Getting artifacts in D.O.N. intro. - int d = inst.FD; - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - fpr.FlushLockX(XMM7); - fpr.FlushLockX(XMM6); - fpr.Lock(a, b, c, d); - fpr.LoadToX64(a, true, false); - fpr.LoadToX64(d, false, true); - // BLENDPD would have been nice... - MOVAPD(XMM7, fpr.R(a)); - CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 - MOVAPD(XMM6, R(XMM7)); - ANDPD(XMM7, fpr.R(d)); - ANDNPD(XMM6, fpr.R(c)); - MOVAPD(fpr.RX(d), R(XMM7)); - ORPD(fpr.RX(d), R(XMM6)); - fpr.UnlockAll(); - fpr.UnlockAllX(); + Default(inst); return; } void Jit64::ps_sign(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - - fpr.Lock(d, b); - if (d != b) - { - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), fpr.R(b)); - } - else - { - fpr.LoadToX64(d, true); - } - - switch (inst.SUBOP10) - { - case 40: //neg - XORPD(fpr.RX(d), M((void*)&psSignBits)); - break; - case 136: //nabs - ORPD(fpr.RX(d), M((void*)&psSignBits)); - break; - case 264: //abs - ANDPD(fpr.RX(d), M((void*)&psAbsMask)); - break; - } - - fpr.UnlockAll(); + Default(inst); return; } void Jit64::ps_rsqrte(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - fpr.Lock(d, b); - SQRTPD(XMM0, fpr.R(b)); - MOVAPD(XMM1, M((void*)&psOneOne)); - DIVPD(XMM1, R(XMM0)); - MOVAPD(fpr.R(d), XMM1); - fpr.UnlockAll(); + Default(inst); return; } - //add a, b, c - - //mov a, b - //add a, c - //we need: - /* - psq_l - psq_stu - */ - - /* - add a,b,a - */ - void Jit64::ps_arith(UGeckoInstruction inst) { if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) { @@ -187,44 +75,22 @@ } void Jit64::ps_sum(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger + { + // FIXME: This operation strikes me as a bit strange... + // perhaps we can optimize it depending on the users? INSTRUCTION_START; - if (inst.Rc) { + if (inst.Rc || inst.SUBOP5 != 10) { Default(inst); return; } - int d = inst.FD; - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - fpr.Lock(a,b,c,d); - fpr.LoadToX64(d, d == a || d == b || d == c, true); - switch (inst.SUBOP5) - { - case 10: - // Do the sum in upper subregisters, merge uppers - MOVDDUP(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(b)); - ADDPD(XMM0, R(XMM1)); - UNPCKHPD(XMM0, fpr.R(c)); //merge - MOVAPD(fpr.R(d), XMM0); - break; - case 11: - // Do the sum in lower subregisters, merge lowers - MOVAPD(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(b)); - SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower - ADDPD(XMM0, R(XMM1)); // sum lowers - MOVAPD(XMM1, fpr.R(c)); - UNPCKLPD(XMM1, R(XMM0)); // merge - MOVAPD(fpr.R(d), XMM1); - break; - default: - PanicAlert("ps_sum WTF!!!"); - } - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); + IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), temp; + val = ibuild.EmitCompactMRegToPacked(val); + val = ibuild.EmitFPDup0(val); + temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); + val = ibuild.EmitFPAdd(val, temp); + temp = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); + val = ibuild.EmitFPMerge11(val, temp); + val = ibuild.EmitExpandPackedToMReg(val); + ibuild.EmitStoreFReg(val, inst.FD); } diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index c4a717d10b..7c20ea5621 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -425,7 +425,11 @@ static GekkoOPTemplate table31_2[] = {10, Interpreter::addcx, &Jit64::Default, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {138, Interpreter::addex, &Jit64::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {234, Interpreter::addmex, &Jit64::Default, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, +#if JITTEST + {202, Interpreter::addzex, &Jit64::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, +#else {202, Interpreter::addzex, &Jit64::Default, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, +#endif {491, Interpreter::divwx, &Jit64::Default, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {459, Interpreter::divwux, &Jit64::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {75, Interpreter::mulhwx, &Jit64::Default, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},