From e4f8419be4d9c9a917de5e69c9e32202fbc0f5a7 Mon Sep 17 00:00:00 2001 From: magumagu9 Date: Tue, 6 Jan 2009 08:39:32 +0000 Subject: [PATCH] A little bit more WIP JIT work. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1809 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp | 20 ++++++ Source/Core/Core/Src/PowerPC/Jit64IL/IR.h | 8 +++ .../Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp | 71 ++++++++++--------- 3 files changed, 67 insertions(+), 32 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index 356718cc73..45d0c71c8a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -1063,6 +1063,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { case ExpandPackedToMReg: case CompactMRegToPacked: case FPNeg: + case FPDup0: + case FPDup1: case FSNeg: case FDNeg: if (thisUsed) @@ -1604,6 +1606,24 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { fregNormalRegClear(RI, I); break; } + case FPDup0: { + if (!thisUsed) break; + X64Reg reg = fregFindFreeReg(RI); + Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I))); + Jit->PUNPCKLDQ(reg, R(reg)); + RI.fregs[reg] = I; + fregNormalRegClear(RI, I); + break; + } + case FPDup1: { + if (!thisUsed) break; + X64Reg reg = fregFindFreeReg(RI); + Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I))); + Jit->SHUFPS(reg, R(reg), 0xE5); + RI.fregs[reg] = I; + fregNormalRegClear(RI, I); + break; + } case LoadFReg: { if (!thisUsed) break; X64Reg reg = fregFindFreeReg(RI); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index 92f30d8081..5399b6a10b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -171,6 +171,8 @@ namespace IREmitter { FPMerge01, FPMerge10, FPMerge11, + FPDup0, + FPDup1, FResult_End, StorePaired, StoreSingle, @@ -463,6 +465,12 @@ namespace IREmitter { InstLoc EmitFPMerge11(InstLoc op1, InstLoc op2) { return FoldBiOp(FPMerge11, op1, op2); } + InstLoc EmitFPDup0(InstLoc op1) { + return FoldUOp(FPDup0, op1); + } + InstLoc EmitFPDup1(InstLoc op1) { + return FoldUOp(FPDup1, op1); + } InstLoc EmitFPNeg(InstLoc op1) { return FoldUOp(FPNeg, op1); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp index b34a14c2d2..19fc349381 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp @@ -230,40 +230,23 @@ void Jit64::ps_muls(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; if (inst.Rc) { Default(inst); return; } - int d = inst.FD; - int a = inst.FA; - int c = inst.FC; - fpr.Lock(a, c, d); - fpr.LoadToX64(d, d == a || d == c, true); - switch (inst.SUBOP5) - { - case 12: - // Single multiply scalar high - // TODO - faster version for when regs are different - MOVAPD(XMM0, fpr.R(a)); - MOVDDUP(XMM1, fpr.R(c)); - MULPD(XMM0, R(XMM1)); - MOVAPD(fpr.R(d), XMM0); - break; - case 13: - // TODO - faster version for when regs are different - MOVAPD(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(c)); - SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower - MULPD(XMM0, R(XMM1)); - MOVAPD(fpr.R(d), XMM0); - break; - default: - PanicAlert("ps_muls WTF!!!"); - } - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); + IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), + rhs = ibuild.EmitLoadFReg(inst.FC); + + val = ibuild.EmitCompactMRegToPacked(val); + rhs = ibuild.EmitCompactMRegToPacked(rhs); + + if (inst.SUBOP5 == 12) + rhs = ibuild.EmitFPDup0(rhs); + else + rhs = ibuild.EmitFPDup1(rhs); + + val = ibuild.EmitFPMul(val, rhs); + val = ibuild.EmitExpandPackedToMReg(val); + ibuild.EmitStoreFReg(val, inst.FD); } @@ -301,7 +284,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst) { - if (inst.Rc || (inst.SUBOP5 != 28 && inst.SUBOP5 != 29 && inst.SUBOP5 != 30)) { + if (inst.Rc) { Default(inst); return; } @@ -309,6 +292,22 @@ val = ibuild.EmitCompactMRegToPacked(val); switch (inst.SUBOP5) { + case 14: {//madds0 + op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); + op2 = ibuild.EmitFPDup0(op2); + val = ibuild.EmitFPMul(val, op2); + op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); + val = ibuild.EmitFPAdd(val, op3); + break; + } + case 15: {//madds1 + op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); + op2 = ibuild.EmitFPDup1(op2); + val = ibuild.EmitFPMul(val, op2); + op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); + val = ibuild.EmitFPAdd(val, op3); + break; + } case 28: {//msub op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); val = ibuild.EmitFPMul(val, op2); @@ -331,6 +330,14 @@ val = ibuild.EmitFPNeg(val); break; } + case 31: {//nmadd + op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC)); + val = ibuild.EmitFPMul(val, op2); + op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB)); + val = ibuild.EmitFPAdd(val, op3); + val = ibuild.EmitFPNeg(val); + break; + } } val = ibuild.EmitExpandPackedToMReg(val); ibuild.EmitStoreFReg(val, inst.FD);