diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index bf8d86494e..2e8744338d 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -210,6 +210,7 @@ if(_M_ARM) Src/PowerPC/JitArm32/JitArm_Integer.cpp Src/PowerPC/JitArm32/JitArm_LoadStore.cpp Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp + Src/PowerPC/JitArm32/JitArm_Paired.cpp Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp) endif() diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index 54df2052f0..f3fded30e8 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -118,6 +118,10 @@ public: void ComputeRC(int cr = 0); void ComputeRC(s32 value, int cr); + void ComputeCarry(); + void GetCarryAndClear(ARMReg reg); + void FinalizeCarry(ARMReg reg); + // OPCODES void unknown_instruction(UGeckoInstruction _inst); void Default(UGeckoInstruction _inst); @@ -144,6 +148,8 @@ public: void addi(UGeckoInstruction _inst); void addis(UGeckoInstruction _inst); void addx(UGeckoInstruction _inst); + void addcx(UGeckoInstruction _inst); + void addex(UGeckoInstruction _inst); void cmp (UGeckoInstruction _inst); void cmpi(UGeckoInstruction _inst); void cmpl(UGeckoInstruction _inst); @@ -187,12 +193,22 @@ public: // Floating point void fabsx(UGeckoInstruction _inst); + void faddsx(UGeckoInstruction _inst); void faddx(UGeckoInstruction _inst); + void fsubsx(UGeckoInstruction _inst); + void fsubx(UGeckoInstruction _inst); + void fmulsx(UGeckoInstruction _inst); + void fmulx(UGeckoInstruction _inst); void fmrx(UGeckoInstruction _inst); // Floating point loadStore void lfs(UGeckoInstruction _inst); void lfd(UGeckoInstruction _inst); + + // Paired Singles + void ps_add(UGeckoInstruction _inst); + void ps_sub(UGeckoInstruction _inst); + void ps_mul(UGeckoInstruction _inst); }; #endif // _JIT64_H diff --git 
a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index c82c7d3877..6dd1f80cd6 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -51,6 +51,21 @@ void JitArm::fabsx(UGeckoInstruction inst) if (inst.Rc) Helper_UpdateCR1(vD); } +/* faddsx: emits FD = FA plus FB from the R0 halves and writes the sum to both halves (R0 and R1) of FD. The R1 copy is emitted first on purpose: vD0 comes from the same fpr.R0 lookup as vA when FD equals FA (or as vB when FD equals FB), so it presumably names the same host register, and writing vD0 first would feed the second VADD an already-overwritten source. When Rc is set, Helper_UpdateCR1 is called on vD0. */ void JitArm::faddsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + ARMReg vD0 = fpr.R0(inst.FD); + ARMReg vD1 = fpr.R1(inst.FD); + ARMReg vA = fpr.R0(inst.FA); + ARMReg vB = fpr.R0(inst.FB); + + VADD(vD1, vA, vB); + VADD(vD0, vA, vB); + if (inst.Rc) Helper_UpdateCR1(vD0); +} + void JitArm::faddx(UGeckoInstruction inst) { INSTRUCTION_START @@ -64,6 +79,65 @@ void JitArm::faddx(UGeckoInstruction inst) if (inst.Rc) Helper_UpdateCR1(vD); } +// Breaks Animal crossing +/* fsubsx: always falls back to Default(inst) for now; everything after the return is unreachable and kept for reference. The parked code would emit FD = FA minus FB into both halves of FD, R1 copy first so that a vD0 aliasing vA or vB cannot corrupt the second VSUB. NOTE(review): emitting vD0 first would give a wrong R1 result whenever FD equals FA or FB; that may be the cause of the breakage noted above - re-test before removing the fallback. */ void JitArm::fsubsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + Default(inst); return; + + ARMReg vD0 = fpr.R0(inst.FD); + ARMReg vD1 = fpr.R1(inst.FD); + ARMReg vA = fpr.R0(inst.FA); + ARMReg vB = fpr.R0(inst.FB); + + VSUB(vD1, vA, vB); + VSUB(vD0, vA, vB); + if (inst.Rc) Helper_UpdateCR1(vD0); +} + +/* fsubx: emits a single VSUB on the R0 halves, FD = FA minus FB; only R0 of FD is written. When Rc is set, Helper_UpdateCR1 is called on the result register. */ void JitArm::fsubx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + ARMReg vD = fpr.R0(inst.FD); + ARMReg vA = fpr.R0(inst.FA); + ARMReg vB = fpr.R0(inst.FB); + + VSUB(vD, vA, vB); + if (inst.Rc) Helper_UpdateCR1(vD); +} +// Breaks animal crossing +/* fmulsx: always falls back to Default(inst) for now; everything after the return is unreachable and kept for reference. The parked code would emit FD = FA times FC into both halves of FD, R1 copy first so that a vD0 aliasing vA or vC cannot corrupt the second VMUL. NOTE(review): emitting vD0 first would give a wrong R1 result whenever FD equals FA or FC; that may be the cause of the breakage noted above - re-test before removing the fallback. */ void JitArm::fmulsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + Default(inst); return; + + ARMReg vD0 = fpr.R0(inst.FD); + ARMReg vD1 = fpr.R1(inst.FD); + ARMReg vA = fpr.R0(inst.FA); + ARMReg vC = fpr.R0(inst.FC); + + VMUL(vD1, vA, vC); + VMUL(vD0, vA, vC); + if (inst.Rc) Helper_UpdateCR1(vD0); +} +/* fmulx: emits a single VMUL on the R0 halves, FD = FA times FC; only R0 of FD is written. When Rc is set, Helper_UpdateCR1 is called on the result register. */ void JitArm::fmulx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + ARMReg vD0 = fpr.R0(inst.FD); + ARMReg vA =
fpr.R0(inst.FA); + ARMReg vC = fpr.R0(inst.FC); + + VMUL(vD0, vA, vC); + if (inst.Rc) Helper_UpdateCR1(vD0); +} void JitArm::fmrx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp index 0e5249d4ce..d0c4f35fe3 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp @@ -67,6 +67,45 @@ void JitArm::ComputeRC(s32 value, int cr) { STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); } + +/* ComputeCarry: emits code that loads XER from the PPC state at R9, ORRs in the CA mask under SetCC(CC_CS), BICs it out under SetCC(CC_CC), and stores XER back. Assuming SetCC predicates the instructions emitted after it, XER CA ends up mirroring the host carry flag, so this must be emitted right after the flag-setting instruction (addcx calls it directly after ADDS). Borrows one temporary GPR and releases it before returning. */ void JitArm::ComputeCarry() +{ + ARMReg tmp = gpr.GetReg(); + Operand2 mask = Operand2(2, 2); // XER_CA_MASK + LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + SetCC(CC_CS); + ORR(tmp, tmp, mask); + SetCC(CC_CC); + BIC(tmp, tmp, mask); + SetCC(); + STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + gpr.Unlock(tmp); +} + +/* GetCarryAndClear: emits code that copies the CA bit of XER into reg (masked in place, not shifted down to bit 0), then clears CA in XER and stores XER back. Borrows one temporary GPR and releases it before returning. */ void JitArm::GetCarryAndClear(ARMReg reg) +{ + ARMReg tmp = gpr.GetReg(); + Operand2 mask = Operand2(2, 2); // XER_CA_MASK + LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + AND(reg, tmp, mask); + BIC(tmp, tmp, mask); + STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + gpr.Unlock(tmp); +} + +/* FinalizeCarry: emits code that ORRs the CA mask into reg under SetCC(CC_CS), then loads XER, ORRs reg into it and stores it back. NOTE(review): in addex, reg still holds the incoming CA bit from GetCarryAndClear, so a CA that was set on entry can never end up cleared here - confirm and fix before enabling addex. */ void JitArm::FinalizeCarry(ARMReg reg) +{ + ARMReg tmp = gpr.GetReg(); + Operand2 mask = Operand2(2, 2); // XER_CA_MASK + SetCC(CC_CS); + ORR(reg, reg, mask); + SetCC(); + LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + ORR(tmp, tmp, reg); + STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + gpr.Unlock(tmp); +} + void JitArm::addi(UGeckoInstruction inst) { INSTRUCTION_START @@ -129,6 +168,37 @@ void JitArm::addx(UGeckoInstruction inst) ADDS(RD, RA, RB); if (inst.Rc) ComputeRC(); } + +/* addcx: emits ADDS RD = RA plus RB, then ComputeCarry to record the resulting carry in XER CA; when Rc is set, ComputeRC is called afterwards. */ void JitArm::addcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + u32 a = inst.RA, b = inst.RB, d = inst.RD; + + ARMReg RA = gpr.R(a); + ARMReg RB = gpr.R(b); + ARMReg RD = gpr.R(d); + ADDS(RD, RA, RB); + ComputeCarry(); + if (inst.Rc) ComputeRC(); +} +/* addex: always falls back to Default(inst) for now; everything after the return is unreachable and kept for reference. NOTE(review): the parked sequence emits a plain ADDS of RA and RB, so the carry fetched by GetCarryAndClear is never added into RD - this needs fixing before the fallback is removed. */ void JitArm::addex(UGeckoInstruction inst) +{ +
INSTRUCTION_START + JITDISABLE(Integer) + u32 a = inst.RA, b = inst.RB, d = inst.RD; + Default(inst); return; + ARMReg RA = gpr.R(a); + ARMReg RB = gpr.R(b); + ARMReg RD = gpr.R(d); + ARMReg rA = gpr.GetReg(); + GetCarryAndClear(rA); + ADDS(RD, RA, RB); + FinalizeCarry(rA); + if (inst.Rc) ComputeRC(); + gpr.Unlock(rA); +} + void JitArm::subfx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp new file mode 100644 index 0000000000..39a8320389 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" + +// Wrong, THP videos like SMS and Ikaruga show artifacts +/* ps_add: always falls back to Default(inst) for now; everything after the first return, including the Rc check, is unreachable and kept for reference. The parked code would emit one VADD per half: D0 = A0 plus B0 and D1 = A1 plus B1. */ void JitArm::ps_add(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + + Default(inst); return; + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + if (inst.Rc){ + Default(inst); return; + } + ARMReg vA0 = fpr.R0(a); + ARMReg vA1 = fpr.R1(a); + ARMReg vB0 = fpr.R0(b); + ARMReg vB1 = fpr.R1(b); + ARMReg vD0 = fpr.R0(d, false); + ARMReg vD1 = fpr.R1(d, false); + + VADD(vD0, vA0, vB0); + VADD(vD1, vA1, vB1); +} +/* ps_sub: when Rc is set, falls back to Default(inst). Otherwise emits one VSUB per half: D0 = A0 minus B0 and D1 = A1 minus B1. Both source registers are requested before the destination, and the destination halves are requested with a second argument of false. NOTE(review): presumably false means the old value of FD is not loaded - confirm against JitRegCache. */ void JitArm::ps_sub(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + + u32 a = inst.FA, b = inst.FB, d = inst.FD; + if (inst.Rc){ + Default(inst); return; + } + ARMReg vA0 = fpr.R0(a); + ARMReg vA1 = fpr.R1(a); + ARMReg vB0 = fpr.R0(b); + ARMReg vB1 = fpr.R1(b); + ARMReg vD0 = fpr.R0(d, false); + ARMReg vD1 = fpr.R1(d, false); + + VSUB(vD0, vA0, vB0); + VSUB(vD1, vA1, vB1); +} + +/* ps_mul: when Rc is set, falls back to Default(inst). Otherwise emits one VMUL per half using FA and FC: D0 = A0 times C0 and D1 = A1 times C1. Both source registers are requested before the destination, and the destination halves are requested with a second argument of false. NOTE(review): presumably false means the old value of FD is not loaded - confirm against JitRegCache. */ void JitArm::ps_mul(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + u32 a = inst.FA, c = inst.FC, d = inst.FD; + if (inst.Rc){ + Default(inst); return; + } + ARMReg vA0 = fpr.R0(a); + ARMReg vA1 = fpr.R1(a); + ARMReg vC0 = fpr.R0(c); + ARMReg vC1 = fpr.R1(c); + ARMReg vD0 = fpr.R0(d, false); + ARMReg vD1 = fpr.R1(d, false); + + VMUL(vD0, vA0, vC0); + VMUL(vD1, vA1, vC1); +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index 2557b2de9c..7d9ae6f47a 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -150,11
+150,11 @@ static GekkoOPTemplate table4_2[] = {14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}}, {15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}}, {18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}}, - {20, &JitArm::Default}, //"ps_sub", OPTYPE_PS, 0}}, - {21, &JitArm::Default}, //"ps_add", OPTYPE_PS, 0}}, + {20, &JitArm::ps_sub}, //"ps_sub", OPTYPE_PS, 0}}, + {21, &JitArm::ps_add}, //"ps_add", OPTYPE_PS, 0}}, {23, &JitArm::Default}, //"ps_sel", OPTYPE_PS, 0}}, {24, &JitArm::Default}, //"ps_res", OPTYPE_PS, 0}}, - {25, &JitArm::Default}, //"ps_mul", OPTYPE_PS, 0}}, + {25, &JitArm::ps_mul}, //"ps_mul", OPTYPE_PS, 0}}, {26, &JitArm::Default}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, {28, &JitArm::Default}, //"ps_msub", OPTYPE_PS, 0}}, {29, &JitArm::Default}, //"ps_madd", OPTYPE_PS, 0}}, @@ -309,8 +309,8 @@ static GekkoOPTemplate table31_2[] = { {266, &JitArm::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {778, &JitArm::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &JitArm::Default}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {138, &JitArm::Default}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {10, &JitArm::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {138, &JitArm::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {234, &JitArm::Default}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {202, &JitArm::Default}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {491, &JitArm::Default}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, @@ -333,11 +333,11 @@ static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table59[] = { {18, &JitArm::Default}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, - {20, &JitArm::Default}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - 
{21, &JitArm::Default}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {20, &JitArm::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, // {22, &JitArm::Default}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko {24, &JitArm::Default}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArm::Default}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &JitArm::Default}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &JitArm::Default}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, &JitArm::Default}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, @@ -367,11 +367,11 @@ static GekkoOPTemplate table63[] = static GekkoOPTemplate table63_2[] = { {18, &JitArm::Default}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, - {20, &JitArm::Default}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {20, &JitArm::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {21, &JitArm::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, {22, &JitArm::Default}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, &JitArm::Default}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArm::Default}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, {26, &JitArm::Default}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &JitArm::Default}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &JitArm::Default}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},