From 12be9952ab3fe454c79d87d1856dab2594dd6ab7 Mon Sep 17 00:00:00 2001 From: "dok.slade" Date: Fri, 30 Jul 2010 21:28:20 +0000 Subject: [PATCH] Implemented more JIT instructions git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6008 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 2 +- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 21 + .../Core/Src/PowerPC/Jit64/Jit64_Tables.cpp | 40 +- .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 477 ++++++++++++++---- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 369 +++++++++++++- 5 files changed, 771 insertions(+), 138 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 47e63113bb..28b01e979f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -524,7 +524,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.compilerPC = ops[i].address; js.op = &ops[i]; js.instructionNumber = i; - const GekkoOPInfo *opinfo = GetOpInfo(ops[i].inst); + const GekkoOPInfo *opinfo = ops[i].opinfo; js.downcountAmount += (opinfo->numCyclesMinusOne + 1); if (i == (int)size - 1) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 4ef4ca8c1b..81d6b9f26e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -169,9 +169,15 @@ public: void DynaRunTable63(UGeckoInstruction _inst); void addx(UGeckoInstruction inst); + void addcx(UGeckoInstruction inst); void orx(UGeckoInstruction inst); + void orcx(UGeckoInstruction inst); + void norx(UGeckoInstruction inst); void xorx(UGeckoInstruction inst); + void eqvx(UGeckoInstruction inst); void andx(UGeckoInstruction inst); + void nandx(UGeckoInstruction inst); + void andcx(UGeckoInstruction inst); void mulli(UGeckoInstruction inst); void mulhwux(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst); @@ -179,6 +185,8 @@ public: void srawix(UGeckoInstruction inst); void srawx(UGeckoInstruction inst); void addex(UGeckoInstruction inst); + void addmex(UGeckoInstruction inst); + void addzex(UGeckoInstruction inst); void extsbx(UGeckoInstruction inst); void extshx(UGeckoInstruction inst); @@ -198,6 +206,17 @@ public: void mftb(UGeckoInstruction inst); void mtcrf(UGeckoInstruction inst); void mfcr(UGeckoInstruction inst); + void mcrf(UGeckoInstruction inst); + void mcrxr(UGeckoInstruction inst); + + void crand(UGeckoInstruction inst); + void crandc(UGeckoInstruction inst); + void creqv(UGeckoInstruction inst); + void crnand(UGeckoInstruction inst); + void crnor(UGeckoInstruction inst); + void cror(UGeckoInstruction inst); + void crorc(UGeckoInstruction inst); + void crxor(UGeckoInstruction inst); void reg_imm(UGeckoInstruction inst); @@ -247,6 +266,8 @@ public: void subfcx(UGeckoInstruction inst); void subfx(UGeckoInstruction inst); void subfex(UGeckoInstruction inst); + void subfmex(UGeckoInstruction inst); + void subfzex(UGeckoInstruction inst); void lbzx(UGeckoInstruction inst); void lwzx(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp index 66f40e728f..9d7ce9eac1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp @@ -174,17 +174,17 @@ static GekkoOPTemplate table19[] = { {528, &Jit64::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, {16, &Jit64::bclrx}, //"bclrx", 
OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &Jit64::Default}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &Jit64::Default}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &Jit64::Default}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &Jit64::Default}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &Jit64::Default}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &Jit64::Default}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &Jit64::Default}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &Jit64::Default}, //"crxor", OPTYPE_CR, FL_EVIL}}, + {257, &Jit64::crand}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &Jit64::crandc}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &Jit64::creqv}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &Jit64::crnand}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &Jit64::crnor}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &Jit64::cror}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &Jit64::crorc}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &Jit64::crxor}, //"crxor", OPTYPE_CR, FL_EVIL}}, {150, &Jit64::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, - {0, &Jit64::Default}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, + {0, &Jit64::mcrf}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, {50, &Jit64::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, {18, &Jit64::Default}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} @@ -194,13 +194,13 @@ static GekkoOPTemplate table19[] = static GekkoOPTemplate table31[] = { {28, &Jit64::andx}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {60, &Jit64::Default}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {60, &Jit64::andcx}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {444, &Jit64::orx}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {124, &Jit64::Default}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {124, &Jit64::norx}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {316, &Jit64::xorx}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {412, &Jit64::Default}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {476, &Jit64::Default}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {284, &Jit64::Default}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {412, &Jit64::orcx}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {476, &Jit64::nandx}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {284, &Jit64::eqvx}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {0, &Jit64::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {32, &Jit64::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {26, &Jit64::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, @@ -287,7 +287,7 @@ static GekkoOPTemplate table31[] = {339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, {371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &Jit64::Default}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {595, &Jit64::Default}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &Jit64::Default}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, @@ -307,10 +307,10 @@ static GekkoOPTemplate table31[] = static GekkoOPTemplate table31_2[] = { {266, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &Jit64::Default}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {10, &Jit64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | 
FL_SET_CA | FL_RC_BIT}}, {138, &Jit64::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {234, &Jit64::Default}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {202, &Jit64::Default}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {234, &Jit64::addmex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {202, &Jit64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {491, &Jit64::Default}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {459, &Jit64::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {75, &Jit64::Default}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, @@ -320,8 +320,8 @@ static GekkoOPTemplate table31_2[] = {40, &Jit64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {8, &Jit64::subfcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {136, &Jit64::subfex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {232, &Jit64::Default}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {200, &Jit64::Default}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {232, &Jit64::subfmex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {200, &Jit64::subfzex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, }; static GekkoOPTemplate table59[] = diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 9775a954ed..6ba6b018d5 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -28,29 +28,42 @@ void Jit64::GenerateCarry() { // USES_XER FixupBranch pNoCarry = J_CC(CC_NC); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(1 << 29)); + OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); FixupBranch pContinue = J(); SetJumpTarget(pNoCarry); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); + AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK))); SetJumpTarget(pContinue); } void Jit64::ComputeRC(const Gen::OpArg & arg) { - CMP(32, arg, Imm8(0)); - FixupBranch pLesser = J_CC(CC_L); - FixupBranch pGreater = J_CC(CC_G); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); + if( arg.IsImm() ) + { + s32 value = (s32)arg.offset; + if( value < 0 ) + MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); + else if( value > 0 ) + MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); + else + MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); + } + else + { + CMP(32, arg, Imm8(0)); + FixupBranch pLesser = J_CC(CC_L); + FixupBranch pGreater = J_CC(CC_G); + MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0 + FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0 + FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0 + SetJumpTarget(pLesser); + MOV(8, 
M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); + SetJumpTarget(continue1); + SetJumpTarget(continue2); + } } u32 Add(u32 a, u32 b) {return a + b;} @@ -139,6 +152,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) // FIXME: Seems to be required on OS X (see r5799) gpr.StoreFromX64(d); #endif + js.downcountAmount++; js.skipnext = true; break; } @@ -148,6 +162,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) // FIXME: Seems to be required on OS X (see r5799) gpr.StoreFromX64(d); #endif + js.downcountAmount++; js.skipnext = true; break; } @@ -365,6 +380,67 @@ void Jit64::orx(UGeckoInstruction inst) } } +void Jit64::orcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, s = inst.RS, b = inst.RB; + if (a != s && a != b) { + gpr.LoadToX64(a, false, true); + } else { + gpr.LoadToX64(a, true, true); + } + gpr.Lock(a, s, b); + MOV(32, R(EAX), gpr.R(b)); + NOT(32, R(EAX)); + OR(32, R(EAX), gpr.R(s)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + + if (inst.Rc) { + // result is already in eax + ComputeRC(R(EAX)); + } +} + +void Jit64::norx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + int b = inst.RB; + + if (s == b && s != a) + { + gpr.Lock(a,s); + gpr.LoadToX64(a, false); + MOV(32, gpr.R(a), gpr.R(s)); + NOT(32, gpr.R(a)); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, s, b); + gpr.LoadToX64(a, (a == s || a == b), true); + if (a == s) + OR(32, gpr.R(a), gpr.R(b)); + else if (a == b) + OR(32, gpr.R(a), gpr.R(s)); + else { + MOV(32, gpr.R(a), gpr.R(b)); + OR(32, gpr.R(a), gpr.R(s)); + } + NOT(32, gpr.R(a)); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(a)); + } +} + // m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB]; void Jit64::xorx(UGeckoInstruction inst) @@ -394,6 +470,34 @@ void Jit64::xorx(UGeckoInstruction inst) } } +void Jit64::eqvx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + int b = inst.RB; + + if (s == b) { + gpr.SetImmediate32(a, 0xFFFFFFFF); // eqv(x, x) = ~(x ^ x) = all ones + } + else + { + gpr.LoadToX64(a, a == s || a == b, true); + gpr.Lock(a, s, b); + MOV(32, R(EAX), gpr.R(s)); + XOR(32, R(EAX), gpr.R(b)); + NOT(32, R(EAX)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(a)); + } +} + void Jit64::andx(UGeckoInstruction inst) { INSTRUCTION_START @@ -416,6 +520,52 @@ void Jit64::andx(UGeckoInstruction inst) } } +void Jit64::nandx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, s = inst.RS, b = inst.RB; + if (a != s && a != b) { + gpr.LoadToX64(a, false, true); + } else { + gpr.LoadToX64(a, true, true); + } + gpr.Lock(a, s, b); + MOV(32, R(EAX), gpr.R(s)); + AND(32, R(EAX), gpr.R(b)); + NOT(32, R(EAX)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + + if (inst.Rc) { + // result is already in eax + ComputeRC(R(EAX)); + } +} + +void Jit64::andcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, s = inst.RS, b = inst.RB; + if (a != s && a != b) { + gpr.LoadToX64(a, false, true); + } else { + gpr.LoadToX64(a, true, true); + } + gpr.Lock(a, s, b); + MOV(32, R(EAX), gpr.R(b)); + NOT(32, R(EAX)); + AND(32, R(EAX), gpr.R(s)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + + if (inst.Rc) { + // result is already in eax + ComputeRC(R(EAX)); + } +} + void Jit64::extsbx(UGeckoInstruction inst) { INSTRUCTION_START @@ -542,6 +692,80 @@ void 
Jit64::subfex(UGeckoInstruction inst) } } +void Jit64::subfmex(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + + if (d == a) + { + gpr.Lock(a, d); + gpr.LoadToX64(d, true); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + NOT(32, gpr.R(d)); + ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, d); + gpr.LoadToX64(d, false); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, gpr.R(d), gpr.R(a)); + NOT(32, gpr.R(d)); + ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); + GenerateCarry(); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); + } +} + +void Jit64::subfzex(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + + if (d == a) + { + gpr.Lock(a, d); + gpr.LoadToX64(d, true); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + NOT(32, gpr.R(d)); + ADC(32, gpr.R(d), Imm8(0)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, d); + gpr.LoadToX64(d, false); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, gpr.R(d), gpr.R(a)); + NOT(32, gpr.R(d)); + ADC(32, gpr.R(d), Imm8(0)); + GenerateCarry(); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); + } +} + void Jit64::subfx(UGeckoInstruction inst) { INSTRUCTION_START @@ -655,103 +879,178 @@ void Jit64::divwux(UGeckoInstruction inst) void Jit64::addx(UGeckoInstruction inst) { - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, b = inst.RB, d = inst.RD; - _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); - - if (a != d && b != d && a != b) + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); + + if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { gpr.Lock(a, b, d); gpr.LoadToX64(d, false); - if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); - } else { - MOV(32, gpr.R(d), gpr.R(a)); - ADD(32, gpr.R(d), gpr.R(b)); - } - if (inst.Rc) - { - ComputeRC(gpr.R(d)); - } + LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); gpr.UnlockAll(); } - else if (d == a && d != b) + else if ((d == a) || (d == b)) { - gpr.Lock(b, d); + int operand = ((d == a) ? 
b : a); + gpr.Lock(a, b, d); gpr.LoadToX64(d, true); - ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - ComputeRC(gpr.R(d)); - } - gpr.UnlockAll(); - } - else if (d == b && d != a) - { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); - ADD(32, gpr.R(d), gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(d)); - } - gpr.UnlockAll(); - } - else if( a == b && b == d && a == d) - { - gpr.Lock(d); - gpr.LoadToX64(d, true); - ADD(32, gpr.R(d), gpr.R(d)); - if (inst.Rc) - { - ComputeRC(gpr.R(d)); - } - gpr.UnlockAll(); - } - else if( a == b && b != d) - { - gpr.Lock(a, d); - gpr.LoadToX64(d, false); - MOV(32, gpr.R(d), gpr.R(a)); - ADD(32, gpr.R(d), gpr.R(d)); - if (inst.Rc) - { - ComputeRC(gpr.R(d)); - } + ADD(32, gpr.R(d), gpr.R(operand)); gpr.UnlockAll(); } else { - Default(inst); return; + gpr.Lock(a, b, d); + gpr.LoadToX64(d, false); + MOV(32, gpr.R(d), gpr.R(a)); + ADD(32, gpr.R(d), gpr.R(b)); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); } } -// This can be optimized void Jit64::addex(UGeckoInstruction inst) { - // USES_XER - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - if (d != a && d != b) - gpr.LoadToX64(d, false); - else + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + + if ((d == a) || (d == b)) + { + gpr.Lock(a, b, d); gpr.LoadToX64(d, true); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag - MOV(32, R(EAX), gpr.R(a)); - ADC(32, R(EAX), gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); - GenerateCarry(); - gpr.UnlockAll(); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, b, d); + gpr.LoadToX64(d, false); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, gpr.R(d), gpr.R(a)); + ADC(32, gpr.R(d), gpr.R(b)); + GenerateCarry(); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); + } +} + +void Jit64::addcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); + + if ((d == a) || (d == b)) + { + int operand = ((d == a) ? 
b : a); + gpr.Lock(a, b, d); + gpr.LoadToX64(d, true); + ADD(32, gpr.R(d), gpr.R(operand)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, b, d); + gpr.LoadToX64(d, false); + MOV(32, gpr.R(d), gpr.R(a)); + ADD(32, gpr.R(d), gpr.R(b)); + GenerateCarry(); + gpr.UnlockAll(); + } + if (inst.Rc) { - ComputeRC(R(EAX)); + ComputeRC(gpr.R(d)); } } +void Jit64::addmex(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + + if (d == a) + { + gpr.Lock(a, d); + gpr.LoadToX64(d, true); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, d); + gpr.LoadToX64(d, false); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, gpr.R(d), gpr.R(a)); + ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); + GenerateCarry(); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); + } +} + +void Jit64::addzex(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + + if (d == a) + { + gpr.Lock(a, d); + gpr.LoadToX64(d, true); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + ADC(32, gpr.R(d), Imm8(0)); + GenerateCarry(); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, d); + gpr.LoadToX64(d, false); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, gpr.R(d), gpr.R(a)); + ADC(32, gpr.R(d), Imm8(0)); + GenerateCarry(); + gpr.UnlockAll(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); + } +} + void Jit64::rlwinmx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 40990a7f52..5d3eebbd36 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -146,12 +146,10 @@ void Jit64::mfcr(UGeckoInstruction inst) int d = inst.RD; gpr.LoadToX64(d, false, true); MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); - SHL(32, R(EAX), Imm8(4)); - for (int i = 1; i < 7; i++) { - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); + for (int i = 1; i < 8; i++) { SHL(32, R(EAX), Imm8(4)); + OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); } - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7])); MOV(32, gpr.R(d), R(EAX)); } @@ -161,32 +159,347 @@ void Jit64::mtcrf(UGeckoInstruction inst) JITDISABLE(SystemRegisters) // USES_CR - u32 mask = 0; u32 crm = inst.CRM; - if (crm == 0xFF) { - gpr.FlushLockX(ECX); - MOV(32, R(EAX), gpr.R(inst.RS)); - for (int i = 0; i < 8; i++) { - MOV(32, R(ECX), R(EAX)); - SHR(32, R(ECX), Imm8(28 - (i * 4))); - AND(32, R(ECX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); + if (crm != 0) + { + if (gpr.R(inst.RS).IsImm()) + { + for (int i = 0; i < 8; i++) + { + if ((crm & (0x80 >> i)) != 0) + { + u8 newcr = (gpr.R(inst.RS).offset >> (28 - (i * 4))) & 0xF; + MOV(8, M(&PowerPC::ppcState.cr_fast[i]), Imm8(newcr)); + } + } } - gpr.UnlockAllX(); - } else { - Default(inst); - return; - - // TODO: translate this to work in new CR model. 
- for (int i = 0; i < 8; i++) { - if (crm & (1 << i)) - mask |= 0xF << (i*4); + else + { + gpr.LoadToX64(inst.RS, true); + for (int i = 0; i < 8; i++) + { + if ((crm & (0x80 >> i)) != 0) + { + MOV(32, R(EAX), gpr.R(inst.RS)); + SHR(32, R(EAX), Imm8(28 - (i * 4))); + AND(32, R(EAX), Imm32(0xF)); + MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); + } + } } - MOV(32, R(EAX), gpr.R(inst.RS)); - MOV(32, R(ECX), M(&PowerPC::ppcState.cr)); - AND(32, R(EAX), Imm32(mask)); - AND(32, R(ECX), Imm32(~mask)); - OR(32, R(EAX), R(ECX)); - MOV(32, M(&PowerPC::ppcState.cr), R(EAX)); } } + +void Jit64::mcrf(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + if (inst.CRFS != inst.CRFD) + { + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRFS])); + MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + } +} + +void Jit64::mcrxr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Copy XER[0-3] into CR[inst.CRFD] + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(28)); + MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + + // Clear XER[0-3] + AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); +} + +void Jit64::crand(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + AND(8, R(EAX), R(ECX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::crandc(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + NOT(8, R(ECX)); + AND(8, R(EAX), R(ECX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::creqv(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), 
Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + XOR(8, R(EAX), R(ECX)); + NOT(8, R(EAX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::crnand(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + AND(8, R(EAX), R(ECX)); + NOT(8, R(EAX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::crnor(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + OR(8, R(EAX), R(ECX)); + NOT(8, R(EAX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::cror(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + OR(8, R(EAX), R(ECX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, 
M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::crorc(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + NOT(8, R(ECX)); + OR(8, R(EAX), R(ECX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +} + +void Jit64::crxor(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + // USES_CR + + // Get bit CRBA in EAX aligned with bit CRBD + int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); + if (shiftA < 0) + SHL(8, R(EAX), Imm8(-shiftA)); + else if (shiftA > 0) + SHR(8, R(EAX), Imm8(shiftA)); + + // Get bit CRBB in ECX aligned with bit CRBD + gpr.FlushLockX(ECX); + int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); + MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); + if (shiftB < 0) + SHL(8, R(ECX), Imm8(-shiftB)); + else if (shiftB > 0) + SHR(8, R(ECX), Imm8(shiftB)); + + // Compute combined bit + XOR(8, R(EAX), R(ECX)); + + // Store result bit in CRBD + AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); + AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); + OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + + gpr.UnlockAllX(); +}
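
Reader's note on the patterns used above (illustrative only, not part of the patch): the condition register is cached as eight one-byte fields, and CR bit n is held in cr_fast[n >> 2] under the mask 0x8 >> (n & 3). The new crand/crandc/creqv/crnand/crnor/cror/crorc/crxor handlers all emit the same shape of code: fetch the byte holding CRBA, shift so the source bit lines up with the destination bit position, do the same for CRBB in ECX, combine the two, then mask the single result bit into cr_fast[CRBD >> 2]. The carry-consuming integer handlers (addex, addcx, addmex, addzex, subfmex, subfzex) recover XER[CA] by shifting XER right by 30, so that bit 29 (CA, the XER_CA_MASK bit) is the last bit shifted out and lands in the x86 carry flag just before the ADC. The following stand-alone C++ sketch (hypothetical helper, not code from the patch) shows the computation the generated crand sequence performs:

#include <cstdint>

// Hypothetical stand-in for PowerPC::ppcState.cr_fast: one byte per CR field,
// where CR bit n of the architected register is (cr_fast[n >> 2] & (0x8 >> (n & 3))).
static uint8_t cr_fast[8];

// What the JIT'd crand sequence computes: CR bit crbD = CR bit crbA & CR bit crbB.
// crbD/crbA/crbB are PowerPC CR bit numbers (0..31).
static void InterpretCrand(int crbD, int crbA, int crbB)
{
	const uint8_t maskA = 0x8 >> (crbA & 3);
	const uint8_t maskB = 0x8 >> (crbB & 3);
	const uint8_t maskD = 0x8 >> (crbD & 3);

	const bool bitA = (cr_fast[crbA >> 2] & maskA) != 0;
	const bool bitB = (cr_fast[crbB >> 2] & maskB) != 0;

	// Clear the destination bit, then OR in the combined bit -- the same
	// AND/AND/OR store pattern the emitted x86 applies to cr_fast[CRBD >> 2].
	cr_fast[crbD >> 2] &= ~maskD;
	if (bitA && bitB)
		cr_fast[crbD >> 2] |= maskD;
}

The other cr* handlers differ only in the boolean operator applied to bitA and bitB, and in whether one of the inputs is complemented first (crandc, crorc).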