From 1faff6aeb71ff1eb36b29a97c2f35ce5be4ee42a Mon Sep 17 00:00:00 2001 From: "dok.slade" Date: Fri, 6 Aug 2010 19:35:40 +0000 Subject: [PATCH] JIT compiler: * Improved constant folding/propagation in integer instructions * Merged boolean instructions git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6063 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 9 +- .../Core/Src/PowerPC/Jit64/Jit64_Tables.cpp | 16 +- .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 1128 ++++++++++------- 3 files changed, 673 insertions(+), 480 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 74ba93f873..dc9a8aec0f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -170,14 +170,6 @@ public: void addx(UGeckoInstruction inst); void addcx(UGeckoInstruction inst); - void orx(UGeckoInstruction inst); - void orcx(UGeckoInstruction inst); - void norx(UGeckoInstruction inst); - void xorx(UGeckoInstruction inst); - void eqvx(UGeckoInstruction inst); - void andx(UGeckoInstruction inst); - void nandx(UGeckoInstruction inst); - void andcx(UGeckoInstruction inst); void mulli(UGeckoInstruction inst); void mulhwux(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst); @@ -209,6 +201,7 @@ public: void mcrf(UGeckoInstruction inst); void mcrxr(UGeckoInstruction inst); + void boolX(UGeckoInstruction inst); void crXXX(UGeckoInstruction inst); void reg_imm(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp index 71fbd7e200..ef50a62057 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp @@ -193,14 +193,14 @@ static GekkoOPTemplate table19[] = static GekkoOPTemplate table31[] = { - {28, &Jit64::andx}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {60, &Jit64::andcx}, 
//"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {444, &Jit64::orx}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {124, &Jit64::norx}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {316, &Jit64::xorx}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {412, &Jit64::orcx}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {476, &Jit64::nandx}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {284, &Jit64::eqvx}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {28, &Jit64::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {60, &Jit64::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {444, &Jit64::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {124, &Jit64::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {316, &Jit64::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {412, &Jit64::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {476, &Jit64::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {284, &Jit64::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {0, &Jit64::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {32, &Jit64::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {26, &Jit64::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 2ae6e77705..81f7d388f5 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -76,19 +76,16 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void gpr.Lock(d, a); if (a || binary || carry) // yeh nasty special case addic { - if (a == d) + if (gpr.R(a).IsImm() && !carry) { - if (gpr.R(d).IsImm() && !carry) 
- { - gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value)); - } - else - { - gpr.KillImmediate(d, true, true); - (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (carry) - GenerateCarry(); - } + gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value)); + } + else if (a == d) + { + gpr.KillImmediate(d, true, true); + (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; + if (carry) + GenerateCarry(); } else { @@ -115,7 +112,6 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void } if (Rc) { - // Todo - special case immediates. ComputeRC(gpr.R(d)); } gpr.UnlockAll(); @@ -184,8 +180,8 @@ void Jit64::reg_imm(UGeckoInstruction inst) case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break; case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris - case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, false, true); //addic - case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, true, true); //addic_rc + case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic + case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc default: Default(inst); break; @@ -216,356 +212,438 @@ void Jit64::cmpXX(UGeckoInstruction inst) } } - Gen::CCFlags less_than, greater_than; OpArg comparand; + bool signedCompare; if (inst.OPCD == 31) { + // cmp / cmpl gpr.Lock(a, b); - gpr.BindToRegister(a, true, false); comparand = gpr.R(b); - if (inst.SUBOP10 == 32) { - //cmpl - less_than = CC_B; - greater_than = CC_A; - } else { - //cmp - less_than = CC_L; - greater_than = CC_G; - } + signedCompare = (inst.SUBOP10 == 0); } else { gpr.Lock(a); - gpr.KillImmediate(a, true, false); // todo, optimize instead, but unlikely 
to make a difference if (inst.OPCD == 10) { //cmpli - less_than = CC_B; - greater_than = CC_A; - comparand = Imm32(inst.UIMM); + comparand = Imm32((u32)inst.UIMM); + signedCompare = false; } else if (inst.OPCD == 11) { //cmpi - less_than = CC_L; - greater_than = CC_G; - comparand = Imm32((s32)(s16)inst.UIMM); + comparand = Imm32((u32)(s32)(s16)inst.UIMM); + signedCompare = true; } else { PanicAlert("cmpXX"); - less_than = CC_O; - greater_than = CC_O; - comparand = Imm32(0); } } - if (!merge_branch) + if (gpr.R(a).IsImm() && comparand.IsImm()) { - // Keep the normal code separate for clarity. - CMP(32, gpr.R(a), comparand); - - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); - // TODO: If we ever care about SO, borrow a trick from - // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc - } else { - js.downcountAmount++; - int test_bit = 8 >> (js.next_inst.BI & 3); - bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? 
false : true; - CMP(32, gpr.R(a), comparand); - gpr.UnlockAll(); - - // Test swapping (in the future, will be used to inline across branches the right way) - // if (rand() & 1) - // std::swap(destination1, destination2), condition = !condition; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 - FixupBranch continue3; - if (!!(8 & test_bit) == condition) continue3 = J(); - if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); - if (js.next_inst.OPCD == 16) // bcx + // Both registers contain immediate values, so we can pre-compile the compare result + u8 compareResult; + if (signedCompare) { - if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - - u32 destination; - if (js.next_inst.AA) - destination = SignExt16(js.next_inst.BD << 2); + if ((s32)gpr.R(a).offset == (s32)comparand.offset) + compareResult = 0x2; + else if ((s32)gpr.R(a).offset > (s32)comparand.offset) + compareResult = 0x4; else - destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination, 0); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx - { - if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - MOV(32, R(EAX), M(&CTR)); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(0); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx - { - MOV(32, R(EAX), M(&LR)); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - WriteExitDestInEAX(0); + compareResult = 
0x8; } else { - PanicAlert("WTF invalid branch"); + if ((u32)gpr.R(a).offset == (u32)comparand.offset) + compareResult = 0x2; + else if ((u32)gpr.R(a).offset > (u32)comparand.offset) + compareResult = 0x4; + else + compareResult = 0x8; + } + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult)); + gpr.UnlockAll(); + + if (merge_branch) + { + js.downcountAmount++; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + + int test_bit = 8 >> (js.next_inst.BI & 3); + u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0; + if ((compareResult & test_bit) == conditionResult) + { + if (js.next_inst.OPCD == 16) // bcx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + + u32 destination; + if (js.next_inst.AA) + destination = SignExt16(js.next_inst.BD << 2); + else + destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); + WriteExit(destination, 0); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, R(EAX), M(&CTR)); + AND(32, R(EAX), Imm32(0xFFFFFFFC)); + WriteExitDestInEAX(0); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx + { + MOV(32, R(EAX), M(&LR)); + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + WriteExitDestInEAX(0); + } + else + { + PanicAlert("WTF invalid branch"); + } + } + else + { + WriteExit(js.next_compilerPC + 4, 0); + } + + js.cancel = true; + } + } + else + { + Gen::CCFlags less_than, greater_than; + if (signedCompare) + { + less_than = CC_L; + greater_than = CC_G; + } + else + { + less_than = CC_B; + greater_than = CC_A; } - if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); - if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg())) + { + // 
Syntax for CMP is invalid with such arguments. We must load RA in a register. + gpr.BindToRegister(a, true, false); + } + CMP(32, gpr.R(a), comparand); + gpr.UnlockAll(); - WriteExit(js.next_compilerPC + 4, 1); + if (!merge_branch) + { + // Keep the normal code separate for clarity. + + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 + FixupBranch continue1 = J(); + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 + FixupBranch continue2 = J(); + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 + SetJumpTarget(continue1); + SetJumpTarget(continue2); + // TODO: If we ever care about SO, borrow a trick from + // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc + } else { + js.downcountAmount++; + int test_bit = 8 >> (js.next_inst.BI & 3); + bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? 
false : true; + + // Test swapping (in the future, will be used to inline across branches the right way) + // if (rand() & 1) + // std::swap(destination1, destination2), condition = !condition; - js.cancel = true; + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 + FixupBranch continue1 = J(); + + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 + FixupBranch continue2 = J(); + + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 + FixupBranch continue3; + if (!!(8 & test_bit) == condition) continue3 = J(); + if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + if (js.next_inst.OPCD == 16) // bcx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + + u32 destination; + if (js.next_inst.AA) + destination = SignExt16(js.next_inst.BD << 2); + else + destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); + WriteExit(destination, 0); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, R(EAX), M(&CTR)); + AND(32, R(EAX), Imm32(0xFFFFFFFC)); + WriteExitDestInEAX(0); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx + { + MOV(32, R(EAX), M(&LR)); + AND(32, R(EAX), Imm32(0xFFFFFFFC)); + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + WriteExitDestInEAX(0); + } + else + { + PanicAlert("WTF invalid branch"); + } + + if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); + if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + + WriteExit(js.next_compilerPC + 4, 1); + + js.cancel = true; + } } gpr.UnlockAll(); 
} -void Jit64::orx(UGeckoInstruction inst) +void Jit64::boolX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(Integer) - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; - - if (s == b && s != a) + int a = inst.RA, s = inst.RS, b = inst.RB; + _dbg_assert_msg_(DYNA_REC, inst.OPCD == 31, "Invalid boolX"); + + if (gpr.R(s).IsImm() && gpr.R(b).IsImm()) { - gpr.Lock(a,s); - gpr.BindToRegister(a, false); - MOV(32, gpr.R(a), gpr.R(s)); + if (inst.SUBOP10 == 28) /* andx */ + gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (u32)gpr.R(b).offset); + else if (inst.SUBOP10 == 476) /* nandx */ + gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset & (u32)gpr.R(b).offset)); + else if (inst.SUBOP10 == 60) /* andcx */ + gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (~(u32)gpr.R(b).offset)); + else if (inst.SUBOP10 == 444) /* orx */ + gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (u32)gpr.R(b).offset); + else if (inst.SUBOP10 == 124) /* norx */ + gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset | (u32)gpr.R(b).offset)); + else if (inst.SUBOP10 == 412) /* orcx */ + gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (~(u32)gpr.R(b).offset)); + else if (inst.SUBOP10 == 316) /* xorx */ + gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset); + else if (inst.SUBOP10 == 284) /* eqvx */ + gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset)); + } + else if (s == b) + { + if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */)) + { + if (a != s) + { + gpr.Lock(a,s); + gpr.BindToRegister(a, false, true); + MOV(32, gpr.R(a), gpr.R(s)); + gpr.UnlockAll(); + } + } + else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */)) + { + if (a != s) + { + gpr.Lock(a,s); + gpr.BindToRegister(a, false, true); + MOV(32, gpr.R(a), gpr.R(s)); + } + else + { + gpr.KillImmediate(a, true, true); + } + NOT(32, gpr.R(a)); + gpr.UnlockAll(); + } + else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */)) + { + gpr.SetImmediate32(a, 
0xFFFFFFFF); + } + else if ((inst.SUBOP10 == 60 /* andcx */) || (inst.SUBOP10 == 316 /* xorx */)) + { + gpr.SetImmediate32(a, 0); + } + else + { + PanicAlert("WTF!"); + } + } + else if ((a == s) || (a == b)) + { + gpr.Lock(a,((a == s) ? b : s)); + OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s)); + gpr.BindToRegister(a, true, true); + + if (inst.SUBOP10 == 28) /* andx */ + { + AND(32, gpr.R(a), operand); + } + else if (inst.SUBOP10 == 476) /* nandx */ + { + AND(32, gpr.R(a), operand); + NOT(32, gpr.R(a)); + } + else if (inst.SUBOP10 == 60) /* andcx */ + { + if (a == b) + { + NOT(32, gpr.R(a)); + AND(32, gpr.R(a), operand); + } + else + { + MOV(32, R(EAX), operand); + NOT(32, R(EAX)); + AND(32, gpr.R(a), R(EAX)); + } + } + else if (inst.SUBOP10 == 444) /* orx */ + { + OR(32, gpr.R(a), operand); + } + else if (inst.SUBOP10 == 124) /* norx */ + { + OR(32, gpr.R(a), operand); + NOT(32, gpr.R(a)); + } + else if (inst.SUBOP10 == 412) /* orcx */ + { + if (a == b) + { + NOT(32, gpr.R(a)); + OR(32, gpr.R(a), operand); + } + else + { + MOV(32, R(EAX), operand); + NOT(32, R(EAX)); + OR(32, gpr.R(a), R(EAX)); + } + } + else if (inst.SUBOP10 == 316) /* xorx */ + { + XOR(32, gpr.R(a), operand); + } + else if (inst.SUBOP10 == 284) /* eqvx */ + { + XOR(32, gpr.R(a), operand); + NOT(32, gpr.R(a)); + } + else + { + PanicAlert("WTF"); + } gpr.UnlockAll(); } else { - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - if (a == s) - OR(32, gpr.R(a), gpr.R(b)); - else if (a == b) - OR(32, gpr.R(a), gpr.R(s)); - else { + gpr.Lock(a,s,b); + gpr.BindToRegister(a, false, true); + + if (inst.SUBOP10 == 28) /* andx */ + { + MOV(32, gpr.R(a), gpr.R(s)); + AND(32, gpr.R(a), gpr.R(b)); + } + else if (inst.SUBOP10 == 476) /* nandx */ + { + MOV(32, gpr.R(a), gpr.R(s)); + AND(32, gpr.R(a), gpr.R(b)); + NOT(32, gpr.R(a)); + } + else if (inst.SUBOP10 == 60) /* andcx */ + { MOV(32, gpr.R(a), gpr.R(b)); - OR(32, gpr.R(a), gpr.R(s)); + NOT(32, gpr.R(a)); + AND(32, gpr.R(a), 
gpr.R(s)); } - gpr.UnlockAll(); - } - - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } -} - -void Jit64::orcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, s = inst.RS, b = inst.RB; - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - MOV(32, R(EAX), gpr.R(b)); - NOT(32, R(EAX)); - OR(32, R(EAX), gpr.R(s)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - - if (inst.Rc) { - // result is already in eax - ComputeRC(R(EAX)); - } -} - -void Jit64::norx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; - - if (s == b && s != a) - { - gpr.Lock(a,s); - gpr.BindToRegister(a, false); - MOV(32, gpr.R(a), gpr.R(s)); - NOT(32, gpr.R(a)); - gpr.UnlockAll(); - } - else - { - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - if (a == s) + else if (inst.SUBOP10 == 444) /* orx */ + { + MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); - else if (a == b) - OR(32, gpr.R(a), gpr.R(s)); - else { + } + else if (inst.SUBOP10 == 124) /* norx */ + { + MOV(32, gpr.R(a), gpr.R(s)); + OR(32, gpr.R(a), gpr.R(b)); + NOT(32, gpr.R(a)); + } + else if (inst.SUBOP10 == 412) /* orcx */ + { MOV(32, gpr.R(a), gpr.R(b)); + NOT(32, gpr.R(a)); OR(32, gpr.R(a), gpr.R(s)); } - NOT(32, gpr.R(a)); + else if (inst.SUBOP10 == 316) /* xorx */ + { + MOV(32, gpr.R(a), gpr.R(s)); + XOR(32, gpr.R(a), gpr.R(b)); + } + else if (inst.SUBOP10 == 284) /* eqvx */ + { + MOV(32, gpr.R(a), gpr.R(s)); + XOR(32, gpr.R(a), gpr.R(b)); + NOT(32, gpr.R(a)); + } + else + { + PanicAlert("WTF!"); + } gpr.UnlockAll(); } - + if (inst.Rc) { ComputeRC(gpr.R(a)); } } - -// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB]; -void Jit64::xorx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; - - if (s == b) { - gpr.SetImmediate32(a, 0); - } - else - { - gpr.Lock(a, s, b); - gpr.BindToRegister(a, a == 
s || a == b, true); - MOV(32, R(EAX), gpr.R(s)); - XOR(32, R(EAX), gpr.R(b)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - } - - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } -} - -void Jit64::eqvx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; - - if (s == b) { - gpr.SetImmediate32(a, 0); - } - else - { - gpr.Lock(a, s, b); - gpr.BindToRegister(a, a == s || a == b, true); - MOV(32, R(EAX), gpr.R(s)); - XOR(32, R(EAX), gpr.R(b)); - NOT(32, R(EAX)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - } - - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } -} - -void Jit64::andx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, s = inst.RS, b = inst.RB; - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - MOV(32, R(EAX), gpr.R(s)); - AND(32, R(EAX), gpr.R(b)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - - if (inst.Rc) { - // result is already in eax - ComputeRC(R(EAX)); - } -} - -void Jit64::nandx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, s = inst.RS, b = inst.RB; - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - MOV(32, R(EAX), gpr.R(s)); - AND(32, R(EAX), gpr.R(b)); - NOT(32, R(EAX)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - - if (inst.Rc) { - // result is already in eax - ComputeRC(R(EAX)); - } -} - -void Jit64::andcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(Integer) - int a = inst.RA, s = inst.RS, b = inst.RB; - gpr.Lock(a, s, b); - gpr.BindToRegister(a, (a == s || a == b), true); - MOV(32, R(EAX), gpr.R(b)); - NOT(32, R(EAX)); - AND(32, R(EAX), gpr.R(s)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - - if (inst.Rc) { - // result is already in eax - ComputeRC(R(EAX)); - } -} - void Jit64::extsbx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS; - gpr.Lock(a, s); - 
gpr.BindToRegister(a, a == s, true); - // Always force moving to EAX because it isn't possible - // to refer to the lowest byte of some registers, at least in - // 32-bit mode. - MOV(32, R(EAX), gpr.R(s)); - MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends - if (inst.Rc) { + + if (gpr.R(s).IsImm()) + { + gpr.SetImmediate32(a, (u32)(s32)(s8)gpr.R(s).offset); + } + else + { + gpr.Lock(a, s); + gpr.BindToRegister(a, a == s, true); + // Always force moving to EAX because it isn't possible + // to refer to the lowest byte of some registers, at least in + // 32-bit mode. + MOV(32, R(EAX), gpr.R(s)); + MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends + gpr.UnlockAll(); + } + + if (inst.Rc) + { ComputeRC(gpr.R(a)); } - gpr.UnlockAll(); } void Jit64::extshx(UGeckoInstruction inst) @@ -573,17 +651,27 @@ void Jit64::extshx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS; - gpr.Lock(a, s); - gpr.KillImmediate(s, true, false); - gpr.BindToRegister(a, a == s, true); - // This looks a little dangerous, but it's safe because - // every 32-bit register has a 16-bit half at the same index - // as the 32-bit register. - MOVSX(32, 16, gpr.RX(a), gpr.R(s)); - if (inst.Rc) { + + if (gpr.R(s).IsImm()) + { + gpr.SetImmediate32(a, (u32)(s32)(s16)gpr.R(s).offset); + } + else + { + gpr.Lock(a, s); + gpr.KillImmediate(s, true, false); + gpr.BindToRegister(a, a == s, true); + // This looks a little dangerous, but it's safe because + // every 32-bit register has a 16-bit half at the same index + // as the 32-bit register. 
+ MOVSX(32, 16, gpr.RX(a), gpr.R(s)); + gpr.UnlockAll(); + } + + if (inst.Rc) + { ComputeRC(gpr.R(a)); } - gpr.UnlockAll(); } void Jit64::subfic(UGeckoInstruction inst) @@ -752,16 +840,25 @@ void Jit64::subfx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); - MOV(32, R(EAX), gpr.R(b)); - SUB(32, R(EAX), gpr.R(a)); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); + + if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + gpr.SetImmediate32(d, (u32)gpr.R(b).offset - (u32)gpr.R(a).offset); + } + else + { + gpr.Lock(a, b, d); + gpr.BindToRegister(d, (d == a || d == b), true); + MOV(32, R(EAX), gpr.R(b)); + SUB(32, R(EAX), gpr.R(a)); + MOV(32, gpr.R(d), R(EAX)); + gpr.UnlockAll(); + } + if (inst.OE) PanicAlert("OE: subfx"); - if (inst.Rc) { - // result is already in eax - ComputeRC(R(EAX)); + if (inst.Rc) + { + ComputeRC(gpr.R(d)); } } @@ -770,11 +867,19 @@ void Jit64::mulli(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, d = inst.RD; - gpr.Lock(a, d); - gpr.BindToRegister(d, (d == a), true); - gpr.KillImmediate(a, true, false); - IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); - gpr.UnlockAll(); + + if (gpr.R(a).IsImm()) + { + gpr.SetImmediate32(d, (s32)gpr.R(a).offset * (s32)inst.SIMM_16); + } + else + { + gpr.Lock(a, d); + gpr.BindToRegister(d, (d == a), true); + gpr.KillImmediate(a, true, false); + IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); + gpr.UnlockAll(); + } } void Jit64::mullwx(UGeckoInstruction inst) @@ -782,18 +887,28 @@ void Jit64::mullwx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); - if (d == a) { - IMUL(32, gpr.RX(d), gpr.R(b)); - } else if (d == b) { - IMUL(32, gpr.RX(d), gpr.R(a)); - } else { - MOV(32, gpr.R(d), gpr.R(b)); - IMUL(32, 
gpr.RX(d), gpr.R(a)); + + if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + gpr.SetImmediate32(d, (s32)gpr.R(a).offset * (s32)gpr.R(b).offset); } - gpr.UnlockAll(); - if (inst.Rc) { + else + { + gpr.Lock(a, b, d); + gpr.BindToRegister(d, (d == a || d == b), true); + if (d == a) { + IMUL(32, gpr.RX(d), gpr.R(b)); + } else if (d == b) { + IMUL(32, gpr.RX(d), gpr.R(a)); + } else { + MOV(32, gpr.R(d), gpr.R(b)); + IMUL(32, gpr.RX(d), gpr.R(a)); + } + gpr.UnlockAll(); + } + + if (inst.Rc) + { ComputeRC(gpr.R(d)); } } @@ -803,19 +918,28 @@ void Jit64::mulhwux(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); - if (gpr.RX(d) == EDX) - PanicAlert("mulhwux : WTF"); - MOV(32, R(EAX), gpr.R(a)); - gpr.KillImmediate(b, true, false); - MUL(32, gpr.R(b)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - MOV(32, gpr.R(d), R(EDX)); + + if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + gpr.SetImmediate32(d, (u32)(((u64)gpr.R(a).offset * (u64)gpr.R(b).offset) >> 32)); + } + else + { + gpr.FlushLockX(EDX); + gpr.Lock(a, b, d); + gpr.BindToRegister(d, (d == a || d == b), true); + if (gpr.RX(d) == EDX) + PanicAlert("mulhwux : WTF"); + MOV(32, R(EAX), gpr.R(a)); + gpr.KillImmediate(b, true, false); + MUL(32, gpr.R(b)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + MOV(32, gpr.R(d), R(EDX)); + } + if (inst.Rc) - ComputeRC(R(EDX)); + ComputeRC(gpr.R(d)); } void Jit64::divwux(UGeckoInstruction inst) @@ -823,26 +947,39 @@ void Jit64::divwux(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); - gpr.Lock(a, b, d); - gpr.BindToRegister(d, (d == a || d == b), true); - MOV(32, R(EAX), gpr.R(a)); - XOR(32, R(EDX), R(EDX)); - gpr.KillImmediate(b, true, false); - CMP(32, gpr.R(b), Imm32(0)); - // doesn't handle if OE is set, but int doesn't either... 
- FixupBranch not_div_by_zero = J_CC(CC_NZ); - MOV(32, gpr.R(d), R(EDX)); - MOV(32, R(EAX), gpr.R(d)); - FixupBranch end = J(); - SetJumpTarget(not_div_by_zero); - DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); - SetJumpTarget(end); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) { - ComputeRC(R(EAX)); + + if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + if( gpr.R(b).offset == 0 ) + gpr.SetImmediate32(d, 0); + else + gpr.SetImmediate32(d, (u32)gpr.R(a).offset / (u32)gpr.R(b).offset); + } + else + { + gpr.FlushLockX(EDX); + gpr.Lock(a, b, d); + gpr.BindToRegister(d, (d == a || d == b), true); + MOV(32, R(EAX), gpr.R(a)); + XOR(32, R(EDX), R(EDX)); + gpr.KillImmediate(b, true, false); + CMP(32, gpr.R(b), Imm32(0)); + // doesn't handle if OE is set, but int doesn't either... + FixupBranch not_div_by_zero = J_CC(CC_NZ); + MOV(32, gpr.R(d), R(EDX)); + MOV(32, R(EAX), gpr.R(d)); + FixupBranch end = J(); + SetJumpTarget(not_div_by_zero); + DIV(32, gpr.R(b)); + MOV(32, gpr.R(d), R(EAX)); + SetJumpTarget(end); + gpr.UnlockAll(); + gpr.UnlockAllX(); + } + + if (inst.Rc) + { + ComputeRC(gpr.R(d)); } } @@ -853,7 +990,11 @@ void Jit64::addx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); - if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) + if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)gpr.R(b).offset); + } + else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { gpr.Lock(a, b, d); gpr.BindToRegister(d, false); @@ -1026,47 +1167,48 @@ void Jit64::rlwinmx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA; int s = inst.RS; - if (gpr.R(s).IsImm() && !inst.Rc) + if (gpr.R(s).IsImm()) { unsigned result = (int)gpr.R(s).offset; if (inst.SH != 0) result = _rotl(result, inst.SH); result &= Helper_Mask(inst.MB, inst.ME); gpr.SetImmediate32(a, result); - return; - } - - gpr.Lock(a, s); - gpr.BindToRegister(a, a == s); - if (a != s) - { - 
MOV(32, gpr.R(a), gpr.R(s)); - } - - if (inst.MB == 0 && inst.ME==31-inst.SH) - { - SHL(32, gpr.R(a), Imm8(inst.SH)); - } - else if (inst.ME == 31 && inst.MB == 32 - inst.SH) - { - SHR(32, gpr.R(a), Imm8(inst.MB)); } else { - bool written = false; - if (inst.SH != 0) + gpr.Lock(a, s); + gpr.BindToRegister(a, a == s); + if (a != s) { - ROL(32, gpr.R(a), Imm8(inst.SH)); - written = true; + MOV(32, gpr.R(a), gpr.R(s)); } - if (!(inst.MB==0 && inst.ME==31)) + + if (inst.MB == 0 && inst.ME==31-inst.SH) { - written = true; - AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); + SHL(32, gpr.R(a), Imm8(inst.SH)); } - _assert_msg_(DYNA_REC, written, "W T F!!!"); + else if (inst.ME == 31 && inst.MB == 32 - inst.SH) + { + SHR(32, gpr.R(a), Imm8(inst.MB)); + } + else + { + bool written = false; + if (inst.SH != 0) + { + ROL(32, gpr.R(a), Imm8(inst.SH)); + written = true; + } + if (!(inst.MB==0 && inst.ME==31)) + { + written = true; + AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); + } + _assert_msg_(DYNA_REC, written, "W T F!!!"); + } + gpr.UnlockAll(); } - gpr.UnlockAll(); if (inst.Rc) { @@ -1082,16 +1224,25 @@ void Jit64::rlwimix(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - gpr.Lock(a, s); - gpr.KillImmediate(a, true, true); - u32 mask = Helper_Mask(inst.MB, inst.ME); - MOV(32, R(EAX), gpr.R(s)); - AND(32, gpr.R(a), Imm32(~mask)); - if (inst.SH) - ROL(32, R(EAX), Imm8(inst.SH)); - AND(32, R(EAX), Imm32(mask)); - OR(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); + if (gpr.R(a).IsImm() && gpr.R(s).IsImm()) + { + u32 mask = Helper_Mask(inst.MB,inst.ME); + gpr.SetImmediate32(a, ((u32)gpr.R(a).offset & ~mask) | (_rotl((u32)gpr.R(s).offset,inst.SH) & mask)); + } + else + { + gpr.Lock(a, s); + gpr.KillImmediate(a, true, true); + u32 mask = Helper_Mask(inst.MB, inst.ME); + MOV(32, R(EAX), gpr.R(s)); + AND(32, gpr.R(a), Imm32(~mask)); + if (inst.SH) + ROL(32, R(EAX), Imm8(inst.SH)); + AND(32, R(EAX), Imm32(mask)); + OR(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); 
+ } + if (inst.Rc) { ComputeRC(gpr.R(a)); @@ -1105,20 +1256,28 @@ void Jit64::rlwnmx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; u32 mask = Helper_Mask(inst.MB, inst.ME); - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - gpr.KillImmediate(a, (a == s || a == b), true); - MOV(32, R(EAX), gpr.R(s)); - MOV(32, R(ECX), gpr.R(b)); - AND(32, R(ECX), Imm32(0x1f)); - ROL(32, R(EAX), R(ECX)); - AND(32, R(EAX), Imm32(mask)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + { + gpr.SetImmediate32(a, _rotl((u32)gpr.R(s).offset, (u32)gpr.R(b).offset & 0x1F) & mask); + } + else + { + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + gpr.KillImmediate(a, (a == s || a == b), true); + MOV(32, R(EAX), gpr.R(s)); + MOV(32, R(ECX), gpr.R(b)); + AND(32, R(ECX), Imm32(0x1f)); + ROL(32, R(EAX), R(ECX)); + AND(32, R(EAX), Imm32(mask)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + } + if (inst.Rc) { - ComputeRC(R(EAX)); + ComputeRC(gpr.R(a)); } } @@ -1128,12 +1287,21 @@ void Jit64::negx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA; int d = inst.RD; - gpr.Lock(a, d); - gpr.BindToRegister(d, a == d, true); - if (a != d) - MOV(32, gpr.R(d), gpr.R(a)); - NEG(32, gpr.R(d)); - gpr.UnlockAll(); + + if (gpr.R(a).IsImm()) + { + gpr.SetImmediate32(d, ~((u32)gpr.R(a).offset) + 1); + } + else + { + gpr.Lock(a, d); + gpr.BindToRegister(d, a == d, true); + if (a != d) + MOV(32, gpr.R(d), gpr.R(a)); + NEG(32, gpr.R(d)); + gpr.UnlockAll(); + } + if (inst.Rc) { ComputeRC(gpr.R(d)); @@ -1147,22 +1315,32 @@ void Jit64::srwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - gpr.BindToRegister(a, a == s || a == b || s == b, true); - MOV(32, R(ECX), gpr.R(b)); - XOR(32, R(EAX), R(EAX)); - TEST(32, R(ECX), Imm32(32)); - FixupBranch branch = J_CC(CC_NZ); - MOV(32, R(EAX), gpr.R(s)); - SHR(32, R(EAX), R(ECX)); - 
SetJumpTarget(branch); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + + if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + { + u32 amount = (u32)gpr.R(b).offset; + gpr.SetImmediate32(a, (amount & 0x20) ? 0 : ((u32)gpr.R(s).offset >> (amount & 0x1f))); + } + else + { + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + gpr.BindToRegister(a, a == s || a == b || s == b, true); + MOV(32, R(ECX), gpr.R(b)); + XOR(32, R(EAX), R(EAX)); + TEST(32, R(ECX), Imm32(32)); + FixupBranch branch = J_CC(CC_NZ); + MOV(32, R(EAX), gpr.R(s)); + SHR(32, R(EAX), R(ECX)); + SetJumpTarget(branch); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + } + if (inst.Rc) { - ComputeRC(R(EAX)); + ComputeRC(gpr.R(a)); } } @@ -1173,22 +1351,32 @@ void Jit64::slwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - gpr.BindToRegister(a, a == s || a == b || s == b, true); - MOV(32, R(ECX), gpr.R(b)); - XOR(32, R(EAX), R(EAX)); - TEST(32, R(ECX), Imm32(32)); - FixupBranch branch = J_CC(CC_NZ); - MOV(32, R(EAX), gpr.R(s)); - SHL(32, R(EAX), R(ECX)); - SetJumpTarget(branch); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); + + if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) + { + u32 amount = (u32)gpr.R(b).offset; + gpr.SetImmediate32(a, (amount & 0x20) ? 
0 : ((u32)gpr.R(s).offset << (amount & 0x1f))); /* bit 0x20 is handled above; mask to the low 5 bits because a u32 shift by >= 32 is undefined in C++ (same form as the srwx fold) */ + } + else + { + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + gpr.BindToRegister(a, a == s || a == b || s == b, true); + MOV(32, R(ECX), gpr.R(b)); + XOR(32, R(EAX), R(EAX)); + TEST(32, R(ECX), Imm32(32)); + FixupBranch branch = J_CC(CC_NZ); + MOV(32, R(EAX), gpr.R(s)); + SHL(32, R(EAX), R(ECX)); + SetJumpTarget(branch); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + } + if (inst.Rc) { - ComputeRC(R(EAX)); + ComputeRC(gpr.R(a)); } } @@ -1286,15 +1474,27 @@ void Jit64::cntlzwx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - gpr.Lock(a, s); - gpr.KillImmediate(s, true, false); - gpr.BindToRegister(a, (a == s), true); - BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); - FixupBranch gotone = J_CC(CC_NZ); - MOV(32, gpr.R(a), Imm32(63)); - SetJumpTarget(gotone); - XOR(32, gpr.R(a), Imm8(0x1f)); // flip order - gpr.UnlockAll(); + if (gpr.R(s).IsImm()) + { + u32 mask = 0x80000000; + u32 i = 0; + for (; i < 32; i++, mask >>= 1) + if ((u32)gpr.R(s).offset & mask) + break; + gpr.SetImmediate32(a, i); + } + else + { + gpr.Lock(a, s); + gpr.KillImmediate(s, true, false); + gpr.BindToRegister(a, (a == s), true); + BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); + FixupBranch gotone = J_CC(CC_NZ); + MOV(32, gpr.R(a), Imm32(63)); + SetJumpTarget(gotone); + XOR(32, gpr.R(a), Imm8(0x1f)); // flip order + gpr.UnlockAll(); + } if (inst.Rc) {