diff --git a/Source/Android/res/values-ja/strings.xml b/Source/Android/res/values-ja/strings.xml index 49502bf86d..84b6c24f58 100644 --- a/Source/Android/res/values-ja/strings.xml +++ b/Source/Android/res/values-ja/strings.xml @@ -137,7 +137,6 @@ JIT64 Recompiler JITIL Recompiler JIT ARM Recompiler - JITIL ARM Recompiler CPU CPUコア %s diff --git a/Source/Android/res/values/arrays.xml b/Source/Android/res/values/arrays.xml index 4aaedd6370..4233c194be 100644 --- a/Source/Android/res/values/arrays.xml +++ b/Source/Android/res/values/arrays.xml @@ -19,12 +19,10 @@ @string/interpreter @string/jit_arm_recompiler - @string/jitil_arm_recompiler 0 3 - 4 diff --git a/Source/Android/res/values/strings.xml b/Source/Android/res/values/strings.xml index 88d89dbb85..92e3094121 100644 --- a/Source/Android/res/values/strings.xml +++ b/Source/Android/res/values/strings.xml @@ -138,7 +138,6 @@ JIT64 Recompiler JITIL Recompiler JIT ARM Recompiler - JITIL ARM Recompiler CPU CPU Core %s diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index b4ec2bc331..5d3718e5bd 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -721,7 +721,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest) { if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); dest.operandReg = 0; - dest.WriteRex(this, 0, 0); + dest.WriteRex(this, 0, 8); Write8(0x0F); Write8(0x90 + (u8)flag); dest.WriteRest(this); diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 53b3881a04..77f778c877 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -218,12 +218,6 @@ if(_M_ARM_32) PowerPC/JitArm32/JitArm_LoadStorePaired.cpp PowerPC/JitArm32/JitArm_SystemRegisters.cpp PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp - #JitArmIL - PowerPC/JitArmIL/JitIL.cpp - PowerPC/JitArmIL/JitILAsm.cpp - PowerPC/JitArmIL/JitIL_Tables.cpp - PowerPC/JitArmIL/JitIL_Branch.cpp - PowerPC/JitArmIL/IR_Arm.cpp ) 
endif() diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index a82d7bcc82..d1c03420fc 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -84,7 +84,7 @@ static void Trace(UGeckoInstruction& instCode) char ppcInst[256]; DisassembleGekko(instCode.hex, PC, ppcInst, 256); - DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str(), instCode.hex, ppcInst); + DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx FPSCR: %08x MSR: %08x LR: %08x %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), instCode.hex, ppcInst); } int Interpreter::SingleStepInner(void) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index c166f107f2..0fa0f8f4a8 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -4,32 +4,22 @@ #include "Core/PowerPC/Interpreter/Interpreter.h" -void Interpreter::Helper_UpdateCR0(u32 _uValue) +void Interpreter::Helper_UpdateCR0(u32 value) { - u32 new_cr0; - int sValue = (int)_uValue; - if (sValue > 0) - new_cr0 = 0x4; - else if (sValue < 0) - new_cr0 = 0x8; - else - new_cr0 = 0x2; - new_cr0 |= GetXER_SO(); - SetCRField(0, new_cr0); + s64 sign_extended = (s64)(s32)value; + u64 cr_val = 
(u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[0] = cr_val; } -void Interpreter::Helper_UpdateCRx(int _x, u32 _uValue) +void Interpreter::Helper_UpdateCRx(int idx, u32 value) { - u32 new_crX; - int sValue = (int)_uValue; - if (sValue > 0) - new_crX = 0x4; - else if (sValue < 0) - new_crX = 0x8; - else - new_crX = 0x2; - new_crX |= GetXER_SO(); - SetCRField(_x, new_crX); + s64 sign_extended = (s64)(s32)value; + u64 cr_val = (u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[idx] = cr_val; } u32 Interpreter::Helper_Carry(u32 _uValue1, u32 _uValue2) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 837b3c4be9..ce0c36b6a5 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -377,10 +377,8 @@ void Jit64::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); + DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } void STACKALIGN Jit64::Jit(u32 em_address) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 8c5056c372..ab25a88d6b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -106,6 +106,16 @@ public: void GenerateRC(); void 
ComputeRC(const Gen::OpArg & arg); + // Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, + // don't forget to xlock it before. + void GetCRFieldBit(int field, int bit, Gen::X64Reg out); + // Clobbers ABI_PARAM1 and ABI_PARAM2, xlock them before. + void SetCRFieldBit(int field, int bit, Gen::X64Reg in); + + // Generates a branch that will check if a given bit of a CR register part + // is set or not. + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); + void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 4b4e9ab0e6..135e0c4f3c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -117,11 +117,8 @@ void Jit64::bcx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } if (inst.LK) @@ -179,14 +176,8 @@ void Jit64::bcctrx(UGeckoInstruction inst) // BO_2 == 001zy -> b if false // BO_2 == 011zy -> b if true - // Ripped from bclrx - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - Gen::CCFlags branch; - if (inst.BO_2 & BO_BRANCH_IF_TRUE) - branch = CC_Z; - else - branch = CC_NZ; - FixupBranch b = J_CC(branch, true); + FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - 
(inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); MOV(32, R(EAX), M(&CTR)); AND(32, R(EAX), Imm32(0xFFFFFFFC)); //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() @@ -222,11 +213,8 @@ void Jit64::bclrx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } // This below line can be used to prove that blr "eats flags" in practice. diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 648da27d26..781ab58172 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -237,26 +237,22 @@ void Jit64::fcmpx(UGeckoInstruction inst) pGreater = J_CC(CC_B); } - // Equal - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); continue1 = J(); - // NAN SetJumpTarget(pNaN); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); if (a != b) { continue2 = J(); - // Greater Than SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); continue3 = J(); - // Less Than SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); } SetJumpTarget(continue1); @@ -266,6 +262,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 67c5e96921..1d98d0d291 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -116,57 +116,17 @@ void Jit64::GenerateCarry() SetJumpTarget(pContinue); } -// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. -void Jit64::GenerateRC() -{ - FixupBranch pZero = J_CC(CC_Z); - FixupBranch pNegative = J_CC(CC_S); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // Result > 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pNegative); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // Result < 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pZero); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // Result == 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); -} - void Jit64::ComputeRC(const Gen::OpArg & arg) { if (arg.IsImm()) { - s32 value = (s32)arg.offset; - if (value < 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); - else if (value > 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); - else - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); + MOV(32, R(EAX), Imm32((s32)arg.offset)); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } else { - if (arg.IsSimpleReg()) - TEST(32, arg, arg); - else - CMP(32, arg, Imm8(0)); - FixupBranch pLesser = J_CC(CC_L); - FixupBranch pGreater = J_CC(CC_G); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); + MOVSX(64, 32, RAX, arg); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } } @@ -192,26 +152,20 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, 
void { gpr.KillImmediate(d, true, true); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } else { gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } } else if (doop == Add) @@ -219,9 +173,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void // a == 0, which for these instructions imply value = 0 gpr.SetImmediate32(d, value); if (Rc) - { ComputeRC(gpr.R(d)); - } } else { @@ -363,22 +315,23 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (signedCompare) { if ((s32)gpr.R(a).offset == (s32)comparand.offset) - compareResult = 0x2; + compareResult = CR_EQ; else if ((s32)gpr.R(a).offset > (s32)comparand.offset) - compareResult = 0x4; + compareResult = CR_GT; else - compareResult = 0x8; + compareResult = CR_LT; } else { if ((u32)gpr.R(a).offset == (u32)comparand.offset) - compareResult = 0x2; + compareResult = CR_EQ; else if ((u32)gpr.R(a).offset > (u32)comparand.offset) - compareResult = 0x4; + compareResult = CR_GT; else - compareResult = 0x8; + compareResult = CR_LT; } - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); gpr.UnlockAll(); if (merge_branch) @@ -436,71 +389,58 @@ void Jit64::cmpXX(UGeckoInstruction inst) } else { - Gen::CCFlags less_than, greater_than; if (signedCompare) { - less_than = CC_L; - greater_than = CC_G; + if (gpr.R(a).IsImm()) + MOV(64, R(RAX), gpr.R(a)); + else + MOVSX(64, 32, RAX, gpr.R(a)); + if (!comparand.IsImm()) + { + MOVSX(64, 32, 
ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); + } } else { - less_than = CC_B; - greater_than = CC_A; - } + if (gpr.R(a).IsImm()) + MOV(32, R(RAX), gpr.R(a)); + else + MOVZX(64, 32, RAX, gpr.R(a)); - if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg())) - { - // Syntax for CMP is invalid with such arguments. We must load RA in a register. - gpr.BindToRegister(a, true, false); + if (comparand.IsImm()) + MOV(32, R(ABI_PARAM1), comparand); + else + MOVZX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); } - CMP(32, gpr.R(a), comparand); - gpr.UnlockAll(); + SUB(64, R(RAX), comparand); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); - if (!merge_branch) - { - // Keep the normal code separate for clarity. - - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); - // TODO: If we ever care about SO, borrow a trick from - // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc - } - else + if (merge_branch) { js.downcountAmount++; int test_bit = 8 >> (js.next_inst.BI & 3); - bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? 
false : true; + bool condition = js.next_inst.BO & BO_BRANCH_IF_TRUE; // Test swapping (in the future, will be used to inline across branches the right way) // if (rand() & 1) // std::swap(destination1, destination2), condition = !condition; + gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 - FixupBranch continue1 = J(); + FixupBranch pDontBranch; + if (test_bit & 8) + pDontBranch = J_CC(condition ? CC_GE : CC_L); // Test < 0, so jump over if >= 0. + else if (test_bit & 4) + pDontBranch = J_CC(condition ? CC_LE : CC_G); // Test > 0, so jump over if <= 0. + else if (test_bit & 2) + pDontBranch = J_CC(condition ? CC_NE : CC_E); // Test = 0, so jump over if != 0. + else // SO bit, do not branch (we don't emulate SO for cmp). + pDontBranch = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 - FixupBranch continue3; - if (!!(8 & test_bit) == condition) continue3 = J(); - if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + // Code that handles successful PPC branching. 
if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) @@ -534,9 +474,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) PanicAlert("WTF invalid branch"); } - if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); - if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + SetJumpTarget(pDontBranch); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { @@ -619,9 +557,7 @@ void Jit64::boolX(UGeckoInstruction inst) PanicAlert("WTF!"); } if (inst.Rc) - { ComputeRC(gpr.R(a)); - } } else if ((a == s) || (a == b)) { @@ -632,19 +568,11 @@ void Jit64::boolX(UGeckoInstruction inst) if (inst.SUBOP10 == 28) /* andx */ { AND(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { AND(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { @@ -659,27 +587,15 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); AND(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { OR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { OR(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { @@ -694,32 +610,22 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); OR(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { NOT(32, gpr.R(a)); XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else { PanicAlert("WTF"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } else @@ -731,83 +637,53 @@ void Jit64::boolX(UGeckoInstruction inst) { MOV(32, gpr.R(a), gpr.R(s)); AND(32, 
gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); AND(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); OR(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { MOV(32, gpr.R(a), gpr.R(s)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { MOV(32, gpr.R(a), gpr.R(s)); NOT(32, gpr.R(a)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else { PanicAlert("WTF!"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } } @@ -943,9 +819,8 @@ void Jit64::subfcx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) { - GenerateRC(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); FinalizeCarryOverflow(inst.OE, true); gpr.UnlockAll(); @@ -980,10 +855,9 @@ void Jit64::subfex(UGeckoInstruction inst) NOT(32, gpr.R(d)); ADC(32, gpr.R(d), gpr.R(b)); } - if (inst.Rc) { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1004,11 +878,9 @@ void Jit64::subfmex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + 
ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1029,11 +901,9 @@ void Jit64::subfzex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1076,14 +946,10 @@ void Jit64::subfx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1505,14 +1371,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1521,14 +1383,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1547,11 +1405,9 @@ void Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), gpr.R((d == a) ? 
b : a)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1562,11 +1418,9 @@ void Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1584,11 +1438,9 @@ void Jit64::addcx(UGeckoInstruction inst) gpr.BindToRegister(d, true); JitClearCAOV(inst.OE); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1598,11 +1450,9 @@ void Jit64::addcx(UGeckoInstruction inst) JitClearCAOV(inst.OE); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1621,11 +1471,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1636,11 +1484,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1659,11 +1505,9 @@ void Jit64::addzex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1674,11 +1518,9 @@ void Jit64::addzex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - 
GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1714,17 +1556,13 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { SHL(32, gpr.R(a), Imm8(inst.SH)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH) { SHR(32, gpr.R(a), Imm8(inst.MB)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1736,9 +1574,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.Rc) { @@ -1818,9 +1654,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) XOR(32, gpr.R(a), R(EAX)); } if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1828,9 +1662,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) AND(32, gpr.R(a), Imm32(~mask)); XOR(32, gpr.R(a), gpr.R(s)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } gpr.UnlockAll(); } @@ -1864,9 +1696,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) ROL(32, gpr.R(a), R(ECX)); AND(32, gpr.R(a), Imm32(mask)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); gpr.UnlockAll(); gpr.UnlockAllX(); } @@ -1898,14 +1728,10 @@ void Jit64::negx(UGeckoInstruction inst) if (a != d) MOV(32, gpr.R(d), gpr.R(a)); NEG(32, gpr.R(d)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1994,7 +1820,7 @@ void Jit64::slwx(UGeckoInstruction inst) if (inst.Rc) { AND(32, gpr.R(a), gpr.R(a)); - GenerateRC(); + ComputeRC(gpr.R(a)); } else { @@ -2104,9 +1930,7 @@ void Jit64::srawix(UGeckoInstruction inst) } SAR(32, gpr.R(a), Imm8(amount)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); SHL(32, R(EAX), Imm8(32-amount)); TEST(32, R(EAX), gpr.R(a)); FixupBranch nocarry = J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 977e298dd2..4dead8df4e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -10,6 +10,130 @@ #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" +void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out) +{ + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + case CR_EQ_BIT: // check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + SETcc(CC_Z, R(out)); + break; + + case CR_GT_BIT: // check val > 0 + MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(ABI_PARAM1), R(ABI_PARAM1)); + SETcc(CC_G, R(out)); + break; + + case CR_LT_BIT: // check bit 62 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } +} + +void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) +{ + MOV(64, R(ABI_PARAM2), M(&PowerPC::ppcState.cr_val[field])); + TEST(8, R(in), Imm8(1)); + FixupBranch input_is_set = J_CC(CC_NZ, false); + + // New value is 0. 
+ switch (bit) + { + case CR_SO_BIT: // unset bit 61 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 61))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case CR_EQ_BIT: // set bit 0 to 1 + OR(8, R(ABI_PARAM2), Imm8(1)); + break; + + case CR_GT_BIT: // !GT, set bit 63 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 63)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case CR_LT_BIT: // !LT, unset bit 62 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 62))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + FixupBranch end = J(); + SetJumpTarget(input_is_set); + + switch (bit) + { + case CR_SO_BIT: // set bit 61 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case CR_EQ_BIT: // set bits 31-0 to 0 + MOV(64, R(ABI_PARAM1), Imm64(0xFFFFFFFF00000000)); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case CR_GT_BIT: // unset bit 63 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 63))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case CR_LT_BIT: // set bit 62 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + SetJumpTarget(end); + MOV(64, R(ABI_PARAM1), Imm64(1ull << 32)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM2)); +} + +FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + MOV(64, R(RAX), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + case CR_EQ_BIT: // check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + return J_CC(jump_if_set ? CC_Z : CC_NZ, true); + + case CR_GT_BIT: // check val > 0 + MOV(64, R(RAX), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(RAX), R(RAX)); + return J_CC(jump_if_set ? 
CC_G : CC_LE, true); + + case CR_LT_BIT: // check bit 62 set + MOV(64, R(RAX), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + // Should never happen. + return FixupBranch(); +} + void Jit64::mtspr(UGeckoInstruction inst) { INSTRUCTION_START @@ -154,16 +278,47 @@ void Jit64::mfcr(UGeckoInstruction inst) int d = inst.RD; gpr.Lock(d); gpr.KillImmediate(d, false, true); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + XOR(32, R(EAX), R(EAX)); - for (int i = 1; i < 8; i++) + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + for (int i = 0; i < 8; i++) { - SHL(32, R(EAX), Imm8(4)); - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); + if (i != 0) + SHL(32, R(EAX), Imm8(4)); + + MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + + // SO: Bit 61 set. + MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(61)); + AND(32, R(tmp), Imm8(1)); + OR(32, R(EAX), R(tmp)); + + // EQ: Bits 31-0 == 0. + XOR(32, R(tmp), R(tmp)); + TEST(32, R(cr_val), R(cr_val)); + SETcc(CC_Z, R(tmp)); + SHL(32, R(tmp), Imm8(1)); + OR(32, R(EAX), R(tmp)); + + // GT: Value > 0. + TEST(64, R(cr_val), R(cr_val)); + SETcc(CC_G, R(tmp)); + SHL(32, R(tmp), Imm8(2)); + OR(32, R(EAX), R(tmp)); + + // LT: Bit 62 set. 
+ MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(62 - 3)); + AND(32, R(tmp), Imm8(0x8)); + OR(32, R(EAX), R(tmp)); } MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); } void Jit64::mtcrf(UGeckoInstruction inst) @@ -182,7 +337,8 @@ void Jit64::mtcrf(UGeckoInstruction inst) if ((crm & (0x80 >> i)) != 0) { u8 newcr = (gpr.R(inst.RS).offset >> (28 - (i * 4))) & 0xF; - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), Imm8(newcr)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(newcr))); + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); } } } @@ -190,17 +346,50 @@ void Jit64::mtcrf(UGeckoInstruction inst) { gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(EAX), gpr.R(inst.RS)); - SHR(32, R(EAX), Imm8(28 - (i * 4))); - AND(32, R(EAX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); + MOVZX(64, 32, EAX, gpr.R(inst.RS)); + SHR(64, R(EAX), Imm8(28 - (i * 4))); + AND(64, R(EAX), Imm32(0xF)); + + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(CR_EQ)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(CR_GT)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(CR_LT)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(cr_val)); } } gpr.UnlockAll(); + gpr.UnlockAllX(); } } } @@ -213,8 +402,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRFS])); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); 
+ MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); } } @@ -226,9 +415,41 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(28)); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + MOVZX(64, 32, EAX, M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(64, R(EAX), Imm8(28)); + + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x2)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x4)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x8)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(cr_val)); + gpr.UnlockAllX(); // Clear XER[0-3] AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); @@ -240,70 +461,59 @@ void Jit64::crXXX(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); _dbg_assert_msg_(DYNA_REC, inst.OPCD == 19, "Invalid crXXX"); + // TODO(delroth): Potential optimizations could be applied here. For + // instance, if the two CR bits being loaded are the same, two loads are + // not required. 
+ // USES_CR - // Get bit CRBA in EAX aligned with bit CRBD - int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); - if (shiftA < 0) - SHL(8, R(EAX), Imm8(-shiftA)); - else if (shiftA > 0) - SHR(8, R(EAX), Imm8(shiftA)); - - // Get bit CRBB in ECX aligned with bit CRBD - gpr.FlushLockX(ECX); - int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); - if (shiftB < 0) - SHL(8, R(ECX), Imm8(-shiftB)); - else if (shiftB > 0) - SHR(8, R(ECX), Imm8(shiftB)); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM2); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX); // Compute combined bit switch (inst.SUBOP10) { case 33: // crnor - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 129: // crandc - NOT(8, R(ECX)); - AND(8, R(EAX), R(ECX)); + NOT(8, R(ABI_PARAM2)); + AND(8, R(EAX), R(ABI_PARAM2)); break; case 193: // crxor - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); break; case 225: // crnand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 257: // crand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); break; case 289: // creqv - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 417: // crorc - NOT(8, R(ECX)); - OR(8, R(EAX), R(ECX)); + NOT(8, R(ABI_PARAM2)); + OR(8, R(EAX), R(ABI_PARAM2)); break; case 449: // cror - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); break; } // Store result bit in CRBD - AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); - AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); - OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp 
b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index c1f1aacd40..0870a0c32d 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) { return R.IInfo[I - R.FirstI] & 3; } -static unsigned SlotSet[1000]; +static u64 SlotSet[1000]; static u8 GC_ALIGNED16(FSlotSet[16*1000]); static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) { unsigned slot = regGetSpill(RI, RI.regs[reg]); if (!slot) { slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg)); + RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg)); } RI.regs[reg] = nullptr; } @@ -550,6 +550,48 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { regNormalRegClear(RI, I); } +static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) { + bool signed_compare = getOpcode(*I) == ICmpCRSigned; + X64Reg reg; + if (RI.IInfo[I - RI.FirstI] & 4) + { + reg = regEnsureInReg(RI, getOp1(I)); + if (signed_compare) + RI.Jit->MOVSX(64, 32, reg, R(reg)); + } + else + { + reg = regFindFreeReg(RI); + if (signed_compare) + RI.Jit->MOVSX(64, 32, reg, regLocForInst(RI, getOp1(I))); + else + RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I))); + } + if (isImm(*getOp2(I))) + { + unsigned RHS = RI.Build->GetImmValue(getOp2(I)); + if (!signed_compare && (RHS & 0x80000000U)) + { + RI.Jit->MOV(32, R(EAX), Imm32(RHS)); + RI.Jit->SUB(64, R(reg), R(RAX)); + } + else if (RHS) + { + RI.Jit->SUB(64, R(reg), Imm32(RHS)); + } + } + else + { + if (signed_compare) + RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I))); + else + RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); + RI.Jit->SUB(64, R(reg), R(RAX)); + } + RI.regs[reg] = I; + regNormalRegClear(RI, I); +} + static void regWriteExit(RegInfo& RI, InstLoc dest) { if (isImm(*dest)) { RI.exitNumber++; @@ -621,6 +663,12 @@ static void DoWriteCode(IRBuilder* 
ibuild, JitIL* Jit, u32 exitAddress) { case FPDup1: case FSNeg: case FDNeg: + case ConvertFromFastCR: + case ConvertToFastCR: + case FastCRSOSet: + case FastCREQSet: + case FastCRGTSet: + case FastCRLTSet: if (thisUsed) regMarkUse(RI, I, getOp1(I), 1); break; @@ -715,9 +763,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regMarkUse(RI, I, getOp1(getOp1(I)), 1); break; case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { + if (isICmp(*getOp1(I))) { regMarkUse(RI, I, getOp1(getOp1(I)), 1); + if (!isImm(*getOp2(getOp1(I)))) + regMarkUse(RI, I, getOp2(getOp1(I)), 2); } else { regMarkUse(RI, I, getOp1(I), 1); } @@ -763,7 +812,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); + Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -813,10 +862,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } case StoreCR: { - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); + X64Reg reg = regEnsureInReg(RI, getOp1(I)); unsigned ppcreg = *I >> 16; - // CAUTION: uses 8-bit reg! 
- Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); + Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); regNormalRegClear(RI, I); break; } @@ -1076,40 +1124,138 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regEmitICmpInst(RI, I, CC_LE); break; } - case ICmpCRUnsigned: { + case ICmpCRUnsigned: + { if (!thisUsed) break; - regEmitCmp(RI, I); - X64Reg reg = regBinReg(RI, I); - FixupBranch pLesser = Jit->J_CC(CC_B); - FixupBranch pGreater = Jit->J_CC(CC_A); - Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0 - FixupBranch continue1 = Jit->J(); - Jit->SetJumpTarget(pGreater); - Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0 - FixupBranch continue2 = Jit->J(); - Jit->SetJumpTarget(pLesser); - Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0 - Jit->SetJumpTarget(continue1); - Jit->SetJumpTarget(continue2); + regEmitICmpCRInst(RI, I); + break; + } + case ICmpCRSigned: + { + if (!thisUsed) break; + regEmitICmpCRInst(RI, I); + break; + } + case ConvertFromFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->XOR(32, R(EAX), R(EAX)); + + // SO: Bit 61 set. + Jit->MOV(64, R(RCX), R(cr_val)); + Jit->SHR(64, R(RCX), Imm8(61)); + Jit->AND(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // EQ: Bits 31-0 == 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(32, R(cr_val), R(cr_val)); + Jit->SETcc(CC_Z, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // GT: Value > 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(64, R(cr_val), R(cr_val)); + Jit->SETcc(CC_G, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(2)); + Jit->OR(32, R(EAX), R(ECX)); + + // LT: Bit 62 set. 
+ Jit->MOV(64, R(ECX), R(cr_val)); + Jit->SHR(64, R(ECX), Imm8(62 - 3)); + Jit->AND(32, R(ECX), Imm8(0x8)); + Jit->OR(32, R(EAX), R(ECX)); + + Jit->MOV(32, R(cr_val), R(EAX)); + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } + case ConvertToFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->MOV(64, R(RCX), Imm64(1ull << 32)); + + // SO + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->SHL(64, R(RAX), Imm8(63)); + Jit->SHR(64, R(RAX), Imm8(63 - 61)); + Jit->OR(64, R(RCX), R(RAX)); + + // EQ + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_EQ)); + Jit->OR(64, R(RCX), R(RAX)); + + // GT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_GT)); + Jit->SHL(64, R(RAX), Imm8(63 - 2)); + Jit->OR(64, R(RCX), R(RAX)); + + // LT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->AND(64, R(RAX), Imm8(CR_LT)); + Jit->SHL(64, R(RAX), Imm8(62 - 3)); + Jit->OR(64, R(RCX), R(RAX)); + + Jit->MOV(64, R(cr_val), R(RCX)); + + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } + case FastCRSOSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->MOV(64, R(RAX), Imm64(1ull << 61)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); + Jit->SETcc(CC_NZ, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; } - case ICmpCRSigned: { + case FastCREQSet: + { if (!thisUsed) break; - regEmitCmp(RI, I); - X64Reg reg = regBinReg(RI, I); - FixupBranch pLesser = Jit->J_CC(CC_L); - FixupBranch pGreater = Jit->J_CC(CC_G); - Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0 - FixupBranch continue1 = Jit->J(); - Jit->SetJumpTarget(pGreater); - Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0 - FixupBranch continue2 = Jit->J(); - Jit->SetJumpTarget(pLesser); - Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0 - Jit->SetJumpTarget(continue1); - Jit->SetJumpTarget(continue2); + 
X64Reg reg = regUReg(RI, I); + Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); + Jit->SETcc(CC_Z, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } + case FastCRGTSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); + Jit->SETcc(CC_G, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } + case FastCRLTSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->MOV(64, R(RAX), Imm64(1ull << 62)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); + Jit->SETcc(CC_NZ, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1538,7 +1684,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case CInt16: { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), Imm32(ibuild->GetImmValue(I))); + u64 val = ibuild->GetImmValue64(I); + if ((u32)val == val) + Jit->MOV(32, R(reg), Imm32(val)); + else if ((s32)val == val) + Jit->MOV(64, R(reg), Imm32(val)); + else + Jit->MOV(64, R(reg), Imm64(val)); RI.regs[reg] = I; break; } @@ -1566,17 +1718,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), - Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); + if (isICmp(*getOp1(I))) { + regEmitCmp(RI, getOp1(I)); CCFlags flag; switch (getOpcode(*getOp1(I))) { case ICmpEq: flag = CC_NE; break; case ICmpNe: flag = CC_E; break; case ICmpUgt: flag = CC_BE; break; case ICmpUlt: flag = CC_AE; break; - case ICmpUge: flag = CC_L; break; + case ICmpUge: flag = CC_B; break; case ICmpUle: flag = CC_A; break; case ICmpSgt: flag = CC_LE; break; case ICmpSlt: flag = CC_GE; break; @@ -1589,7 +1739,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { 
Jit->SetJumpTarget(cont); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(getOp1(I))); - } else { + if (RI.IInfo[I - RI.FirstI] & 8) + regClearInst(RI, getOp2(getOp1(I))); + } + else { Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); FixupBranch cont = Jit->J_CC(CC_Z); regWriteExit(RI, getOp2(I)); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index a3cb619f62..e86a829ab0 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -477,9 +477,9 @@ void JitIL::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, + DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.cr_val[1], PowerPC::ppcState.cr_val[2], PowerPC::ppcState.cr_val[3], + PowerPC::ppcState.cr_val[4], PowerPC::ppcState.cr_val[5], PowerPC::ppcState.cr_val[6], PowerPC::ppcState.cr_val[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index afe4d490cc..97ef0644ec 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -233,31 +233,25 @@ void JitArm::SingleStep() void JitArm::Trace() { - char regs[500] = ""; - char fregs[750] = ""; + std::string regs; + std::string fregs; #ifdef JIT_LOG_GPR for (int i = 0; i < 32; i++) { - char reg[50]; - 
sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); - strncat(regs, reg, sizeof(regs) - 1); + regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); } #endif #ifdef JIT_LOG_FPR for (int i = 0; i < 32; i++) { - char reg[50]; - sprintf(reg, "f%02d: %016x ", i, riPS0(i)); - strncat(fregs, reg, sizeof(fregs) - 1); + fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i)); } #endif - DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs); + DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } void JitArm::PrintDebug(UGeckoInstruction inst, u32 level) diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 30681d63e5..c4cfcaa115 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -50,6 +50,8 @@ private: void Helper_UpdateCR1(ARMReg fpscr, ARMReg temp); void SetFPException(ARMReg Reg, u32 Exception); + + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); public: JitArm() : code_buffer(32000) {} ~JitArm() {} @@ -96,8 +98,7 @@ public: void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); - void GenerateRC(int cr = 0); - void ComputeRC(int cr = 0); + void ComputeRC(ARMReg value, int cr = 0); void ComputeRC(s32 value, int cr); void ComputeCarry(); @@ -143,8 +144,6 @@ public: void cntlzwx(UGeckoInstruction _inst); void cmp (UGeckoInstruction 
_inst); void cmpi(UGeckoInstruction _inst); - void cmpl(UGeckoInstruction _inst); - void cmpli(UGeckoInstruction _inst); void negx(UGeckoInstruction _inst); void mulhwux(UGeckoInstruction _inst); void rlwimix(UGeckoInstruction _inst); @@ -160,13 +159,9 @@ public: void mtspr(UGeckoInstruction _inst); void mfspr(UGeckoInstruction _inst); void mftb(UGeckoInstruction _inst); - void crXXX(UGeckoInstruction _inst); void mcrf(UGeckoInstruction _inst); - void mfcr(UGeckoInstruction _inst); - void mtcrf(UGeckoInstruction _inst); void mtsr(UGeckoInstruction _inst); void mfsr(UGeckoInstruction _inst); - void mcrxr(UGeckoInstruction _inst); void twx(UGeckoInstruction _inst); // LoadStore @@ -193,8 +188,6 @@ public: void fmaddx(UGeckoInstruction _inst); void fctiwx(UGeckoInstruction _inst); void fctiwzx(UGeckoInstruction _inst); - void fcmpo(UGeckoInstruction _inst); - void fcmpu(UGeckoInstruction _inst); void fnmaddx(UGeckoInstruction _inst); void fnmaddsx(UGeckoInstruction _inst); void fresx(UGeckoInstruction _inst); @@ -232,10 +225,6 @@ public: void ps_nabs(UGeckoInstruction _inst); void ps_rsqrte(UGeckoInstruction _inst); void ps_sel(UGeckoInstruction _inst); - void ps_cmpu0(UGeckoInstruction _inst); - void ps_cmpu1(UGeckoInstruction _inst); - void ps_cmpo0(UGeckoInstruction _inst); - void ps_cmpo1(UGeckoInstruction _inst); // LoadStore paired void psq_l(UGeckoInstruction _inst); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp index de76f030f6..09d4b33acb 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp @@ -16,15 +16,6 @@ // The branches are known good, or at least reasonably good. // No need for a disable-mechanism. -// If defined, clears CR0 at blr and bl-s. If the assumption that -// flags never carry over between functions holds, then the task for -// an optimizer becomes much easier. 
- -// #define ACID_TEST - -// Zelda and many more games seem to pass the Acid Test. - - using namespace ArmGen; void JitArm::sc(UGeckoInstruction inst) { @@ -121,13 +112,7 @@ void JitArm::bx(UGeckoInstruction inst) destination = SignExt26(inst.LI << 2); else destination = js.compilerPC + SignExt26(inst.LI << 2); -#ifdef ACID_TEST - if (inst.LK) - { - MOV(R14, 0); - STRB(R14, R9, PPCSTATE_OFF(cr_fast[0])); - } -#endif + if (destination == js.compilerPC) { //PanicAlert("Idle loop detected at %08x", destination); @@ -168,15 +153,10 @@ void JitArm::bcx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - - //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = B_CC(CC_EQ); // Zero - else - pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } + if (inst.LK) { u32 Jumpto = js.compilerPC + 4; @@ -240,20 +220,13 @@ void JitArm::bcctrx(UGeckoInstruction inst) else { // Rare condition seen in (just some versions of?) 
Nintendo's NES Emulator - // BO_2 == 001zy -> b if false // BO_2 == 011zy -> b if true ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - CCFlags branch; - if (inst.BO_2 & BO_BRANCH_IF_TRUE) - branch = CC_EQ; - else - branch = CC_NEQ; - FixupBranch b = B_CC(branch); + FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR])); BIC(rA, rA, 0x3); @@ -304,25 +277,10 @@ void JitArm::bclrx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = B_CC(CC_EQ); // Zero - else - pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } - // This below line can be used to prove that blr "eats flags" in practice. - // This observation will let us do a lot of fun observations. 
- #ifdef ACID_TEST - if (inst.LK) - { - MOV(R14, 0); - STRB(R14, R9, PPCSTATE_OFF(cr_fast[0])); - } - #endif - //MOV(32, R(EAX), M(&LR)); //AND(32, R(EAX), Imm32(0xFFFFFFFC)); LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index 6bd36cff2c..79a0b79885 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -19,8 +19,6 @@ void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp) { - UBFX(temp, fpscr, 28, 4); - STRB(temp, R9, PPCSTATE_OFF(cr_fast[1])); } void JitArm::fctiwx(UGeckoInstruction inst) @@ -129,7 +127,6 @@ void JitArm::fctiwx(UGeckoInstruction inst) fpr.Unlock(V2); } - void JitArm::fctiwzx(UGeckoInstruction inst) { INSTRUCTION_START @@ -210,136 +207,6 @@ void JitArm::fctiwzx(UGeckoInstruction inst) fpr.Unlock(V2); } -void JitArm::fcmpo(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - 
FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::fcmpu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - void 
JitArm::fabsx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 5f47aefb34..6f7e6854dc 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -14,42 +14,27 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" -// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. -// Jit64 ComputerRC is signed -// JIT64 GenerateRC is unsigned -void JitArm::GenerateRC(int cr) { +void JitArm::ComputeRC(ARMReg value, int cr) { ARMReg rB = gpr.GetReg(); - MOV(rB, 0x4); // Result > 0 - SetCC(CC_EQ); MOV(rB, 0x2); // Result == 0 - SetCC(CC_MI); MOV(rB, 0x8); // Result < 0 - SetCC(); + Operand2 ASRReg(value, ST_ASR, 31); - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); - gpr.Unlock(rB); -} -void JitArm::ComputeRC(int cr) { - ARMReg rB = gpr.GetReg(); + STR(value, R9, PPCSTATE_OFF(cr_val[cr])); + MOV(rB, ASRReg); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - MOV(rB, 0x2); // Result == 0 - SetCC(CC_LT); MOV(rB, 0x8); // Result < 0 - SetCC(CC_GT); MOV(rB, 0x4); // Result > 0 - SetCC(); - - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); } void JitArm::ComputeRC(s32 value, int cr) { ARMReg rB = gpr.GetReg(); - if (value < 0) - MOV(rB, 0x8); - else if (value > 0) - MOV(rB, 0x4); - else - MOV(rB, 0x2); + Operand2 ASRReg(rB, ST_ASR, 31); + + MOVI2R(rB, value); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr])); + MOV(rB, ASRReg); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); } @@ -195,7 +180,6 @@ void JitArm::arith(UGeckoInstruction inst) u32 Imm[2] = {0, 0}; bool Rc = false; bool carry = false; - bool isUnsigned = false; bool shiftedImm = false; switch (inst.OPCD) @@ -306,7 +290,6 @@ void JitArm::arith(UGeckoInstruction inst) case 522: // addcox 
carry = true; case 40: // subfx - isUnsigned = true; case 235: // mullwx case 266: case 747: // mullwox @@ -431,6 +414,8 @@ void JitArm::arith(UGeckoInstruction inst) if (Rc) ComputeRC(gpr.GetImm(dest), 0); return; } + + u32 dest = d; // One or the other isn't a IMM switch (inst.OPCD) { @@ -472,6 +457,7 @@ void JitArm::arith(UGeckoInstruction inst) case 24: case 25: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -483,6 +469,7 @@ void JitArm::arith(UGeckoInstruction inst) case 26: case 27: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -495,6 +482,7 @@ void JitArm::arith(UGeckoInstruction inst) case 28: case 29: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -507,12 +495,14 @@ void JitArm::arith(UGeckoInstruction inst) switch (inst.SUBOP10) { case 24: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); LSLS(RA, RS, RB); break; case 28: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -525,12 +515,14 @@ void JitArm::arith(UGeckoInstruction inst) SUBS(RD, RB, RA); break; case 60: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); BICS(RA, RS, RB); break; case 124: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -545,6 +537,7 @@ void JitArm::arith(UGeckoInstruction inst) MULS(RD, RA, RB); break; case 284: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -552,6 +545,7 @@ void JitArm::arith(UGeckoInstruction inst) MVNS(RA, RA); break; case 316: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -559,6 +553,7 @@ void JitArm::arith(UGeckoInstruction inst) break; case 412: { + dest = a; ARMReg rA = gpr.GetReg(); RA = gpr.R(a); RS = gpr.R(s); @@ -569,12 +564,14 @@ void JitArm::arith(UGeckoInstruction inst) } break; case 444: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); ORRS(RA, RS, RB); break; case 476: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -582,12 +579,14 @@ void JitArm::arith(UGeckoInstruction inst) MVNS(RA, 
RA); break; case 536: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); LSRS(RA, RS, RB); break; case 792: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -605,7 +604,7 @@ void JitArm::arith(UGeckoInstruction inst) break; } if (carry) ComputeCarry(); - if (Rc) isUnsigned ? GenerateRC() : ComputeRC(); + if (Rc) ComputeRC(gpr.R(dest)); } void JitArm::addex(UGeckoInstruction inst) @@ -624,7 +623,7 @@ void JitArm::addex(UGeckoInstruction inst) GetCarryAndClear(rA); ADDS(RD, RA, RB); FinalizeCarry(rA); - if (inst.Rc) ComputeRC(); + if (inst.Rc) ComputeRC(RD); gpr.Unlock(rA); } @@ -638,10 +637,7 @@ void JitArm::cntlzwx(UGeckoInstruction inst) ARMReg RS = gpr.R(s); CLZ(RA, RS); if (inst.Rc) - { - CMP(RA, 0); - ComputeRC(); - } + ComputeRC(RA); } void JitArm::mulhwux(UGeckoInstruction inst) @@ -655,8 +651,8 @@ void JitArm::mulhwux(UGeckoInstruction inst) ARMReg RB = gpr.R(b); ARMReg RD = gpr.R(d); ARMReg rA = gpr.GetReg(false); - UMULLS(rA, RD, RA, RB); - if (inst.Rc) ComputeRC(); + UMULL(rA, RD, RA, RB); + if (inst.Rc) ComputeRC(RD); } void JitArm::extshx(UGeckoInstruction inst) @@ -674,10 +670,8 @@ void JitArm::extshx(UGeckoInstruction inst) ARMReg rA = gpr.R(a); ARMReg rS = gpr.R(s); SXTH(rA, rS); - if (inst.Rc){ - CMP(rA, 0); - ComputeRC(); - } + if (inst.Rc) + ComputeRC(rA); } void JitArm::extsbx(UGeckoInstruction inst) { @@ -694,10 +688,8 @@ void JitArm::extsbx(UGeckoInstruction inst) ARMReg rA = gpr.R(a); ARMReg rS = gpr.R(s); SXTB(rA, rS); - if (inst.Rc){ - CMP(rA, 0); - ComputeRC(); - } + if (inst.Rc) + ComputeRC(rA); } void JitArm::cmp (UGeckoInstruction inst) { @@ -713,11 +705,7 @@ void JitArm::cmp (UGeckoInstruction inst) return; } - ARMReg RA = gpr.R(a); - ARMReg RB = gpr.R(b); - CMP(RA, RB); - - ComputeRC(crf); + FALLBACK_IF(true); } void JitArm::cmpi(UGeckoInstruction inst) { @@ -726,71 +714,12 @@ void JitArm::cmpi(UGeckoInstruction inst) u32 a = inst.RA; int crf = inst.CRFD; if (gpr.IsImm(a)) + { ComputeRC((s32)gpr.GetImm(a) - 
inst.SIMM_16, crf); - else - { - ARMReg RA = gpr.R(a); - if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 256) - CMP(RA, inst.SIMM_16); - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, inst.SIMM_16); - CMP(RA, rA); - gpr.Unlock(rA); - } - ComputeRC(crf); + return; } -} -void JitArm::cmpl(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - ARMReg RA = gpr.R(inst.RA); - ARMReg RB = gpr.R(inst.RB); - ARMReg rA = gpr.GetReg(); - int crf = inst.CRFD; - - CMP(RA, RB); - // Unsigned GenerateRC() - - MOV(rA, 0x2); // Result == 0 - SetCC(CC_LO); MOV(rA, 0x8); // Result < 0 - SetCC(CC_HI); MOV(rA, 0x4); // Result > 0 - SetCC(); - - STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf); - gpr.Unlock(rA); -} - -void JitArm::cmpli(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - ARMReg RA = gpr.R(inst.RA); - ARMReg rA = gpr.GetReg(); - int crf = inst.CRFD; - u32 uimm = (u32)inst.UIMM; - if (uimm < 256) - { - CMP(RA, uimm); - } - else - { - MOVI2R(rA, (u32)inst.UIMM); - CMP(RA, rA); - } - // Unsigned GenerateRC() - - MOV(rA, 0x2); // Result == 0 - SetCC(CC_LO); MOV(rA, 0x8); // Result < 0 - SetCC(CC_HI); MOV(rA, 0x4); // Result > 0 - SetCC(); - - STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf); - gpr.Unlock(rA); + FALLBACK_IF(true); } void JitArm::negx(UGeckoInstruction inst) @@ -801,11 +730,10 @@ void JitArm::negx(UGeckoInstruction inst) ARMReg RA = gpr.R(inst.RA); ARMReg RD = gpr.R(inst.RD); - RSBS(RD, RA, 0); + RSB(RD, RA, 0); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(RD); + if (inst.OE) { BKPT(0x333); @@ -825,19 +753,12 @@ void JitArm::rlwimix(UGeckoInstruction inst) MOVI2R(rA, mask); Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. 
+ BIC (rB, RA, rA); // RA & ~mask + AND (rA, rA, Shift); + ORR(RA, rB, rA); + if (inst.Rc) - { - BIC (rB, RA, rA); // RA & ~mask - AND (rA, rA, Shift); - ORRS(RA, rB, rA); - GenerateRC(); - } - else - { - BIC (rB, RA, rA); // RA & ~mask - AND (rA, rA, Shift); - ORR(RA, rB, rA); - } + ComputeRC(RA); gpr.Unlock(rA, rB); } @@ -853,13 +774,10 @@ void JitArm::rlwinmx(UGeckoInstruction inst) MOVI2R(rA, mask); Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. + AND(RA, rA, Shift); + if (inst.Rc) - { - ANDS(RA, rA, Shift); - GenerateRC(); - } - else - AND (RA, rA, Shift); + ComputeRC(RA); gpr.Unlock(rA); //m_GPR[inst.RA] = _rotl(m_GPR[inst.RS],inst.SH) & mask; @@ -882,13 +800,10 @@ void JitArm::rlwnmx(UGeckoInstruction inst) SUB(rB, rB, RB); Operand2 Shift(RS, ST_ROR, rB); // Register shifted register + AND(RA, rA, Shift); + if (inst.Rc) - { - ANDS(RA, rA, Shift); - GenerateRC(); - } - else - AND (RA, rA, Shift); + ComputeRC(RA); gpr.Unlock(rA, rB); } @@ -908,9 +823,9 @@ void JitArm::srawix(UGeckoInstruction inst) Operand2 mask = Operand2(2, 2); // XER_CA_MASK MOV(tmp, RS); - ASRS(RA, RS, amount); + ASR(RA, RS, amount); if (inst.Rc) - GenerateRC(); + ComputeRC(RA); LSL(tmp, tmp, 32 - amount); TST(tmp, RA); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp index 4584058206..520e243327 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp @@ -611,263 +611,4 @@ void JitArm::ps_nabs(UGeckoInstruction inst) VABS(vD1, vB1); VNEG(vD1, vD1); } -void JitArm::ps_cmpu0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); 
// 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpu1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R1(a); - ARMReg vB = fpr.R1(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, 
fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpo0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, 
R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpo1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R1(a); - ARMReg vB = fpr.R1(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index ff129f9674..d17e61a88f 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -14,6 +14,46 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" +FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + ARMReg RA = gpr.GetReg(); + + Operand2 SOBit(2, 2); // 0x20000000 + Operand2 LTBit(1, 1); // 0x40000000 + + FixupBranch branch; + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + TST(RA, SOBit); + branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ); + break; + case CR_EQ_BIT: // check bits 31-0 == 0 + LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); + CMP(RA, 0); + branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ); + break; + case CR_GT_BIT: // check val > 0 + LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); + CMP(RA, 1); + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + SBCS(RA, RA, 0); + branch = B_CC(jump_if_set ? CC_GE : CC_LT); + break; + case CR_LT_BIT: // check bit 62 set + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + TST(RA, LTBit); + branch = B_CC(jump_if_set ? 
CC_NEQ : CC_EQ); + break; + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + gpr.Unlock(RA); + return branch; +} + void JitArm::mtspr(UGeckoInstruction inst) { INSTRUCTION_START @@ -84,67 +124,6 @@ void JitArm::mfspr(UGeckoInstruction inst) } } -void JitArm::mfcr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - // USES_CR - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - int d = inst.RD; - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[0])); - - for (int i = 1; i < 8; i++) - { - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[i])); - LSL(rA, rA, 4); - ORR(rA, rA, rB); - } - MOV(gpr.R(d), rA); - gpr.Unlock(rA, rB); -} - -void JitArm::mtcrf(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - - // USES_CR - u32 crm = inst.CRM; - if (crm != 0) - { - if (gpr.IsImm(inst.RS)) - { - for (int i = 0; i < 8; i++) - { - if ((crm & (0x80 >> i)) != 0) - { - u8 newcr = (gpr.GetImm(inst.RS) >> (28 - (i * 4))) & 0xF; - MOV(rA, newcr); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[i])); - } - } - } - else - { - ARMReg rB = gpr.GetReg(); - MOV(rA, gpr.R(inst.RS)); - for (int i = 0; i < 8; i++) - { - if ((crm & (0x80 >> i)) != 0) - { - UBFX(rB, rA, 28 - (i * 4), 4); - STRB(rB, R9, PPCSTATE_OFF(cr_fast[i])); - } - } - gpr.Unlock(rB); - } - } - gpr.Unlock(rA); -} - void JitArm::mtsr(UGeckoInstruction inst) { INSTRUCTION_START @@ -160,25 +139,6 @@ void JitArm::mfsr(UGeckoInstruction inst) LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR])); } -void JitArm::mcrxr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - // Copy XER[0-3] into CR[inst.CRFD] - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_XER])); - MOV(rB, rA); - LSR(rA, rA, 28); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFD])); - - // Clear XER[0-3] - Operand2 Top4(0xF, 2); - BIC(rB, rB, Top4); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_XER])); - gpr.Unlock(rA, rB); -} 
void JitArm::mtmsr(UGeckoInstruction inst) { @@ -206,84 +166,16 @@ void JitArm::mcrf(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); + ARMReg rA = gpr.GetReg(); if (inst.CRFS != inst.CRFD) { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFS])); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFD])); + LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS])); + STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD])); + LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32)); + STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32)); } gpr.Unlock(rA); } -void JitArm::crXXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - // Get bit CRBA aligned with bit CRBD - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRBA >> 2])); - int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - if (shiftA < 0) - LSL(rA, rA, -shiftA); - else if (shiftA > 0) - LSR(rA, rA, shiftA); - - // Get bit CRBB aligned with bit CRBD - int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBB >> 2])); - if (shiftB < 0) - LSL(rB, rB, -shiftB); - else if (shiftB > 0) - LSR(rB, rB, shiftB); - - // Compute combined bit - switch (inst.SUBOP10) - { - case 33: // crnor - ORR(rA, rA, rB); - MVN(rA, rA); - break; - - case 129: // crandc - MVN(rB, rB); - AND(rA, rA, rB); - break; - - case 193: // crxor - EOR(rA, rA, rB); - break; - - case 225: // crnand - AND(rA, rA, rB); - MVN(rA, rA); - break; - - case 257: // crand - AND(rA, rA, rB); - break; - - case 289: // creqv - EOR(rA, rA, rB); - MVN(rA, rA); - break; - - case 417: // crorc - MVN(rA, rA); - ORR(rA, rA, rB); - break; - - case 449: // cror - ORR(rA, rA, rB); - break; - } - // Store result bit in CRBD - AND(rA, rA, 0x8 >> (inst.CRBD & 3)); - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2])); - BIC(rB, rB, 0x8 >> (inst.CRBD & 3)); - ORR(rB, rB, rA); - STRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2])); - gpr.Unlock(rA, rB); -} 
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp index 5176a4e8d9..d7c1882f44 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp @@ -47,7 +47,7 @@ static GekkoOPTemplate primarytable[] = {7, &JitArm::arith}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, {8, &JitArm::subfic}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {10, &JitArm::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {10, &JitArm::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, &JitArm::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {12, &JitArm::arith}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {13, &JitArm::arith}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, @@ -112,14 +112,14 @@ static GekkoOPTemplate primarytable[] = static GekkoOPTemplate table4[] = { //SUBOP10 - {0, &JitArm::ps_cmpu0}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitArm::ps_cmpo0}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, + {0, &JitArm::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitArm::ps_cmpu1}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitArm::ps_cmpo1}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {592, &JitArm::ps_merge10}, 
//"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, @@ -162,14 +162,14 @@ static GekkoOPTemplate table19[] = { {528, &JitArm::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, {16, &JitArm::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &JitArm::crXXX}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &JitArm::crXXX}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &JitArm::crXXX}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &JitArm::crXXX}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &JitArm::crXXX}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &JitArm::crXXX}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &JitArm::crXXX}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &JitArm::crXXX}, //"crxor", OPTYPE_CR, FL_EVIL}}, + {257, &JitArm::FallBackToInterpreter}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &JitArm::FallBackToInterpreter}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &JitArm::FallBackToInterpreter}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &JitArm::FallBackToInterpreter}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &JitArm::FallBackToInterpreter}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &JitArm::FallBackToInterpreter}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &JitArm::FallBackToInterpreter}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &JitArm::FallBackToInterpreter}, //"crxor", OPTYPE_CR, FL_EVIL}}, {150, &JitArm::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, {0, &JitArm::mcrf}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, @@ -190,7 +190,7 @@ static GekkoOPTemplate table31[] = {476, &JitArm::arith}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {284, &JitArm::arith}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {0, &JitArm::cmp}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {32, &JitArm::cmpl}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {32, &JitArm::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {26, &JitArm::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {922, &JitArm::extshx}, //"extshx", OPTYPE_INTEGER, 
FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, &JitArm::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, @@ -266,16 +266,16 @@ static GekkoOPTemplate table31[] = {759, &JitArm::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, {983, &JitArm::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {19, &JitArm::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, + {19, &JitArm::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, {83, &JitArm::mfmsr}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, - {144, &JitArm::mtcrf}, //"mtcrf", OPTYPE_SYSTEM, 0}}, + {144, &JitArm::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, {146, &JitArm::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {210, &JitArm::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}}, {242, &JitArm::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, {339, &JitArm::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {467, &JitArm::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, {371, &JitArm::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &JitArm::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {512, &JitArm::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {595, &JitArm::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &JitArm::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, @@ -337,8 +337,8 @@ static GekkoOPTemplate table59[] = static GekkoOPTemplate table63[] = { {264, &JitArm::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitArm::fcmpo}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitArm::fcmpu}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, + {32, &JitArm::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, + {0, &JitArm::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm::fctiwx}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm::fctiwzx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp 
b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp deleted file mode 100644 index 7847c6f953..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include - -#include "Common/ArmEmitter.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitILCommon/IR.h" - -using namespace IREmitter; -using namespace ArmGen; -static const unsigned int MAX_NUMBER_OF_REGS = 32; - -struct RegInfo { - JitArmIL *Jit; - IRBuilder* Build; - InstLoc FirstI; - std::vector IInfo; - std::vector lastUsed; - InstLoc regs[MAX_NUMBER_OF_REGS]; - InstLoc fregs[MAX_NUMBER_OF_REGS]; - unsigned numSpills; - unsigned numFSpills; - unsigned exitNumber; - - RegInfo(JitArmIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) { - for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { - regs[i] = 0; - fregs[i] = 0; - } - numSpills = 0; - numFSpills = 0; - exitNumber = 0; - } - - private: - RegInfo(RegInfo&); // DO NOT IMPLEMENT -}; - -static const ARMReg RegAllocOrder[] = {R0, R1, R2, R3, R4, R5, R6, R7, R8}; -static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(ARMReg); - -static unsigned SlotSet[1000]; - -static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) { - unsigned& info = R.IInfo[Op - R.FirstI]; - if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1); - if (info < 2) info++; - R.lastUsed[Op - R.FirstI] = std::max(R.lastUsed[Op - R.FirstI], I); -} -static void regClearInst(RegInfo& RI, InstLoc I) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == I) - RI.regs[RegAllocOrder[i]] = 0; -} -static void regNormalRegClear(RegInfo& RI, InstLoc I) { - if (RI.IInfo[I - RI.FirstI] & 4) - 
regClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); -} - -static unsigned regReadUse(RegInfo& R, InstLoc I) { - return R.IInfo[I - R.FirstI] & 3; -} - -static u32 regLocForSlot(RegInfo& RI, unsigned slot) { - return (u32)&SlotSet[slot - 1]; -} - -static unsigned regCreateSpill(RegInfo& RI, InstLoc I) { - unsigned newSpill = ++RI.numSpills; - RI.IInfo[I - RI.FirstI] |= newSpill << 16; - return newSpill; -} - -static unsigned regGetSpill(RegInfo& RI, InstLoc I) { - return RI.IInfo[I - RI.FirstI] >> 16; -} - -static void regSpill(RegInfo& RI, ARMReg reg) { - if (!RI.regs[reg]) return; - unsigned slot = regGetSpill(RI, RI.regs[reg]); - if (!slot) { - slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOVI2R(R14, regLocForSlot(RI, slot)); - RI.Jit->STR(reg, R14, 0); - } - RI.regs[reg] = 0; -} - -static ARMReg regFindFreeReg(RegInfo& RI) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == 0) - return RegAllocOrder[i]; - - int bestIndex = -1; - InstLoc bestEnd = 0; - for (int i = 0; i < RegAllocSize; ++i) { - const InstLoc start = RI.regs[RegAllocOrder[i]]; - const InstLoc end = RI.lastUsed[start - RI.FirstI]; - if (bestEnd < end) { - bestEnd = end; - bestIndex = i; - } - } - - ARMReg reg = RegAllocOrder[bestIndex]; - regSpill(RI, reg); - return reg; -} -static ARMReg regLocForInst(RegInfo& RI, InstLoc I) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == I) - return RegAllocOrder[i]; - - if (regGetSpill(RI, I) == 0) - PanicAlert("Retrieving unknown spill slot?!"); - RI.Jit->MOVI2R(R14, regLocForSlot(RI, regGetSpill(RI, I))); - ARMReg reg = regFindFreeReg(RI); - RI.Jit->LDR(reg, R14, 0); - return reg; -} -static ARMReg regBinLHSReg(RegInfo& RI, InstLoc I) { - ARMReg reg = regFindFreeReg(RI); - RI.Jit->MOV(reg, regLocForInst(RI, getOp1(I))); - return reg; -} - -// If the lifetime of the register used by an operand ends at I, -// return the register. 
Otherwise return a free register. -static ARMReg regBinReg(RegInfo& RI, InstLoc I) { - // FIXME: When regLocForInst() is extracted as a local variable, - // "Retrieving unknown spill slot?!" is shown. - if (RI.IInfo[I - RI.FirstI] & 4) - return regLocForInst(RI, getOp1(I)); - else if (RI.IInfo[I - RI.FirstI] & 8) - return regLocForInst(RI, getOp2(I)); - - return regFindFreeReg(RI); -} - -static void regSpillCallerSaved(RegInfo& RI) { - regSpill(RI, R0); - regSpill(RI, R1); - regSpill(RI, R2); - regSpill(RI, R3); -} - -static ARMReg regEnsureInReg(RegInfo& RI, InstLoc I) { - return regLocForInst(RI, I); -} - -static void regWriteExit(RegInfo& RI, InstLoc dest) { - if (isImm(*dest)) { - RI.exitNumber++; - RI.Jit->WriteExit(RI.Build->GetImmValue(dest)); - } else { - RI.Jit->WriteExitDestInReg(regLocForInst(RI, dest)); - } -} -static void regStoreInstToPPCState(RegInfo& RI, unsigned width, InstLoc I, s32 offset) { - void (JitArmIL::*op)(ARMReg, ARMReg, Operand2, bool); - switch (width) - { - case 32: - op = &JitArmIL::STR; - break; - case 8: - op = &JitArmIL::STRB; - break; - default: - PanicAlert("Not implemented!"); - return; - break; - } - - if (isImm(*I)) { - RI.Jit->MOVI2R(R12, RI.Build->GetImmValue(I)); - (RI.Jit->*op)(R12, R9, offset, true); - return; - } - ARMReg reg = regEnsureInReg(RI, I); - (RI.Jit->*op)(reg, R9, offset, true); -} - -// -// Mark and calculation routines for profiled load/store addresses -// Could be extended to unprofiled addresses. 
-static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) { - if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); - if (Memory::IsRAMAddress(addr)) - return; - } - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) { - regMarkUse(RI, I, getOp1(AI), OpNum); - return; - } - regMarkUse(RI, I, AI, OpNum); -} -// Binary ops -void JitArmIL::BIN_XOR(ARMReg reg, Operand2 op2) -{ - EOR(reg, reg, op2); -} -void JitArmIL::BIN_OR(ARMReg reg, Operand2 op2) -{ - ORR(reg, reg, op2); -} -void JitArmIL::BIN_AND(ARMReg reg, Operand2 op2) -{ - AND(reg, reg, op2); -} -void JitArmIL::BIN_ADD(ARMReg reg, Operand2 op2) -{ - ADD(reg, reg, op2); -} -static void regEmitShiftInst(RegInfo& RI, InstLoc I, void (JitArmIL::*op)(ARMReg, ARMReg, Operand2)) -{ - ARMReg reg = regBinLHSReg(RI, I); - if (isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - (RI.Jit->*op)(reg, reg, RHS); - RI.regs[reg] = I; - return; - } - (RI.Jit->*op)(reg, reg, regLocForInst(RI, getOp2(I))); - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void regEmitBinInst(RegInfo& RI, InstLoc I, - void (JitArmIL::*op)(ARMReg, Operand2), - bool commutable = false) { - ARMReg reg; - bool commuted = false; - if (RI.IInfo[I - RI.FirstI] & 4) { - reg = regEnsureInReg(RI, getOp1(I)); - } else if (commutable && (RI.IInfo[I - RI.FirstI] & 8)) { - reg = regEnsureInReg(RI, getOp2(I)); - commuted = true; - } else { - reg = regFindFreeReg(RI); - RI.Jit->MOV(reg, regLocForInst(RI, getOp1(I))); - } - if (isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - Operand2 RHSop; - if (TryMakeOperand2(RHS, RHSop)) - (RI.Jit->*op)(reg, RHSop); - else - { - RI.Jit->MOVI2R(R12, RHS); - (RI.Jit->*op)(reg, R12); - } - } else if (commuted) { - (RI.Jit->*op)(reg, regLocForInst(RI, getOp1(I))); - } else { - (RI.Jit->*op)(reg, regLocForInst(RI, getOp2(I))); - } - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} -static void regEmitCmp(RegInfo& RI, InstLoc I) { - if 
(isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - Operand2 op; - if (TryMakeOperand2(RHS, op)) - RI.Jit->CMP(regLocForInst(RI, getOp1(I)), op); - else - { - RI.Jit->MOVI2R(R12, RHS); - RI.Jit->CMP(regLocForInst(RI, getOp1(I)), R12); - } - } else { - ARMReg reg = regEnsureInReg(RI, getOp1(I)); - RI.Jit->CMP(reg, regLocForInst(RI, getOp2(I))); - } -} - -static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) { - RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); - RI.Build = ibuild; - - // Pass to compute liveness - ibuild->StartBackPass(); - for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) { - InstLoc I = ibuild->ReadBackward(); - unsigned int op = getOpcode(*I); - bool thisUsed = regReadUse(RI, I) ? true : false; - switch (op) { - default: - PanicAlert("Unexpected inst!"); - case Nop: - case CInt16: - case CInt32: - case LoadGReg: - case LoadLink: - case LoadCR: - case LoadCarry: - case LoadCTR: - case LoadMSR: - case LoadFReg: - case LoadFRegDENToZero: - case LoadGQR: - case BlockEnd: - case BlockStart: - case FallBackToInterpreter: - case SystemCall: - case RFIExit: - case InterpreterBranch: - case ShortIdleLoop: - case FPExceptionCheck: - case DSIExceptionCheck: - case ISIException: - case ExtExceptionCheck: - case BreakPointCheck: - case Int3: - case Tramp: - // No liveness effects - break; - case SExt8: - case SExt16: - case BSwap32: - case BSwap16: - case Cntlzw: - case Not: - case DupSingleToMReg: - case DoubleToSingle: - case ExpandPackedToMReg: - case CompactMRegToPacked: - case FPNeg: - case FPDup0: - case FPDup1: - case FSNeg: - case FDNeg: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Load8: - case Load16: - case Load32: - regMarkMemAddress(RI, I, getOp1(I), 1); - break; - case LoadDouble: - case LoadSingle: - case LoadPaired: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreCR: - case StoreCarry: - case StoreFPRF: - 
regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreGReg: - case StoreLink: - case StoreCTR: - case StoreMSR: - case StoreGQR: - case StoreSRR: - case StoreFReg: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Add: - case Sub: - case And: - case Or: - case Xor: - case Mul: - case MulHighUnsigned: - case Rol: - case Shl: - case Shrl: - case Sarl: - case ICmpCRUnsigned: - case ICmpCRSigned: - case ICmpEq: - case ICmpNe: - case ICmpUgt: - case ICmpUlt: - case ICmpUge: - case ICmpUle: - case ICmpSgt: - case ICmpSlt: - case ICmpSge: - case ICmpSle: - case FSMul: - case FSAdd: - case FSSub: - case FDMul: - case FDAdd: - case FDSub: - case FPAdd: - case FPMul: - case FPSub: - case FPMerge00: - case FPMerge01: - case FPMerge10: - case FPMerge11: - case FDCmpCR: - case InsertDoubleInMReg: - if (thisUsed) { - regMarkUse(RI, I, getOp1(I), 1); - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - } - break; - case Store8: - case Store16: - case Store32: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - regMarkMemAddress(RI, I, getOp2(I), 2); - break; - case StoreSingle: - case StoreDouble: - case StorePaired: - regMarkUse(RI, I, getOp1(I), 1); - regMarkUse(RI, I, getOp2(I), 2); - break; - case BranchUncond: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case IdleBranch: - regMarkUse(RI, I, getOp1(getOp1(I)), 1); - break; - case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - regMarkUse(RI, I, getOp1(getOp1(I)), 1); - } else { - regMarkUse(RI, I, getOp1(I), 1); - } - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - break; - } - } - } - - ibuild->StartForwardPass(); - for (unsigned i = 0; i != RI.IInfo.size(); i++) { - InstLoc I = ibuild->ReadForward(); - - bool thisUsed = regReadUse(RI, I) ? 
true : false; - if (thisUsed) { - // Needed for IR Writer - ibuild->SetMarkUsed(I); - } - - switch (getOpcode(*I)) { - case CInt32: - case CInt16: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->MOVI2R(reg, ibuild->GetImmValue(I)); - RI.regs[reg] = I; - break; - } - case BranchUncond: { - regWriteExit(RI, getOp1(I)); - regNormalRegClear(RI, I); - break; - } - case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - unsigned imm = RI.Build->GetImmValue(getOp2(getOp1(I))); - if (imm > 255) - { - Jit->MOVI2R(R14, imm); - Jit->CMP(regLocForInst(RI, getOp1(getOp1(I))), R14); - } - else - Jit->CMP(regLocForInst(RI, getOp1(getOp1(I))), imm); - CCFlags flag; - switch (getOpcode(*getOp1(I))) { - case ICmpEq: flag = CC_NEQ; break; - case ICmpNe: flag = CC_EQ; break; - case ICmpUgt: flag = CC_LS; break; - case ICmpUlt: flag = CC_HI; break; - case ICmpUge: flag = CC_HS; break; - case ICmpUle: flag = CC_LO; break; - case ICmpSgt: flag = CC_LT; break; - case ICmpSlt: flag = CC_GT; break; - case ICmpSge: flag = CC_LE; break; - case ICmpSle: flag = CC_GE; break; - default: PanicAlert("cmpXX"); flag = CC_AL; break; - } - FixupBranch cont = Jit->B_CC(flag); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(getOp1(I))); - } else { - Jit->CMP(regLocForInst(RI, getOp1(I)), 0); - FixupBranch cont = Jit->B_CC(CC_EQ); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - } - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); - break; - } - - case StoreGReg: { - unsigned ppcreg = *I >> 16; - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(gpr[ppcreg])); - regNormalRegClear(RI, I); - break; - } - case StoreCR: { - unsigned ppcreg = *I >> 16; - regStoreInstToPPCState(RI, 8, getOp1(I), PPCSTATE_OFF(cr_fast[ppcreg])); - regNormalRegClear(RI, I); - break; - } - case StoreLink: { - 
regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_LR])); - regNormalRegClear(RI, I); - break; - } - case StoreCTR: { - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_CTR])); - regNormalRegClear(RI, I); - break; - } - case StoreMSR: { - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(msr)); - regNormalRegClear(RI, I); - break; - } - case LoadGReg: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->LDR(reg, R9, PPCSTATE_OFF(gpr[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCR: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->LDRB(reg, R9, PPCSTATE_OFF(cr_fast[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCTR: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_CTR])); - RI.regs[reg] = I; - break; - } - case LoadLink: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_LR])); - RI.regs[reg] = I; - break; - } - case FallBackToInterpreter: { - unsigned InstCode = ibuild->GetImmValue(getOp1(I)); - unsigned InstLoc = ibuild->GetImmValue(getOp2(I)); - // There really shouldn't be anything live across an - // interpreter call at the moment, but optimizing interpreter - // calls isn't completely out of the question... 
- regSpillCallerSaved(RI); - Jit->MOVI2R(R14, InstLoc); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->MOVI2R(R14, InstLoc + 4); - Jit->STR(R14, R9, PPCSTATE_OFF(npc)); - - Jit->MOVI2R(R0, InstCode); - Jit->MOVI2R(R14, (u32)GetInterpreterOp(InstCode)); - Jit->BL(R14); - break; - } - case SystemCall: { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOVI2R(R14, InstLoc + 4); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->LDR(R14, R9, PPCSTATE_OFF(Exceptions)); - Jit->ORR(R14, R14, EXCEPTION_SYSCALL); - Jit->STR(R14, R9, PPCSTATE_OFF(Exceptions)); - Jit->WriteExceptionExit(); - break; - } - case ShortIdleLoop: { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOVI2R(R14, (u32)&CoreTiming::Idle); - Jit->BL(R14); - Jit->MOVI2R(R14, InstLoc); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->WriteExceptionExit(); - break; - } - case InterpreterBranch: { - Jit->LDR(R14, R9, PPCSTATE_OFF(npc)); - Jit->WriteExitDestInReg(R14); - break; - } - case RFIExit: { - const u32 mask = 0x87C0FFFF; - const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; - // R0 = MSR location - // R1 = MSR contents - // R2 = Mask - // R3 = Mask - ARMReg rA = R14; - ARMReg rB = R12; - ARMReg rC = R11; - ARMReg rD = R10; - Jit->MOVI2R(rB, (~mask) & clearMSR13); - Jit->MOVI2R(rC, mask & clearMSR13); - - Jit->LDR(rD, R9, PPCSTATE_OFF(msr)); - - Jit->AND(rD, rD, rB); // rD = Masked MSR - - Jit->LDR(rB, R9, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here - - Jit->AND(rB, rB, rC); // rB contains masked SRR1 here - Jit->ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1 - - Jit->STR(rB, R9, PPCSTATE_OFF(msr)); // STR rB in to rA - - Jit->LDR(rA, R9, PPCSTATE_OFF(spr[SPR_SRR0])); - - Jit->WriteRfiExitDestInR(rA); // rA gets unlocked here - break; - } - case Shl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, &JitArmIL::LSL); - break; - } - case Shrl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, 
&JitArmIL::LSR); - break; - } - case Sarl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, &JitArmIL::ASR); - break; - } - case And: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_AND, true); - break; - } - case Not: { - if (!thisUsed) break; - ARMReg reg = regBinLHSReg(RI, I); - Jit->MVN(reg, reg); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Or: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_OR, true); - break; - } - case Xor: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_XOR, true); - break; - } - case Add: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_ADD, true); - break; - } - case ICmpCRUnsigned: { - if (!thisUsed) break; - regEmitCmp(RI, I); - ARMReg reg = regBinReg(RI, I); - Jit->MOV(reg, 0x2); // Result == 0 - Jit->SetCC(CC_LO); Jit->MOV(reg, 0x8); // Result < 0 - Jit->SetCC(CC_HI); Jit->MOV(reg, 0x4); // Result > 0 - Jit->SetCC(); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - - case ICmpCRSigned: { - if (!thisUsed) break; - regEmitCmp(RI, I); - ARMReg reg = regBinReg(RI, I); - Jit->MOV(reg, 0x2); // Result == 0 - Jit->SetCC(CC_LT); Jit->MOV(reg, 0x8); // Result < 0 - Jit->SetCC(CC_GT); Jit->MOV(reg, 0x4); // Result > 0 - Jit->SetCC(); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Int3: - Jit->BKPT(0x321); - break; - case Tramp: break; - case Nop: break; - default: - PanicAlert("Unknown JIT instruction; aborting!"); - ibuild->WriteToFile(0); - exit(1); - } - } - for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { - if (RI.regs[i]) { - // Start a game in Burnout 2 to get this. Or animal crossing. - PanicAlert("Incomplete cleanup! (regs)"); - exit(1); - } - if (RI.fregs[i]) { - PanicAlert("Incomplete cleanup! 
(fregs)"); - exit(1); - } - } - - Jit->WriteExit(exitAddress); - Jit->BKPT(0x111); - -} -void JitArmIL::WriteCode(u32 exitAddress) { - DoWriteCode(&ibuild, this, exitAddress); -} diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h deleted file mode 100644 index 8b13789179..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp deleted file mode 100644 index e1a711e432..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include - -#include "Common/ArmEmitter.h" -#include "Common/Common.h" -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PatchEngine.h" -#include "Core/HLE/HLE.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/Profiler.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" - -using namespace ArmGen; -using namespace PowerPC; - -static int CODE_SIZE = 1024*1024*32; - -void JitArmIL::Init() -{ - AllocCodeSpace(CODE_SIZE); - blocks.Init(); - asm_routines.Init(); - - code_block.m_stats = &js.st; - code_block.m_gpa = &js.gpa; - code_block.m_fpa = &js.fpa; -} - -void JitArmIL::ClearCache() -{ - ClearCodeSpace(); - blocks.Clear(); -} - -void JitArmIL::Shutdown() -{ - FreeCodeSpace(); - blocks.Shutdown(); - asm_routines.Shutdown(); -} -void JitArmIL::unknown_instruction(UGeckoInstruction inst) -{ - // CCPU::Break(); - PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex); -} - -void JitArmIL::FallBackToInterpreter(UGeckoInstruction _inst) -{ 
- ibuild.EmitFallBackToInterpreter( - ibuild.EmitIntConst(_inst.hex), - ibuild.EmitIntConst(js.compilerPC)); -} - -void JitArmIL::HLEFunction(UGeckoInstruction _inst) -{ - // XXX -} - -void JitArmIL::DoNothing(UGeckoInstruction _inst) -{ - // Yup, just don't do anything. -} -void JitArmIL::Break(UGeckoInstruction _inst) -{ - ibuild.EmitINT3(); -} - -void JitArmIL::DoDownCount() -{ - ARMReg rA = R12; - LDR(rA, R9, PPCSTATE_OFF(downcount)); - if (js.downcountAmount < 255) // We can enlarge this if we used rotations - { - SUBS(rA, rA, js.downcountAmount); - } - else - { - ARMReg rB = R11; - MOVI2R(rB, js.downcountAmount); - SUBS(rA, rA, rB); - } - STR(rA, R9, PPCSTATE_OFF(downcount)); -} - -void JitArmIL::WriteExitDestInReg(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - DoDownCount(); - MOVI2R(Reg, (u32)asm_routines.dispatcher); - B(Reg); -} - -void JitArmIL::WriteRfiExitDestInR(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - DoDownCount(); - - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R0, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - - MOVI2R(R0, (u32)asm_routines.dispatcher); - B(R0); -} -void JitArmIL::WriteExceptionExit() -{ - DoDownCount(); - - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R0, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - - MOVI2R(R0, (u32)asm_routines.dispatcher); - B(R0); -} -void JitArmIL::WriteExit(u32 destination) -{ - DoDownCount(); - //If nobody has taken care of this yet (this can be removed when all branches are done) - JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); - linkData.linkStatus = false; - - // Link opportunity! 
- int block; - if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) - { - // It exists! Joy of joy! - B(blocks.GetBlock(block)->checkedEntry); - linkData.linkStatus = true; - } - else - { - MOVI2R(R14, destination); - STR(R14, R9, PPCSTATE_OFF(pc)); - MOVI2R(R14, (u32)asm_routines.dispatcher); - B(R14); - } - - b->linkData.push_back(linkData); -} -void JitArmIL::PrintDebug(UGeckoInstruction inst, u32 level) -{ - if (level > 0) - printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst)); - if (level > 1) - { - GekkoOPInfo* Info = GetOpInfo(inst.hex); - printf("\tOuts\n"); - if (Info->flags & FL_OUT_A) - printf("\t-OUT_A: %x\n", inst.RA); - if (Info->flags & FL_OUT_D) - printf("\t-OUT_D: %x\n", inst.RD); - printf("\tIns\n"); - // A, AO, B, C, S - if (Info->flags & FL_IN_A) - printf("\t-IN_A: %x\n", inst.RA); - if (Info->flags & FL_IN_A0) - printf("\t-IN_A0: %x\n", inst.RA); - if (Info->flags & FL_IN_B) - printf("\t-IN_B: %x\n", inst.RB); - if (Info->flags & FL_IN_C) - printf("\t-IN_C: %x\n", inst.RC); - if (Info->flags & FL_IN_S) - printf("\t-IN_S: %x\n", inst.RS); - } -} - -void STACKALIGN JitArmIL::Run() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} - -void JitArmIL::SingleStep() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} -void STACKALIGN JitArmIL::Jit(u32 em_address) -{ - if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache) - { - ClearCache(); - } - - int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); - JitBlock *b = blocks.GetBlock(block_num); - const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); - blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); -} - -const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) -{ - int blockSize = code_buf->GetSize(); - - if 
(Core::g_CoreStartupParameter.bEnableDebugging) - { - // Comment out the following to disable breakpoints (speed-up) - blockSize = 1; - } - - if (em_address == 0) - { - Core::SetState(Core::CORE_PAUSE); - PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); - } - - js.isLastInstruction = false; - js.blockStart = em_address; - js.fifoBytesThisBlock = 0; - js.curBlock = b; - - u32 nextPC = em_address; - // Analyze the block, collect all instructions it is made of (including inlining, - // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); - - PPCAnalyst::CodeOp *ops = code_buf->codebuffer; - - const u8 *start = GetCodePtr(); - b->checkedEntry = start; - b->runCount = 0; - - // Downcount flag check, Only valid for linked blocks - { - // XXX - } - - const u8 *normalEntry = GetCodePtr(); - b->normalEntry = normalEntry; - - if (js.fpa.any) - { - // XXX - // This block uses FPU - needs to add FP exception bailout - } - js.rewriteStart = (u8*)GetCodePtr(); - - u64 codeHash = -1; - { - // For profiling and IR Writer - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - const u64 inst = ops[i].inst.hex; - // Ported from boost::hash - codeHash ^= inst + (codeHash << 6) + (codeHash >> 2); - } - } - - // Conditionally add profiling code. 
- if (Profiler::g_ProfileBlocks) { - // XXX - } - // Start up IR builder (structure that collects the - // instruction processed by the JIT routines) - ibuild.Reset(); - - js.downcountAmount = 0; - if (!Core::g_CoreStartupParameter.bEnableDebugging) - js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); - - js.skipnext = false; - js.compilerPC = nextPC; - // Translate instructions - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - js.compilerPC = ops[i].address; - js.op = &ops[i]; - js.instructionNumber = i; - const GekkoOPInfo *opinfo = ops[i].opinfo; - js.downcountAmount += opinfo->numCycles; - - if (i == (code_block.m_num_instructions - 1)) - { - // WARNING - cmp->branch merging will screw this up. - js.isLastInstruction = true; - js.next_inst = 0; - if (Profiler::g_ProfileBlocks) { - // CAUTION!!! push on stack regs you use, do your stuff, then pop - PROFILER_VPUSH; - // get end tic - PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); - // tic counter += (end tic - start tic) - PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); - PROFILER_VPOP; - } - } - else - { - // help peephole optimizations - js.next_inst = ops[i + 1].inst; - js.next_compilerPC = ops[i + 1].address; - } - if (!ops[i].skip) - { - PrintDebug(ops[i].inst, 0); - if (js.memcheck && (opinfo->flags & FL_USE_FPU)) - { - // Don't do this yet - BKPT(0x7777); - } - JitArmILTables::CompileInstruction(ops[i]); - if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) - { - // Don't do this yet - BKPT(0x666); - } - } - } - if (code_block.m_memory_exception) - BKPT(0x500); - - if (code_block.m_broken) - { - printf("Broken Block going to 0x%08x\n", nextPC); - WriteExit(nextPC); - } - - // Perform actual code generation - WriteCode(nextPC); - b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = code_block.m_num_instructions;; - - FlushIcache(); - return start; - -} diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL.h 
b/Source/Core/Core/PowerPC/JitArmIL/JitIL.h deleted file mode 100644 index 8260ffe114..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/JitArm32/JitArmCache.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitCommon/JitBase.h" -#include "Core/PowerPC/JitILCommon/IR.h" -#include "Core/PowerPC/JitILCommon/JitILBase.h" - -#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0])) -class JitArmIL : public JitILBase, public ArmGen::ARMCodeBlock -{ -private: - JitArmBlockCache blocks; - JitArmILAsmRoutineManager asm_routines; - - void PrintDebug(UGeckoInstruction inst, u32 level); - void DoDownCount(); - -public: - // Initialization, etc - JitArmIL() {} - ~JitArmIL() {} - - void Init(); - void Shutdown(); - - // Jit! - - void Jit(u32 em_address); - const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); - - JitBaseBlockCache *GetBlockCache() { return &blocks; } - - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return nullptr; } - - bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } - - void ClearCache(); - const u8 *GetDispatcher() { - return asm_routines.dispatcher; // asm_routines.dispatcher - } - const CommonAsmRoutinesBase *GetAsmRoutines() { - return &asm_routines; - } - - const char *GetName() { - return "JITARMIL"; - } - - // Run! 
- - void Run(); - void SingleStep(); - // - void WriteCode(u32 exitAddress); - void WriteExit(u32 destination); - void WriteExitDestInReg(ArmGen::ARMReg Reg); - void WriteRfiExitDestInR(ArmGen::ARMReg Reg); - void WriteExceptionExit(); - - // OPCODES - void unknown_instruction(UGeckoInstruction inst); - void FallBackToInterpreter(UGeckoInstruction inst); - void DoNothing(UGeckoInstruction inst); - void HLEFunction(UGeckoInstruction inst); - void Break(UGeckoInstruction inst); - - void DynaRunTable4(UGeckoInstruction inst); - void DynaRunTable19(UGeckoInstruction inst); - void DynaRunTable31(UGeckoInstruction inst); - void DynaRunTable59(UGeckoInstruction inst); - void DynaRunTable63(UGeckoInstruction inst); - - // Binary ops - void BIN_AND(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_XOR(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_OR(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_ADD(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - - // Branches - void bx(UGeckoInstruction inst); - void bcx(UGeckoInstruction inst); - void bclrx(UGeckoInstruction inst); - void bcctrx(UGeckoInstruction inst); -}; diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp deleted file mode 100644 index c85bcf8aeb..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#include "Common/ArmEmitter.h" -#include "Common/MemoryUtil.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitCommon/JitCache.h" - -using namespace ArmGen; - -void JitArmILAsmRoutineManager::Generate() -{ - enterCode = GetCodePtr(); - PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); - // Take care to 8-byte align stack for function calls. - // We are misaligned here because of an odd number of args for PUSH. - // It's not like x86 where you need to account for an extra 4 bytes - // consumed by CALL. - SUB(_SP, _SP, 4); - - MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); - - FixupBranch skipToRealDispatcher = B(); - dispatcher = GetCodePtr(); - printf("ILDispatcher is %p\n", dispatcher); - - // Downcount Check - // The result of slice decrementation should be in flags if somebody jumped here - // IMPORTANT - We jump on negative, not carry!!! - FixupBranch bail = B_CC(CC_MI); - - SetJumpTarget(skipToRealDispatcher); - dispatcherNoCheck = GetCodePtr(); - - // This block of code gets the address of the compiled block of code - // It runs though to the compiling portion if it isn't found - LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12 - - Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK - BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here. - - MOVI2R(R14, (u32)jit->GetBlockCache()->iCache); - - LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here - // R12 Confirmed this is the correct iCache Location loaded. - TST(R12, 0x80); // Test to see if it is a JIT block. - - SetCC(CC_EQ); - // Success, it is our Jitblock. - MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); - // LDR R14 right here to get CodePointers()[0] pointer. 
- LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size - LDR(R14, R14, R12); // Load the block address in to R14 - - B(R14); - // No need to jump anywhere after here, the block will go back to dispatcher start - SetCC(); - - // If we get to this point, that means that we don't have the block cached to execute - // So call ArmJit to compile the block and then execute it. - MOVI2R(R14, (u32)&Jit); - BL(R14); - - B(dispatcherNoCheck); - - SetJumpTarget(bail); - doTiming = GetCodePtr(); - // XXX: In JIT64, Advance() gets called /after/ the exception checking - // once it jumps back to the start of outerLoop - QuickCallFunction(R14, (void*)&CoreTiming::Advance); - - // Does exception checking - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - // Check the state pointer to see if we are exiting - // Gets checked on every exception check - MOVI2R(R0, (u32)PowerPC::GetStatePtr()); - MVN(R1, 0); - LDR(R0, R0); - TST(R0, R1); - FixupBranch Exit = B_CC(CC_NEQ); - - B(dispatcher); - - SetJumpTarget(Exit); - - ADD(_SP, _SP, 4); - - POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns - - GenerateCommon(); - - FlushIcache(); -} - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h b/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h deleted file mode 100644 index a068ec8076..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" - -class JitArmILAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock -{ -private: - void Generate(); - void GenerateCommon() {} - -public: - void Init() { - AllocCodeSpace(8192); - Generate(); - WriteProtect(); - } - - void Shutdown() { - FreeCodeSpace(); - } -}; - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp deleted file mode 100644 index 7661a75dae..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include "Common/Common.h" - -#include "Core/ConfigManager.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" - -// FIXME -#define NORMALBRANCH_START FallBackToInterpreter(inst); ibuild.EmitInterpreterBranch(); return; -//#define NORMALBRANCH_START - -void JitArmIL::bx(UGeckoInstruction inst) -{ - //NORMALBRANCH_START - INSTRUCTION_START; - - // We must always process the following sentence - // even if the blocks are merged by PPCAnalyst::Flatten(). - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - - // If this is not the last instruction of a block, - // we will skip the rest process. - // Because PPCAnalyst::Flatten() merged the blocks. 
- if (!js.isLastInstruction) { - return; - } - - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); - - if (destination == js.compilerPC) { - ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC)); - return; - } - - ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); -} -static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) { - IREmitter::InstLoc CRTest = 0, CTRTest = 0; - if ((inst.BO & 16) == 0) // Test a CR bit - { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - CRTest = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - CRTest = ibuild.EmitXor(CRCmp, CRTest); - } - - if ((inst.BO & 4) == 0) { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - if (inst.BO & 2) { - CTRTest = ibuild.EmitICmpEq(c, - ibuild.EmitIntConst(0)); - } else { - CTRTest = c; - } - } - - IREmitter::InstLoc Test = CRTest; - if (CTRTest) { - if (Test) - Test = ibuild.EmitAnd(Test, CTRTest); - else - Test = CTRTest; - } - - if (!Test) { - Test = ibuild.EmitIntConst(1); - } - return Test; -} - -void JitArmIL::bclrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - - if (!js.isLastInstruction && - (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - return; - } - - if (inst.hex == 0x4e800020) { - ibuild.EmitBranchUncond(ibuild.EmitLoadLink()); - return; - } - IREmitter::InstLoc test = TestBranch(ibuild, inst); - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadLink(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - 
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - ibuild.EmitBranchUncond(destination); -} -void JitArmIL::bcx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if (inst.LK) - ibuild.EmitStoreLink( - ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc Test = TestBranch(ibuild, inst); - - u32 destination; - if (inst.AA) - destination = SignExt16(inst.BD << 2); - else - destination = js.compilerPC + SignExt16(inst.BD << 2); - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.hex == 0x4182fff8 && - (Memory::ReadUnchecked_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x2C000000)) - ) - { - ibuild.EmitIdleBranch(Test, ibuild.EmitIntConst(destination)); - } - else - { - ibuild.EmitBranchCond(Test, ibuild.EmitIntConst(destination)); - } - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); -} - -void JitArmIL::bcctrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if ((inst.BO & 4) == 0) { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - } - IREmitter::InstLoc test; - if ((inst.BO & 16) == 0) // Test a CR bit - { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - test = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - test = ibuild.EmitXor(test, CRCmp); - } else { - test = ibuild.EmitIntConst(1); - } - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadCTR(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - 
ibuild.EmitBranchUncond(destination); -} - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp deleted file mode 100644 index 882c4825c0..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp +++ /dev/null @@ -1,492 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" - -// Should be moved in to the Jit class -typedef void (JitArmIL::*_Instruction) (UGeckoInstruction instCode); - -static _Instruction dynaOpTable[64]; -static _Instruction dynaOpTable4[1024]; -static _Instruction dynaOpTable19[1024]; -static _Instruction dynaOpTable31[1024]; -static _Instruction dynaOpTable59[32]; -static _Instruction dynaOpTable63[1024]; - -void JitArmIL::DynaRunTable4(UGeckoInstruction _inst) {(this->*dynaOpTable4 [_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable19(UGeckoInstruction _inst) {(this->*dynaOpTable19[_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable31(UGeckoInstruction _inst) {(this->*dynaOpTable31[_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable59(UGeckoInstruction _inst) {(this->*dynaOpTable59[_inst.SUBOP5 ])(_inst);} -void JitArmIL::DynaRunTable63(UGeckoInstruction _inst) {(this->*dynaOpTable63[_inst.SUBOP10])(_inst);} - -struct GekkoOPTemplate -{ - int opcode; - _Instruction Inst; - //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out -}; - -static GekkoOPTemplate primarytable[] = -{ - {4, &JitArmIL::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, - {19, &JitArmIL::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, - {31, &JitArmIL::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, - {59, &JitArmIL::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, - {63, &JitArmIL::DynaRunTable63}, //"RunTable63", 
OPTYPE_SUBTABLE | (63<<24), 0}}, - - {16, &JitArmIL::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {18, &JitArmIL::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - - {1, &JitArmIL::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {2, &JitArmIL::FallBackToInterpreter}, //"DynaBlock", OPTYPE_SYSTEM, 0}}, - {3, &JitArmIL::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {17, &JitArmIL::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - - {7, &JitArmIL::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, - {8, &JitArmIL::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {10, &JitArmIL::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {11, &JitArmIL::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {12, &JitArmIL::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {13, &JitArmIL::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, - {14, &JitArmIL::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - {15, &JitArmIL::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - - {20, &JitArmIL::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, - {21, &JitArmIL::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {23, &JitArmIL::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, - - {24, &JitArmIL::reg_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {25, &JitArmIL::reg_imm}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {26, &JitArmIL::reg_imm}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {27, &JitArmIL::reg_imm}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {28, &JitArmIL::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - {29, &JitArmIL::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - 
- {32, &JitArmIL::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {33, &JitArmIL::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {34, &JitArmIL::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {35, &JitArmIL::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {40, &JitArmIL::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {41, &JitArmIL::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {42, &JitArmIL::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {43, &JitArmIL::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - - {44, &JitArmIL::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {45, &JitArmIL::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {36, &JitArmIL::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {37, &JitArmIL::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {38, &JitArmIL::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {39, &JitArmIL::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - - {46, &JitArmIL::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - {47, &JitArmIL::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - - {48, &JitArmIL::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, - {49, &JitArmIL::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - {50, &JitArmIL::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, - {51, &JitArmIL::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - - {52, &JitArmIL::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, - {53, &JitArmIL::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {54, 
&JitArmIL::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, - {55, &JitArmIL::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - - {56, &JitArmIL::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, - {57, &JitArmIL::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - {60, &JitArmIL::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}}, - {61, &JitArmIL::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - - //missing: 0, 5, 6, 9, 22, 30, 62, 58 - {0, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {5, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {6, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {9, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {22, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {30, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {62, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {58, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, -}; - -static GekkoOPTemplate table4[] = -{ //SUBOP10 - {0, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, - {40, &JitArmIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, - {136, &JitArmIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, - {264, &JitArmIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, - {72, &JitArmIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, - {528, &JitArmIL::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, 
FL_RC_BIT}}, - {560, &JitArmIL::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, - {592, &JitArmIL::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, - {624, &JitArmIL::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, - - {1014, &JitArmIL::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, -}; - -static GekkoOPTemplate table4_2[] = -{ - {10, &JitArmIL::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, - {11, &JitArmIL::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, - {12, &JitArmIL::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, - {13, &JitArmIL::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, - {14, &JitArmIL::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, - {15, &JitArmIL::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, - {18, &JitArmIL::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, - {20, &JitArmIL::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, - {21, &JitArmIL::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, - {23, &JitArmIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, - {24, &JitArmIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, - {25, &JitArmIL::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, - {26, &JitArmIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, - {28, &JitArmIL::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, - {29, &JitArmIL::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, - {30, &JitArmIL::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, - {31, &JitArmIL::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, -}; - - -static GekkoOPTemplate table4_3[] = -{ - {6, &JitArmIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, - {7, &JitArmIL::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, - {38, &JitArmIL::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, - {39, &JitArmIL::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, -}; - -static GekkoOPTemplate table19[] = -{ - {528, 
&JitArmIL::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {16, &JitArmIL::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &JitArmIL::crXX}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &JitArmIL::crXX}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &JitArmIL::crXX}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &JitArmIL::crXX}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &JitArmIL::crXX}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &JitArmIL::crXX}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &JitArmIL::crXX}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &JitArmIL::crXX}, //"crxor", OPTYPE_CR, FL_EVIL}}, - - {150, &JitArmIL::FallBackToInterpreter}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, - {0, &JitArmIL::FallBackToInterpreter}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, - - {50, &JitArmIL::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, - {18, &JitArmIL::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} -}; - - -static GekkoOPTemplate table31[] = -{ - {28, &JitArmIL::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {60, &JitArmIL::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {444, &JitArmIL::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {124, &JitArmIL::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {316, &JitArmIL::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {412, &JitArmIL::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {476, &JitArmIL::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {284, &JitArmIL::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {0, &JitArmIL::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {32, &JitArmIL::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {26, &JitArmIL::FallBackToInterpreter}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {922, &JitArmIL::FallBackToInterpreter}, //"extshx", 
OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {954, &JitArmIL::FallBackToInterpreter}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {536, &JitArmIL::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {792, &JitArmIL::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {824, &JitArmIL::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {24, &JitArmIL::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - - {54, &JitArmIL::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, - {86, &JitArmIL::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, - {246, &JitArmIL::FallBackToInterpreter}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, - {278, &JitArmIL::FallBackToInterpreter}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, - {470, &JitArmIL::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, - {758, &JitArmIL::FallBackToInterpreter}, //"dcba", OPTYPE_DCACHE, 0, 4}}, - {1014, &JitArmIL::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, - - //load word - {23, &JitArmIL::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {55, &JitArmIL::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load halfword - {279, &JitArmIL::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {311, &JitArmIL::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load halfword signextend - {343, &JitArmIL::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {375, &JitArmIL::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load byte - {87, &JitArmIL::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {119, &JitArmIL::FallBackToInterpreter}, 
//"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load byte reverse - {534, &JitArmIL::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {790, &JitArmIL::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - - // Conditional load/store (Wii SMP) - {150, &JitArmIL::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, - {20, &JitArmIL::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}}, - - //load string (interpret these) - {533, &JitArmIL::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, - {597, &JitArmIL::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, - - //store word - {151, &JitArmIL::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, &JitArmIL::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store halfword - {407, &JitArmIL::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, &JitArmIL::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store byte - {215, &JitArmIL::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, &JitArmIL::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store bytereverse - {662, &JitArmIL::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {918, &JitArmIL::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, - - {661, &JitArmIL::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, - {725, &JitArmIL::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, - - // fp load/store - {535, &JitArmIL::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {567, &JitArmIL::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - {599, 
&JitArmIL::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {631, &JitArmIL::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - - {663, &JitArmIL::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {695, &JitArmIL::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {727, &JitArmIL::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {759, &JitArmIL::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {983, &JitArmIL::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - - {19, &JitArmIL::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, - {83, &JitArmIL::FallBackToInterpreter}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, - {144, &JitArmIL::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, - {146, &JitArmIL::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {210, &JitArmIL::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}}, - {242, &JitArmIL::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, - {339, &JitArmIL::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, - {467, &JitArmIL::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, - {371, &JitArmIL::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &JitArmIL::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, - {595, &JitArmIL::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - {659, &JitArmIL::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - - {4, &JitArmIL::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - {598, &JitArmIL::FallBackToInterpreter}, //"sync", OPTYPE_SYSTEM, 0, 2}}, - {982, &JitArmIL::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, - - // Unused instructions on GC - {310, &JitArmIL::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, - {438, &JitArmIL::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}}, - {854, 
&JitArmIL::FallBackToInterpreter}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}}, - {306, &JitArmIL::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}}, - {370, &JitArmIL::FallBackToInterpreter}, //"tlbia", OPTYPE_SYSTEM, 0}}, - {566, &JitArmIL::FallBackToInterpreter}, //"tlbsync", OPTYPE_SYSTEM, 0}}, -}; - -static GekkoOPTemplate table31_2[] = -{ - {266, &JitArmIL::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {778, &JitArmIL::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &JitArmIL::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {522, &JitArmIL::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {138, &JitArmIL::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {650, &JitArmIL::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {234, &JitArmIL::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {202, &JitArmIL::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {491, &JitArmIL::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {1003, &JitArmIL::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {459, &JitArmIL::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {971, &JitArmIL::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {75, &JitArmIL::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {11, &JitArmIL::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {235, 
&JitArmIL::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {747, &JitArmIL::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {104, &JitArmIL::FallBackToInterpreter}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {40, &JitArmIL::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {552, &JitArmIL::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {8, &JitArmIL::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {520, &JitArmIL::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {136, &JitArmIL::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {232, &JitArmIL::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {200, &JitArmIL::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, -}; - -static GekkoOPTemplate table59[] = -{ - {18, &JitArmIL::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, - {20, &JitArmIL::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArmIL::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, -// {22, &JitArmIL::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {24, &JitArmIL::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArmIL::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArmIL::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitArmIL::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArmIL::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArmIL::FallBackToInterpreter}, //"fnmaddsx", 
OPTYPE_FPU, FL_RC_BIT_F}}, -}; - -static GekkoOPTemplate table63[] = -{ - {264, &JitArmIL::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitArmIL::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitArmIL::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, - {14, &JitArmIL::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, - {15, &JitArmIL::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, - {72, &JitArmIL::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, - {136, &JitArmIL::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {40, &JitArmIL::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, - {12, &JitArmIL::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, - - {64, &JitArmIL::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, - {583, &JitArmIL::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}}, - {70, &JitArmIL::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}}, - {38, &JitArmIL::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}}, - {134, &JitArmIL::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}}, - {711, &JitArmIL::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}}, -}; - -static GekkoOPTemplate table63_2[] = -{ - {18, &JitArmIL::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, - {20, &JitArmIL::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArmIL::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {22, &JitArmIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, - {23, &JitArmIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArmIL::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, &JitArmIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArmIL::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, 
&JitArmIL::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArmIL::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArmIL::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, -}; - - -namespace JitArmILTables -{ - -void CompileInstruction(PPCAnalyst::CodeOp & op) -{ - JitArmIL *jitarm = (JitArmIL *)jit; - (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); - GekkoOPInfo *info = op.opinfo; - if (info) { -#ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" - rsplocations.push_back(jit.js.compilerPC); - } -#endif - info->compileCount++; - info->lastUse = jit->js.compilerPC; - } -} - -void InitTables() -{ - // once initialized, tables are read-only - static bool initialized = false; - if (initialized) - return; - - //clear - for (int i = 0; i < 32; i++) - { - dynaOpTable59[i] = &JitArmIL::unknown_instruction; - } - - for (int i = 0; i < 1024; i++) - { - dynaOpTable4 [i] = &JitArmIL::unknown_instruction; - dynaOpTable19[i] = &JitArmIL::unknown_instruction; - dynaOpTable31[i] = &JitArmIL::unknown_instruction; - dynaOpTable63[i] = &JitArmIL::unknown_instruction; - } - - for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++) - { - dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_2[j].opcode; - dynaOpTable4[op] = table4_2[j].Inst; - } - } - - for (int i = 0; i < 16; i++) - { - int fill = i << 6; - for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_3[j].opcode; - dynaOpTable4[op] = table4_3[j].Inst; - } - } - - for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++) - { - int op = table4[i].opcode; - dynaOpTable4[op] = table4[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++) - { - int op = 
table31[i].opcode; - dynaOpTable31[op] = table31[i].Inst; - } - - for (int i = 0; i < 1; i++) - { - int fill = i << 9; - for (int j = 0; j < (int)(sizeof(table31_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill + table31_2[j].opcode; - dynaOpTable31[op] = table31_2[j].Inst; - } - } - - for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++) - { - int op = table19[i].opcode; - dynaOpTable19[op] = table19[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++) - { - int op = table59[i].opcode; - dynaOpTable59[op] = table59[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++) - { - int op = table63[i].opcode; - dynaOpTable63[op] = table63[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill + table63_2[j].opcode; - dynaOpTable63[op] = table63_2[j].Inst; - } - } - - initialized = true; - -} - -} // namespace diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h deleted file mode 100644 index 7d92e6cf0b..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#pragma once - -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCTables.h" - -namespace JitArmILTables -{ - void CompileInstruction(PPCAnalyst::CodeOp & op); - void InitTables(); -} diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index c7515e5bfc..8bb10142d1 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -356,6 +356,31 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { return getOp1(Op1); } } + if (Opcode == FastCRGTSet) + { + if (getOpcode(*Op1) == ICmpCRSigned) + return EmitICmpSgt(getOp1(Op1), getOp2(Op1)); + if (getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpUgt(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst((s64)GetImmValue64(Op1) > 0); + } + if (Opcode == FastCRLTSet) + { + if (getOpcode(*Op1) == ICmpCRSigned) + return EmitICmpSlt(getOp1(Op1), getOp2(Op1)); + if (getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpUlt(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst(!!(GetImmValue64(Op1) & (1ull << 62))); + } + if (Opcode == FastCREQSet) + { + if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpEq(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst((GetImmValue64(Op1) & 0xFFFFFFFFU) == 0); + } return EmitUOp(Opcode, Op1, extra); } @@ -778,6 +803,35 @@ InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) { return EmitBiOp(Or, Op1, Op2); } +static unsigned ICmpInverseOp(unsigned op) +{ + switch (op) + { + case ICmpEq: + return ICmpNe; + case ICmpNe: + return ICmpEq; + case ICmpUlt: + return ICmpUge; + case ICmpUgt: + return ICmpUle; + case ICmpUle: + return ICmpUgt; + case ICmpUge: + return ICmpUlt; + case ICmpSlt: + return ICmpSge; + case ICmpSgt: + return ICmpSle; + case ICmpSle: + return ICmpSgt; + case ICmpSge: + return ICmpSlt; + } + PanicAlert("Bad opcode"); + return Nop; +} + InstLoc 
IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) { simplifyCommutative(Xor, Op1, Op2); @@ -794,6 +848,11 @@ InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) { GetImmValue(getOp2(Op1)); return FoldXor(getOp1(Op1), EmitIntConst(RHS)); } + if (isICmp(getOpcode(*Op1)) && GetImmValue(Op2) == 1) + { + return FoldBiOp(ICmpInverseOp(getOpcode(*Op1)), getOp1(Op1), getOp2(Op1)); + + } } if (Op1 == Op2) return EmitIntConst(0); @@ -849,42 +908,6 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) { return EmitBranchUncond(Op2); return nullptr; } - if (getOpcode(*Op1) == And && - isImm(*getOp2(Op1)) && - getOpcode(*getOp1(Op1)) == ICmpCRSigned) { - unsigned branchValue = GetImmValue(getOp2(Op1)); - if (branchValue == 2) - return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - if (branchValue == 4) - return FoldBranchCond(EmitICmpSgt(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - if (branchValue == 8) - return FoldBranchCond(EmitICmpSlt(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - } - if (getOpcode(*Op1) == Xor && - isImm(*getOp2(Op1))) { - InstLoc XOp1 = getOp1(Op1); - unsigned branchValue = GetImmValue(getOp2(Op1)); - if (getOpcode(*XOp1) == And && - isImm(*getOp2(XOp1)) && - getOpcode(*getOp1(XOp1)) == ICmpCRSigned) { - unsigned innerBranchValue = - GetImmValue(getOp2(XOp1)); - if (branchValue == innerBranchValue) { - if (branchValue == 2) - return FoldBranchCond(EmitICmpNe(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - if (branchValue == 4) - return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - if (branchValue == 8) - return FoldBranchCond(EmitICmpSge(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - } - } - } return EmitBiOp(BranchCond, Op1, Op2); } @@ -967,16 +990,8 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) { if (isImm(*Op1)) { if (isImm(*Op2)) { - int c1 = 
(int)GetImmValue(Op1), - c2 = (int)GetImmValue(Op2), - result; - if (c1 == c2) - result = 2; - else if (c1 > c2) - result = 4; - else - result = 8; - return EmitIntConst(result); + s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2); + return EmitIntConst64((u64)diff); } } return EmitBiOp(ICmpCRSigned, Op1, Op2); @@ -985,16 +1000,8 @@ InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) { InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) { if (isImm(*Op1)) { if (isImm(*Op2)) { - unsigned int c1 = GetImmValue(Op1), - c2 = GetImmValue(Op2), - result; - if (c1 == c2) - result = 2; - else if (c1 > c2) - result = 4; - else - result = 8; - return EmitIntConst(result); + u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2); + return EmitIntConst64(diff); } } return EmitBiOp(ICmpCRUnsigned, Op1, Op2); @@ -1056,7 +1063,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned } } -InstLoc IRBuilder::EmitIntConst(unsigned value) { +InstLoc IRBuilder::EmitIntConst64(u64 value) { InstLoc curIndex = InstList.data() + InstList.size(); InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8)); MarkUsed.push_back(false); @@ -1064,7 +1071,7 @@ InstLoc IRBuilder::EmitIntConst(unsigned value) { return curIndex; } -unsigned IRBuilder::GetImmValue(InstLoc I) const { +u64 IRBuilder::GetImmValue64(InstLoc I) const { return ConstList[*I >> 8]; } @@ -1129,9 +1136,9 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { numberOfOperands[CInt16] = 0; numberOfOperands[CInt32] = 0; - static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; - static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, 
DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, }; - static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; + static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; + static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, }; + static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; for (auto& op : ZeroOp) { numberOfOperands[op] = 0; } @@ -1235,10 +1242,12 @@ static std::unique_ptr writer; static const std::string opcodeNames[] = { "Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR", "LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw", - "Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg", 
- "StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF", - "StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or", - "Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", + "Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR", + "ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry", + "StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR", + "FastCRSOSet", "FastCREQSet", "FastCRGTSet", "FastCRLTSet", + "FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor", + "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", "ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt", "ICmpUlt", "ICmpUge", "ICmpUle", "ICmpSgt", "ICmpSlt", "ICmpSge", "ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start", diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index f84f29fe02..58202ffd9b 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -33,6 +33,9 @@ enum Opcode { Load8, // These loads zext Load16, Load32, + // CR conversions + ConvertFromFastCR, + ConvertToFastCR, // Branches BranchUncond, // Register store operators @@ -45,6 +48,11 @@ enum Opcode { StoreFPRF, StoreGQR, StoreSRR, + // Branch conditions + FastCRSOSet, + FastCREQSet, + FastCRGTSet, + FastCRLTSet, // Arbitrary interpreter instruction FallBackToInterpreter, @@ -74,6 +82,7 @@ enum Opcode { ICmpSlt, ICmpSge, ICmpSle, // Opposite of sgt + // Memory store operators Store8, Store16, @@ -237,7 +246,8 @@ private: unsigned ComputeKnownZeroBits(InstLoc I) const; public: - InstLoc EmitIntConst(unsigned value); + InstLoc EmitIntConst(unsigned value) { return EmitIntConst64(value); } + InstLoc EmitIntConst64(u64 value); InstLoc EmitStoreLink(InstLoc val) { return FoldUOp(StoreLink, val); } @@ -373,6 +383,24 @@ public: InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpCRUnsigned, op1, op2); 
} + InstLoc EmitConvertFromFastCR(InstLoc op1) { + return FoldUOp(ConvertFromFastCR, op1); + } + InstLoc EmitConvertToFastCR(InstLoc op1) { + return FoldUOp(ConvertToFastCR, op1); + } + InstLoc EmitFastCRSOSet(InstLoc op1) { + return FoldUOp(FastCRSOSet, op1); + } + InstLoc EmitFastCREQSet(InstLoc op1) { + return FoldUOp(FastCREQSet, op1); + } + InstLoc EmitFastCRLTSet(InstLoc op1) { + return FoldUOp(FastCRLTSet, op1); + } + InstLoc EmitFastCRGTSet(InstLoc op1) { + return FoldUOp(FastCRGTSet, op1); + } InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) { return FoldBiOp(FallBackToInterpreter, op1, op2); } @@ -532,7 +560,8 @@ public: InstLoc getFirstInst() { return InstList.data(); } unsigned int getNumInsts() { return (unsigned int)InstList.size(); } unsigned int ReadInst(InstLoc I) { return *I; } - unsigned int GetImmValue(InstLoc I) const; + unsigned int GetImmValue(InstLoc I) const { return (u32)GetImmValue64(I); } + u64 GetImmValue64(InstLoc I) const; void SetMarkUsed(InstLoc I); bool IsMarkUsed(InstLoc I) const; void WriteToFile(u64 codeHash); @@ -571,7 +600,7 @@ private: std::vector InstList; // FIXME: We must ensure this is continuous! 
std::vector MarkUsed; // Used for IRWriter - std::vector ConstList; + std::vector ConstList; InstLoc curReadPtr; InstLoc GRegCache[32]; InstLoc GRegCacheStore[32]; diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp index 85601cd0c5..c60080e6a9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp @@ -61,15 +61,35 @@ void JitILBase::bx(UGeckoInstruction inst) ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); } +static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) +{ + IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + IREmitter::InstLoc CRTest; + switch (3 - (inst.BI & 3)) + { + case CR_SO_BIT: + CRTest = ibuild.EmitFastCRSOSet(CRReg); + break; + case CR_EQ_BIT: + CRTest = ibuild.EmitFastCREQSet(CRReg); + break; + case CR_GT_BIT: + CRTest = ibuild.EmitFastCRGTSet(CRReg); + break; + case CR_LT_BIT: + CRTest = ibuild.EmitFastCRLTSet(CRReg); + break; + } + if (!(inst.BO & 8)) + CRTest = ibuild.EmitXor(CRTest, ibuild.EmitIntConst(1)); + return CRTest; +} + static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) { IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr; if ((inst.BO & 16) == 0) // Test a CR bit { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - CRTest = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - CRTest = ibuild.EmitXor(CRCmp, CRTest); + CRTest = EmitCRTest(ibuild, inst); } if ((inst.BO & 4) == 0) { @@ -140,12 +160,10 @@ void JitILBase::bcctrx(UGeckoInstruction inst) IREmitter::InstLoc test; if ((inst.BO & 16) == 0) // Test a CR bit { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - test = ibuild.EmitAnd(CRReg, CRCmp); - if 
(!(inst.BO & 8)) - test = ibuild.EmitXor(test, CRCmp); - } else { + test = EmitCRTest(ibuild, inst); + } + else + { test = ibuild.EmitIntConst(1); } test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp index 3184fbc16f..b97740ce5e 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp @@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst) int ordered = (inst.SUBOP10 == 32) ? 1 : 0; res = ibuild.EmitFDCmpCR(lhs, rhs, ordered); ibuild.EmitStoreFPRF(res); - ibuild.EmitStoreCR(res, inst.CRFD); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); } void JitILBase::fsign(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp index dbd08d94bb..31c6ffa4b4 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp @@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst) IREmitter::InstLoc d = ibuild.EmitIntConst(0); for (int i = 0; i < 8; ++i) { - d = ibuild.EmitShl(d, ibuild.EmitIntConst(4)); - d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i)); + IREmitter::InstLoc cr = ibuild.EmitLoadCR(i); + cr = ibuild.EmitConvertFromFastCR(cr); + cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i)); + d = ibuild.EmitOr(d, cr); } ibuild.EmitStoreGReg(d, inst.RD); } @@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst) IREmitter::InstLoc value; value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4)); value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF)); + value = ibuild.EmitConvertToFastCR(value); ibuild.EmitStoreCR(value, i); } } @@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst) 
// Get bit CRBA in EAX aligned with bit CRBD const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2); + eax = ibuild.EmitConvertFromFastCR(eax); if (shiftA < 0) eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA)); else if (shiftA > 0) @@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst) // Get bit CRBB in ECX aligned with bit CRBD const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2); + ecx = ibuild.EmitConvertFromFastCR(ecx); if (shiftB < 0) ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB)); else if (shiftB > 0) @@ -211,7 +216,9 @@ void JitILBase::crXX(UGeckoInstruction inst) // Store result bit in CRBD eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3))); IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2); + bd = ibuild.EmitConvertFromFastCR(bd); bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3)))); bd = ibuild.EmitOr(bd, eax); + bd = ibuild.EmitConvertToFastCR(bd); ibuild.EmitStoreCR(bd, inst.CRBD >> 2); } diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 8dc6eaefe2..e7c1d1ed2a 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -27,8 +27,6 @@ #if _M_ARM_32 #include "Core/PowerPC/JitArm32/Jit.h" #include "Core/PowerPC/JitArm32/JitArm_Tables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" #endif static bool bFakeVMEM = false; @@ -67,11 +65,6 @@ namespace JitInterface ptr = new JitArm(); break; } - case 4: - { - ptr = new JitArmIL(); - break; - } #endif default: { @@ -106,11 +99,6 @@ namespace JitInterface JitArmTables::InitTables(); break; } - case 4: - { - JitArmILTables::InitTables(); - break; - } #endif default: { diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 9778c2234a..8b0f9d6362 
100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -41,10 +41,10 @@ PPCDebugInterface debug_interface; u32 CompactCR() { - u32 new_cr = ppcState.cr_fast[0] << 28; - for (int i = 1; i < 8; i++) + u32 new_cr = 0; + for (int i = 0; i < 8; i++) { - new_cr |= ppcState.cr_fast[i] << (28 - i * 4); + new_cr |= GetCRField(i) << (28 - i * 4); } return new_cr; } @@ -53,7 +53,7 @@ void ExpandCR(u32 cr) { for (int i = 0; i < 8; i++) { - ppcState.cr_fast[i] = (cr >> (28 - i * 4)) & 0xF; + SetCRField(i, (cr >> (28 - i * 4)) & 0xF); } } @@ -99,7 +99,8 @@ static void ResetRegisters() ppcState.pc = 0; ppcState.npc = 0; ppcState.Exceptions = 0; - ((u64*)(&ppcState.cr_fast[0]))[0] = 0; + for (auto& v : ppcState.cr_val) + v = 0x8000000000000001; TL = 0; TU = 0; diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 107c63da95..aa4e351d94 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -38,7 +38,20 @@ struct GC_ALIGNED64(PowerPCState) u32 pc; // program counter u32 npc; - u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast? + // Optimized CR implementation. Instead of storing CR in its PowerPC format + // (4 bit value, SO/EQ/LT/GT), we store instead a 64 bit value for each of + // the 8 CR register parts. This 64 bit value follows this format: + // - SO iff. bit 61 is set + // - EQ iff. lower 32 bits == 0 + // - GT iff. (s64)cr_val > 0 + // - LT iff. bit 62 is set + // + // This has the interesting property that sign-extending the result of an + // operation from 32 to 64 bits results in a 64 bit value that works as a + // CR value. Checking each part of CR is also fast, as it is equivalent to + // testing one bit or the low 32 bit part of a register. And CR can still + // be manipulated bit by bit fairly easily. 
+ u64 cr_val[8]; u32 msr; // machine specific register u32 fpscr; // floating point flags/status bits @@ -149,27 +162,63 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); } // namespace -// Fast CR system - store them in single bytes instead of nibbles to not have to -// mask/shift them out. +enum CRBits +{ + CR_SO = 1, + CR_EQ = 2, + CR_GT = 4, + CR_LT = 8, -// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all. + CR_SO_BIT = 0, + CR_EQ_BIT = 1, + CR_GT_BIT = 2, + CR_LT_BIT = 3, +}; + +// Convert between PPC and internal representation of CR. +inline u64 PPCCRToInternal(u8 value) +{ + u64 cr_val = 0x100000000; + cr_val |= (u64)!!(value & CR_SO) << 61; + cr_val |= (u64)!(value & CR_EQ); + cr_val |= (u64)!(value & CR_GT) << 63; + cr_val |= (u64)!!(value & CR_LT) << 62; + + return cr_val; +} + +// Warning: these CR operations are fairly slow since they need to convert from +// PowerPC format (4 bit) to our internal 64 bit format. See the definition of +// ppcState.cr_val for more explanations. 
inline void SetCRField(int cr_field, int value) { - PowerPC::ppcState.cr_fast[cr_field] = value; + PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value); } inline u32 GetCRField(int cr_field) { - return PowerPC::ppcState.cr_fast[cr_field]; + u64 cr_val = PowerPC::ppcState.cr_val[cr_field]; + u32 ppc_cr = 0; + + // SO + ppc_cr |= !!(cr_val & (1ull << 61)); + // EQ + ppc_cr |= ((cr_val & 0xFFFFFFFF) == 0) << 1; + // GT + ppc_cr |= ((s64)cr_val > 0) << 2; + // LT + ppc_cr |= !!(cr_val & (1ull << 62)) << 3; + + return ppc_cr; } inline u32 GetCRBit(int bit) { - return (PowerPC::ppcState.cr_fast[bit >> 2] >> (3 - (bit & 3))) & 1; + return (GetCRField(bit >> 2) >> (3 - (bit & 3))) & 1; } inline void SetCRBit(int bit, int value) { if (value & 1) - PowerPC::ppcState.cr_fast[bit >> 2] |= 0x8 >> (bit & 3); + SetCRField(bit >> 2, GetCRField(bit >> 2) | (0x8 >> (bit & 3))); else - PowerPC::ppcState.cr_fast[bit >> 2] &= ~(0x8 >> (bit & 3)); + SetCRField(bit >> 2, GetCRField(bit >> 2) & ~(0x8 >> (bit & 3))); } // SetCR and GetCR are fairly slow. Should be avoided if possible.