diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp
index f565a77633..2dbe1c57bf 100644
--- a/Source/Core/Common/Src/ABI.cpp
+++ b/Source/Core/Common/Src/ABI.cpp
@@ -127,12 +127,20 @@ void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2
 void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 {
 	ABI_AlignStack(2 * 4);
-	PUSH(32, arg1);
 	PUSH(32, Imm32(param2));
+	PUSH(32, arg1);
 	CALL(func);
 	ABI_RestoreStack(2 * 4);
 }

+void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
+{
+	ABI_AlignStack(1 * 4);
+	PUSH(32, arg1);
+	CALL(func);
+	ABI_RestoreStack(1 * 4);
+}
+
 void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 	// Note: 4 * 4 = 16 bytes, so alignment is preserved.
 	PUSH(EBP);
diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h
index 36f5ea0b61..308c3021eb 100644
--- a/Source/Core/Common/Src/x64Emitter.h
+++ b/Source/Core/Common/Src/x64Emitter.h
@@ -600,6 +600,7 @@ public:
 	void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
 	void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
 	void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
+	void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);

 	// Pass a register as a paremeter.
 	void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
index 4c320f4810..88f6bf563e 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@@ -300,6 +300,14 @@ void Jit64::Cleanup()

 void Jit64::WriteExit(u32 destination, int exit_num)
 {
+	// We are about to jump to the dispatcher => save and flush regs
+	RegCacheState regCacheStateGPR;
+	RegCacheState regCacheStateFPR;
+	gpr.SaveState(regCacheStateGPR);
+	fpr.SaveState(regCacheStateFPR);
+	gpr.Flush(FLUSH_ALL);
+	fpr.Flush(FLUSH_ALL);
+
 	Cleanup();
 	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));

@@ -321,29 +329,69 @@ void Jit64::WriteExit(u32 destination, int exit_num)
 		MOV(32, M(&PC), Imm32(destination));
 		JMP(asm_routines.dispatcher, true);
 	}
+
+	// Restore register states so that the next instructions can still use the cached values
+	gpr.LoadState(regCacheStateGPR);
+	fpr.LoadState(regCacheStateFPR);
 }

 void Jit64::WriteExitDestInEAX(int exit_num)
 {
+	// We are about to jump to the dispatcher => save and flush regs
+	RegCacheState regCacheStateGPR;
+	RegCacheState regCacheStateFPR;
+	gpr.SaveState(regCacheStateGPR);
+	fpr.SaveState(regCacheStateFPR);
+	gpr.Flush(FLUSH_ALL);
+	fpr.Flush(FLUSH_ALL);
+
 	MOV(32, M(&PC), R(EAX));
 	Cleanup();
 	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
 	JMP(asm_routines.dispatcher, true);
+
+	// Restore register states so that the next instructions can still use the cached values
+	gpr.LoadState(regCacheStateGPR);
+	fpr.LoadState(regCacheStateFPR);
 }

 void Jit64::WriteRfiExitDestInEAX()
 {
+	// We are about to jump to the exception handler => save and flush regs
+	RegCacheState regCacheStateGPR;
+	RegCacheState regCacheStateFPR;
+	gpr.SaveState(regCacheStateGPR);
+	fpr.SaveState(regCacheStateFPR);
+	gpr.Flush(FLUSH_ALL);
+	fpr.Flush(FLUSH_ALL);
+
 	MOV(32, M(&PC), R(EAX));
 	Cleanup();
 	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
 	JMP(asm_routines.testExceptions, true);
+
+	// Restore register states so that the next instructions can still use the cached values
+	gpr.LoadState(regCacheStateGPR);
+	fpr.LoadState(regCacheStateFPR);
 }

 void Jit64::WriteExceptionExit()
 {
+	// We are about to jump to the exception handler => save and flush regs
+	RegCacheState regCacheStateGPR;
+	RegCacheState regCacheStateFPR;
+	gpr.SaveState(regCacheStateGPR);
+	fpr.SaveState(regCacheStateFPR);
+	gpr.Flush(FLUSH_ALL);
+	fpr.Flush(FLUSH_ALL);
+
 	Cleanup();
 	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
 	JMP(asm_routines.testExceptions, true);
+
+	// Restore register states so that the next instructions can still use the cached values
+	gpr.LoadState(regCacheStateGPR);
+	fpr.LoadState(regCacheStateFPR);
 }

 void STACKALIGN Jit64::Run()
@@ -552,7 +600,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 		{
 			//This instruction uses FPU - needs to add FP exception bailout
 			TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit
-			FixupBranch b1 = J_CC(CC_NZ);
+			FixupBranch b1 = J_CC(CC_NZ, true);
+
+			// We are about to jump to the exception handler => save and flush regs
+			RegCacheState regCacheStateGPR;
+			RegCacheState regCacheStateFPR;
+			gpr.SaveState(regCacheStateGPR);
+			fpr.SaveState(regCacheStateFPR);
+			gpr.Flush(FLUSH_ALL);
+			fpr.Flush(FLUSH_ALL);

 			// If a FPU exception occurs, the exception handler will read
 			// from PC.  Update PC with the latest value in case that happens.
@@ -560,18 +616,19 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 			SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
 			JMP(asm_routines.fpException, true);
 			SetJumpTarget(b1);
+
+			// Restore register states
+			// (If no FPU exception occurred, the next instructions can still use the cached values)
+			gpr.LoadState(regCacheStateGPR);
+			fpr.LoadState(regCacheStateFPR);
 		}

 		Jit64Tables::CompileInstruction(ops[i]);

 		if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
 		{
-			// In case we are about to jump to the dispatcher, flush regs
-			gpr.Flush(FLUSH_ALL);
-			fpr.Flush(FLUSH_ALL);
-
 			TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
-			FixupBranch noMemException = J_CC(CC_Z);
+			FixupBranch noMemException = J_CC(CC_Z, true);

 			// If a memory exception occurs, the exception handler will read
 			// from PC.  Update PC with the latest value in case that happens.
@@ -607,8 +664,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

 	if (broken_block)
 	{
-		gpr.Flush(FLUSH_ALL);
-		fpr.Flush(FLUSH_ALL);
 		WriteExit(nextPC, 0);
 	}

diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
index daaf15243b..198ee0233b 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
@@ -236,8 +236,6 @@ public:
 	void fmaddXX(UGeckoInstruction inst);
 	void fsign(UGeckoInstruction inst);
 	void stX(UGeckoInstruction inst); //stw sth stb
-	void lXz(UGeckoInstruction inst);
-	void lha(UGeckoInstruction inst);
 	void rlwinmx(UGeckoInstruction inst);
 	void rlwimix(UGeckoInstruction inst);
 	void rlwnmx(UGeckoInstruction inst);
@@ -254,12 +252,8 @@ public:
 	void subfmex(UGeckoInstruction inst);
 	void subfzex(UGeckoInstruction inst);

-	void lbzx(UGeckoInstruction inst);
-	void lwzx(UGeckoInstruction inst);
-	void lhax(UGeckoInstruction inst);
+	void lXXx(UGeckoInstruction inst);

-	void lwzux(UGeckoInstruction inst);
-
 	void stXx(UGeckoInstruction inst);

 	void lmw(UGeckoInstruction inst);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp
index ef50a62057..d55702767c 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp
@@ -77,14 +77,14 @@ static GekkoOPTemplate primarytable[] =
 	{28, &Jit64::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
 	{29, &Jit64::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},

-	{32, &Jit64::lXz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{33, &Jit64::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{34, &Jit64::lXz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{35, &Jit64::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{40, &Jit64::lXz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{41, &Jit64::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
-	{42, &Jit64::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
-	{43, &Jit64::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{32, &Jit64::lXXx}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{33, &Jit64::lXXx}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{34, &Jit64::lXXx}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{35, &Jit64::lXXx}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{40, &Jit64::lXXx}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{41, &Jit64::lXXx}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
+	{42, &Jit64::lXXx}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
+	{43, &Jit64::lXXx}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},

 	{44, &Jit64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
 	{45, &Jit64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
@@ -220,20 +220,20 @@ static GekkoOPTemplate table31[] =
 	{1014, &Jit64::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}},

 	//load word
-	{23, &Jit64::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
-	{55, &Jit64::lwzux}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
+	{23, &Jit64::lXXx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
+	{55, &Jit64::lXXx}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},

 	//load halfword
-	{279, &Jit64::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
-	{311, &Jit64::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
+	{279, &Jit64::lXXx}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
+	{311, &Jit64::lXXx}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},

 	//load halfword signextend
-	{343, &Jit64::lhax}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
-	{375, &Jit64::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
+	{343, &Jit64::lXXx}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
+	{375, &Jit64::lXXx}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},

 	//load byte
-	{87, &Jit64::lbzx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
-	{119, &Jit64::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
+	{87, &Jit64::lXXx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
+	{119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},

 	//load byte reverse
 	{534, &Jit64::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
index bfd9e4c1e4..1b3acaf94b 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
@@ -28,18 +28,15 @@ using namespace PowerPC;
 RegCache::RegCache() : emit(0) {
 	memset(locks, 0, sizeof(locks));
 	memset(xlocks, 0, sizeof(xlocks));
-	memset(saved_locks, 0, sizeof(saved_locks));
-	memset(saved_xlocks, 0, sizeof(saved_xlocks));
 	memset(regs, 0, sizeof(regs));
 	memset(xregs, 0, sizeof(xregs));
-	memset(saved_regs, 0, sizeof(saved_regs));
-	memset(saved_xregs, 0, sizeof(saved_xregs));
 }

 void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
 {
 	for (int i = 0; i < NUMXREGS; i++)
 	{
+		xregs[i].ppcReg = -1;
 		xregs[i].free = true;
 		xregs[i].dirty = false;
 		xlocks[i] = false;
@@ -48,6 +45,7 @@ void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
 	{
 		regs[i].location = GetDefaultLocation(i);
 		regs[i].away = false;
+		locks[i] = false;
 	}

 	// todo: sort to find the most popular regs
@@ -137,20 +135,20 @@ X64Reg RegCache::GetFreeXReg()
 	return (X64Reg) -1;
 }

-void RegCache::SaveState()
+void RegCache::SaveState(RegCacheState & state)
 {
-	memcpy(saved_locks, locks, sizeof(locks));
-	memcpy(saved_xlocks, xlocks, sizeof(xlocks));
-	memcpy(saved_regs, regs, sizeof(regs));
-	memcpy(saved_xregs, xregs, sizeof(xregs));
+	memcpy(state.locks, locks, sizeof(locks));
+	memcpy(state.xlocks, xlocks, sizeof(xlocks));
+	memcpy(state.regs, regs, sizeof(regs));
+	memcpy(state.xregs, xregs, sizeof(xregs));
 }

-void RegCache::LoadState()
+void RegCache::LoadState(const RegCacheState & state)
 {
-	memcpy(xlocks, saved_xlocks, sizeof(xlocks));
-	memcpy(locks, saved_locks, sizeof(locks));
-	memcpy(regs, saved_regs, sizeof(regs));
-	memcpy(xregs, saved_xregs, sizeof(xregs));
+	memcpy(xlocks, state.xlocks, sizeof(xlocks));
+	memcpy(locks, state.locks, sizeof(locks));
+	memcpy(regs, state.regs, sizeof(regs));
+	memcpy(xregs, state.xregs, sizeof(xregs));
 }

 void RegCache::FlushR(X64Reg reg)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
index cefa0e13f7..a2cce002ee 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
@@ -55,21 +55,17 @@ typedef int PReg;
 #define NUMXREGS 8
 #endif

-class RegCache
+struct RegCacheState
 {
-private:
 	bool locks[32];
-	bool saved_locks[32];
-	bool saved_xlocks[NUMXREGS];
-
-protected:
 	bool xlocks[NUMXREGS];
 	PPCCachedReg regs[32];
 	X64CachedReg xregs[NUMXREGS];
+};

-	PPCCachedReg saved_regs[32];
-	X64CachedReg saved_xregs[NUMXREGS];
-
+class RegCache : protected RegCacheState
+{
+protected:
 	virtual const int *GetAllocationOrder(int &count) = 0;

 	XEmitter *emit;
@@ -123,8 +119,8 @@ public:

 	X64Reg GetFreeXReg();

-	void SaveState();
-	void LoadState();
+	void SaveState(RegCacheState & state);
+	void LoadState(const RegCacheState & state);
 };

 class GPRRegCache : public RegCache
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
index 146c1f0513..992c006e74 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
@@ -45,8 +45,6 @@ void Jit64::sc(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
 	MOV(32, M(&PC), Imm32(js.compilerPC + 4));
 	OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL));
 	WriteExceptionExit();
@@ -57,8 +55,6 @@ void Jit64::rfi(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
 	// See Interpreter rfi for details
 	const u32 mask = 0x87C0FFFF;
 	const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
@@ -79,8 +75,6 @@ void Jit64::bx(UGeckoInstruction inst)
 	if (inst.LK)
 		MOV(32, M(&LR), Imm32(js.compilerPC + 4));

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);

 	if (js.isLastInstruction)
 	{
@@ -120,17 +114,14 @@ void Jit64::bcx(UGeckoInstruction inst)
 	// USES_CR
 	_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
-
 	FixupBranch pCTRDontBranch;
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
 	{
 		SUB(32, M(&CTR), Imm8(1));
 		if (inst.BO & BO_BRANCH_IF_CTR_0)
-			pCTRDontBranch = J_CC(CC_NZ);
+			pCTRDontBranch = J_CC(CC_NZ, true);
 		else
-			pCTRDontBranch = J_CC(CC_Z);
+			pCTRDontBranch = J_CC(CC_Z, true);
 	}

 	FixupBranch pConditionDontBranch;
@@ -138,9 +129,9 @@ void Jit64::bcx(UGeckoInstruction inst)
 	{
 		TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
 		if (inst.BO & BO_BRANCH_IF_TRUE)  // Conditional branch
-			pConditionDontBranch = J_CC(CC_Z);
+			pConditionDontBranch = J_CC(CC_Z, true);
 		else
-			pConditionDontBranch = J_CC(CC_NZ);
+			pConditionDontBranch = J_CC(CC_NZ, true);
 	}

 	if (inst.LK)
@@ -165,9 +156,6 @@ void Jit64::bcctrx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
-
 	// bcctrx doesn't decrement and/or test CTR
 	_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");

@@ -196,7 +184,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
 			branch = CC_Z;
 		else
 			branch = CC_NZ;
-		FixupBranch b = J_CC(branch, false);
+		FixupBranch b = J_CC(branch, true);
 		MOV(32, R(EAX), M(&CTR));
 		AND(32, R(EAX), Imm32(0xFFFFFFFC));
 		//MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX()
@@ -214,17 +202,14 @@ void Jit64::bclrx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)

-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
-
 	FixupBranch pCTRDontBranch;
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
 	{
 		SUB(32, M(&CTR), Imm8(1));
 		if (inst.BO & BO_BRANCH_IF_CTR_0)
-			pCTRDontBranch = J_CC(CC_NZ);
+			pCTRDontBranch = J_CC(CC_NZ, true);
 		else
-			pCTRDontBranch = J_CC(CC_Z);
+			pCTRDontBranch = J_CC(CC_Z, true);
 	}

 	FixupBranch pConditionDontBranch;
@@ -232,9 +217,9 @@ void Jit64::bclrx(UGeckoInstruction inst)
 	{
 		TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
 		if (inst.BO & BO_BRANCH_IF_TRUE)  // Conditional branch
-			pConditionDontBranch = J_CC(CC_Z);
+			pConditionDontBranch = J_CC(CC_Z, true);
 		else
-			pConditionDontBranch = J_CC(CC_NZ);
+			pConditionDontBranch = J_CC(CC_NZ, true);
 	}

 	// This below line can be used to prove that blr "eats flags" in practice.
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
index 81f7d388f5..a6bd23d1fe 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
@@ -264,9 +264,6 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 		{
 			js.downcountAmount++;

-			gpr.Flush(FLUSH_ALL);
-			fpr.Flush(FLUSH_ALL);
-
 			int test_bit = 8 >> (js.next_inst.BI & 3);
 			u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0;
 			if ((compareResult & test_bit) == conditionResult)
@@ -359,21 +356,19 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			//	if (rand() & 1)
 			//		std::swap(destination1, destination2), condition = !condition;

-			gpr.Flush(FLUSH_ALL);
-			fpr.Flush(FLUSH_ALL);
 			FixupBranch pLesser  = J_CC(less_than);
 			FixupBranch pGreater = J_CC(greater_than);
 			MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0
-			FixupBranch continue1 = J();
+			FixupBranch continue1 = J(true);

 			SetJumpTarget(pGreater);
 			MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0
-			FixupBranch continue2 = J();
+			FixupBranch continue2 = J(true);

 			SetJumpTarget(pLesser);
 			MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0
 			FixupBranch continue3;
-			if (!!(8 & test_bit) == condition) continue3 = J();
+			if (!!(8 & test_bit) == condition) continue3 = J(true);
 			if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
 			if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
 			if (js.next_inst.OPCD == 16) // bcx
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
index c007343583..f8c5a6986b 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -34,96 +34,84 @@
 #include "JitAsm.h"
 #include "JitRegCache.h"

-void Jit64::lbzx(UGeckoInstruction inst)
+void Jit64::lXXx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	int a = inst.RA, b = inst.RB, d = inst.RD;

-	if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
+	// Skip disabled JIT instructions
+	if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff && (inst.OPCD == 31) && (inst.SUBOP10 == 87))
+	{ Default(inst); return; }
+	if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff && ((inst.OPCD == 34) || (inst.OPCD == 40) || (inst.OPCD == 32)))
+	{ Default(inst); return; }
+	if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff && (inst.OPCD == 32))
 	{ Default(inst); return; }

-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.FlushLockX(ABI_PARAM1);
-	MOV(32, R(ABI_PARAM1), gpr.R(b));
-	if (a)
+	// Determine memory access size and sign extend
+	int accessSize;
+	bool signExtend;
+	switch (inst.OPCD)
 	{
-		ADD(32, R(ABI_PARAM1), gpr.R(a));
+	case 32: /* lwz */
+	case 33: /* lwzu */
+		accessSize = 32;
+		signExtend = false;
+		break;
+
+	case 34: /* lbz */
+	case 35: /* lbzu */
+		accessSize = 8;
+		signExtend = false;
+		break;
+
+	case 40: /* lhz */
+	case 41: /* lhzu */
+		accessSize = 16;
+		signExtend = false;
+		break;
+
+	case 42: /* lha */
+	case 43: /* lhau */
+		accessSize = 16;
+		signExtend = true;
+		break;
+
+	case 31:
+		switch (inst.SUBOP10)
+		{
+		case 23:  /* lwzx */
+		case 55:  /* lwzux */
+			accessSize = 32;
+			signExtend = false;
+			break;
+
+		case 87:  /* lbzx */
+		case 119: /* lbzux */
+			accessSize = 8;
+			signExtend = false;
+			break;
+		case 279: /* lhzx */
+		case 311: /* lhzux */
+			accessSize = 16;
+			signExtend = false;
+			break;
+
+		case 343: /* lhax */
+		case 375: /* lhaux */
+			accessSize = 16;
+			signExtend = true;
+			break;
+
+		default:
+			PanicAlert("Invalid instruction");
+		}
+		break;
+
+	default:
+		PanicAlert("Invalid instruction");
 	}
-	SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
-
-	MEMCHECK_START
-
-	gpr.KillImmediate(d, false, true);
-	MOV(32, gpr.R(d), R(EAX));
-
-	MEMCHECK_END
-
-	gpr.UnlockAllX();
-}
-
-void Jit64::lhax(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
-
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.FlushLockX(ABI_PARAM1);
-	MOV(32, R(ABI_PARAM1), gpr.R(b));
-	if (a)
-	{
-		ADD(32, R(ABI_PARAM1), gpr.R(a));
-	}
-
-	// Some homebrew actually loads from a hw reg with this instruction
-	SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
-
-	MEMCHECK_START
-
-	gpr.KillImmediate(d, false, true);
-	MOV(32, gpr.R(d), R(EAX));
-
-	MEMCHECK_END
-
-	gpr.UnlockAllX();
-}
-
-void Jit64::lwzx(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
-
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.FlushLockX(ABI_PARAM1);
-	MOV(32, R(ABI_PARAM1), gpr.R(b));
-	if (a)
-	{
-		ADD(32, R(ABI_PARAM1), gpr.R(a));
-	}
-
-	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
-
-	MEMCHECK_START
-
-	gpr.KillImmediate(d, false, true);
-	MOV(32, gpr.R(d), R(EAX));
-
-	MEMCHECK_END
-
-	gpr.UnlockAllX();
-}
-
-void Jit64::lXz(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
-
-	if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
-	{ Default(inst); return; }
-
-	int d = inst.RD;
-	int a = inst.RA;
-
 	// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
 	// Will give nice boost to dual core mode
 	// (mb2): I agree,
@@ -144,23 +132,24 @@ void Jit64::lXz(UGeckoInstruction inst)
 		// do our job at first
 		s32 offset = (s32)(s16)inst.SIMM_16;

-		gpr.FlushLockX(ABI_PARAM1);
 		gpr.Lock(d);
-		MOV(32, R(ABI_PARAM1), gpr.R(a));
-		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
+		SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
 		gpr.KillImmediate(d, false, true);
 		MOV(32, gpr.R(d), R(EAX));
 		gpr.UnlockAll();
-		gpr.UnlockAllX();
-
-		gpr.Flush(FLUSH_ALL);
-
+
 		// if it's still 0, we can wait until the next event
 		CMP(32, R(RAX), Imm32(0));
 		FixupBranch noIdle = J_CC(CC_NE);

+		// We are about to jump to the exception handler, save and flush regs
+		RegCacheState regCacheStateGPR;
+		RegCacheState regCacheStateFPR;
+		gpr.SaveState(regCacheStateGPR);
+		fpr.SaveState(regCacheStateFPR);
 		gpr.Flush(FLUSH_ALL);
 		fpr.Flush(FLUSH_ALL);
+
 		ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);

 		// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
@@ -169,112 +158,99 @@ void Jit64::lXz(UGeckoInstruction inst)
 		SetJumpTarget(noIdle);
+		// Restore register states so that the next instructions can still use the cached values
+		gpr.LoadState(regCacheStateGPR);
+		fpr.LoadState(regCacheStateFPR);
+
 		//js.compilerPC += 8;
 		return;
 	}
-
-	// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
-	// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
-
-	s32 offset = (s32)(s16)inst.SIMM_16;
-	if (!a)
-	{
-		Default(inst);
-		return;
-	}
-
-	int accessSize;
-	switch (inst.OPCD)
-	{
-	case 32:
-		accessSize = 32;
-		if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
-		break; //lwz
-	case 40: accessSize = 16; break; //lhz
-	case 34: accessSize = 8;  break; //lbz
-	default:
-		//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
-		PanicAlert("lXz: invalid access size");
-		return;
-	}
-
-	if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
+	else if ((inst.OPCD == 32 /* lwz */) && accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
 	{
 		// Fast and daring
+		s32 offset = (s32)(s16)inst.SIMM_16;
 		gpr.Lock(a, d);
 		gpr.BindToRegister(a, true, false);
 		gpr.BindToRegister(d, a == d, true);
 		MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
 		BSWAP(32, gpr.R(d).GetSimpleReg());
 		gpr.UnlockAll();
+		return;
+	}
+
+	// Determine whether this instruction updates inst.RA
+	bool update;
+	if (inst.OPCD == 31)
+		update = ((inst.SUBOP10 & 0x20) != 0);
+	else
+		update = ((inst.OPCD & 1) != 0);
+
+	// Prepare address operand
+	Gen::OpArg opAddress;
+	if (!update && !a)
+	{
+		if (inst.OPCD == 31)
+		{
+			gpr.Lock(b);
+			opAddress = gpr.R(b);
+		}
+		else
+		{
+			opAddress = Imm32((u32)(s32)inst.SIMM_16);
+		}
+	}
+	else if (update && ((a == 0) || (d == a)))
+	{
+		PanicAlert("Invalid instruction");
 	}
 	else
 	{
-		gpr.FlushLockX(ABI_PARAM1);
-		gpr.Lock(a);
-		gpr.BindToRegister(a, true, false);
-		MOV(32, R(ABI_PARAM1), gpr.R(a));
-		SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
-
-		MEMCHECK_START
-
-		gpr.KillImmediate(d, false, true);
-		MOV(32, gpr.R(d), R(EAX));
-
-		MEMCHECK_END
-
-		gpr.UnlockAll();
-		gpr.UnlockAllX();
+		if ((inst.OPCD != 31) && gpr.R(a).IsImm())
+		{
+			opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16);
+		}
+		else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm())
+		{
+			opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset);
+		}
+		else
+		{
+			gpr.FlushLockX(ABI_PARAM1);
+			opAddress = R(ABI_PARAM1);
+			MOV(32, opAddress, gpr.R(a));
+
+			if (inst.OPCD == 31)
+				ADD(32, opAddress, gpr.R(b));
+			else
+				ADD(32, opAddress, Imm32((u32)(s32)inst.SIMM_16));
+		}
 	}
-}

-void Jit64::lha(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
+	SafeLoadToEAX(opAddress, accessSize, 0, signExtend);

-	int d = inst.RD;
-	int a = inst.RA;
-	s32 offset = (s32)(s16)inst.SIMM_16;
-	// Safe and boring
-	gpr.FlushLockX(ABI_PARAM1);
-	MOV(32, R(ABI_PARAM1), gpr.R(a));
-	SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
-
-	MEMCHECK_START
-
-	gpr.KillImmediate(d, false, true);
-	MOV(32, gpr.R(d), R(EAX));
-
-	MEMCHECK_END
-
-	gpr.UnlockAllX();
-}
-
-void Jit64::lwzux(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
-
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	if (!a || a == d || a == b)
+	// We must flush immediate values from the following registers because
+	// they may change at runtime if no MMU exception has been raised
+	gpr.KillImmediate(d, true, true);
+	if (update)
 	{
-		Default(inst);
-		return;
+		gpr.Lock(a);
+		gpr.BindToRegister(a, true, true);
 	}
-	gpr.Lock(a);
-	gpr.BindToRegister(a, true, true);
-	ADD(32, gpr.R(a), gpr.R(b));
-	MOV(32, R(EAX), gpr.R(a));
-	SafeLoadRegToEAX(EAX, 32, 0, false);
-
+
 	MEMCHECK_START

-	gpr.KillImmediate(d, false, true);
+	if (update)
+	{
+		if (inst.OPCD == 31)
+			ADD(32, gpr.R(a), gpr.R(b));
+		else
+			ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
+	}
 	MOV(32, gpr.R(d), R(EAX));

 	MEMCHECK_END
-
+
+	gpr.UnlockAll();
 	gpr.UnlockAll();
 }
@@ -312,7 +288,7 @@ void Jit64::stX(UGeckoInstruction inst)
 	bool update = inst.OPCD & 1;

 	s32 offset = (s32)(s16)inst.SIMM_16;
-	if (a || update)
+	if (a || !update)
 	{
 		int accessSize;
 		switch (inst.OPCD & ~1)
@@ -323,18 +299,18 @@ void Jit64::stX(UGeckoInstruction inst)
 		default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
 		}

-		if (gpr.R(a).IsImm())
+		if ((a == 0) || gpr.R(a).IsImm())
 		{
 			// If we already know the address through constant folding, we can do some
 			// fun tricks...
-			u32 addr = (u32)gpr.R(a).offset;
+			u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset);
 			addr += offset;
 			if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
 			{
-				if (offset && update)
-					gpr.SetImmediate32(a, addr);
 				gpr.FlushLockX(ABI_PARAM1);
 				MOV(32, R(ABI_PARAM1), gpr.R(s));
+				if (update)
+					gpr.SetImmediate32(a, addr);
 				switch (accessSize)
 				{
 					// No need to protect these, they don't touch any state
@@ -347,16 +323,27 @@ void Jit64::stX(UGeckoInstruction inst)
 				gpr.UnlockAllX();
 				return;
 			}
-			else if (Memory::IsRAMAddress(addr) && accessSize == 32)
+			else if (Memory::IsRAMAddress(addr))
 			{
-				if (offset && update)
-					gpr.SetImmediate32(a, addr);
-				MOV(accessSize, R(EAX), gpr.R(s));
+				MOV(32, R(EAX), gpr.R(s));
 				BSWAP(accessSize, EAX);
 				WriteToConstRamAddress(accessSize, R(EAX), addr);
+				if (update)
+					gpr.SetImmediate32(a, addr);
+				return;
+			}
+			else
+			{
+				switch (accessSize)
+				{
+				case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break;
+				case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break;
+				case 8:  ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break;
+				}
+				if (update)
+					gpr.SetImmediate32(a, addr);
 				return;
 			}
-			// Other IO not worth the trouble.
 		}

 		// Optimized stack access?
@@ -372,7 +359,7 @@ void Jit64::stX(UGeckoInstruction inst)
 			AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
 			MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
 #endif
-			if (update)
+			if (update && offset)
 			{
 				gpr.Lock(a);
 				gpr.KillImmediate(a, true, true);
@@ -406,9 +393,9 @@ void Jit64::stX(UGeckoInstruction inst)

 		if (update && offset)
 		{
+			gpr.KillImmediate(a, true, true);
 			MEMCHECK_START

-			gpr.KillImmediate(a, true, true);
 			ADD(32, gpr.R(a), Imm32((u32)offset));

 			MEMCHECK_END
@@ -419,7 +406,7 @@ void Jit64::stX(UGeckoInstruction inst)
 	}
 	else
 	{
-		Default(inst);
+		PanicAlert("Invalid stX");
 	}
 }
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index 44160dc6b1..9f46a623be 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -62,15 +62,13 @@ void Jit64::lfs(UGeckoInstruction inst)
 		return;
 	}
 	s32 offset = (s32)(s16)inst.SIMM_16;
-	gpr.FlushLockX(ABI_PARAM1);
-	MOV(32, R(ABI_PARAM1), gpr.R(a));
 	if (jo.assumeFPLoadFromMem)
 	{
-		UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
+		UnsafeLoadToEAX(gpr.R(a), 32, offset, false);
 	}
 	else
 	{
-		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
+		SafeLoadToEAX(gpr.R(a), 32, offset, false);
 	}

 	MEMCHECK_START
@@ -83,7 +81,6 @@ void Jit64::lfs(UGeckoInstruction inst)

 	MEMCHECK_END

-	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
@@ -299,9 +296,12 @@ void Jit64::stfs(UGeckoInstruction inst)
 		ADD(32, R(ABI_PARAM2), Imm32(offset));
 	if (update && offset)
 	{
+		// We must flush immediate values from the following register because
+		// it may take another value at runtime if no MMU exception has been raised
+		gpr.KillImmediate(a, true, true);
+
 		MEMCHECK_START

-		gpr.KillImmediate(a, false, true);
 		MOV(32, gpr.R(a), R(ABI_PARAM2));

 		MEMCHECK_END
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp
index b2c5487997..33468e6062 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -123,8 +123,6 @@ void Jit64::mtmsr(UGeckoInstruction inst)
 	}
 	MOV(32, M(&MSR), gpr.R(inst.RS));
 	gpr.UnlockAll();
-	gpr.Flush(FLUSH_ALL);
-	fpr.Flush(FLUSH_ALL);
 	WriteExit(js.compilerPC + 4, 0);
 }
 // ==============
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
index b01cc4c517..930d9fc90f 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -71,6 +71,56 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
 #endif
 }

+void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+{
+	if (opAddress.IsImm())
+	{
+	#ifdef _M_IX86
+		MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
+	#else
+		MOVZX(32, accessSize, EAX, M(Memory::base + (u32)opAddress.offset + offset));
+	#endif
+	}
+	else
+	{
+	#ifdef _M_IX86
+		MOV(32, R(EAX), opAddress);
+		if (offset)
+			ADD(32, R(EAX), Imm32(offset));
+		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
+		MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base));
+	#else
+		if (opAddress.IsSimpleReg())
+		{
+			MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
+		}
+		else
+		{
+			MOV(32, R(EAX), opAddress);
+			MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
+		}
+	#endif
+	}
+
+	if (accessSize == 32)
+	{
+		BSWAP(32, EAX);
+	}
+	else if (accessSize == 16)
+	{
+		BSWAP(32, EAX);
+		if (signExtend)
+			SAR(32, R(EAX), Imm8(16));
+		else
+			SHR(32, R(EAX), Imm8(16));
+	}
+	else if (signExtend)
+	{
+		// TODO: bake 8-bit into the original load.
+		MOVSX(32, accessSize, EAX, R(EAX));
+	}
+}
+
 void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
 {
 	if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
@@ -112,6 +162,95 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
 	}
 }

+void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+{
+	if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
+	{
+		// FIXME: accessSize == 16 does not work.  Breaks mkdd
+		UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+	}
+	else
+	{
+		u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
+		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+		{
+			mem_mask |= Memory::ADDR_MASK_MEM1;
+		}
+
+		if (opAddress.IsImm())
+		{
+			u32 address = (u32)opAddress.offset + offset;
+			if ((address & mem_mask) == 0)
+			{
+				UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+			}
+			else
+			{
+				switch (accessSize)
+				{
+				case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break;
+				case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break;
+				case 8:  ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break;
+				}
+				if (signExtend && accessSize < 32)
+				{
+					// Need to sign extend values coming from the Read_U* functions.
+					MOVSX(32, accessSize, EAX, R(EAX));
+				}
+			}
+		}
+		else
+		{
+			if (offset)
+			{
+				MOV(32, R(EAX), opAddress);
+				ADD(32, R(EAX), Imm32(offset));
+				TEST(32, R(EAX), Imm32(mem_mask));
+				FixupBranch fast = J_CC(CC_Z);
+
+				switch (accessSize)
+				{
+				case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break;
+				case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break;
+				case 8:  ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break;
+				}
+				if (signExtend && accessSize < 32)
+				{
+					// Need to sign extend values coming from the Read_U* functions.
+					MOVSX(32, accessSize, EAX, R(EAX));
+				}
+
+				FixupBranch exit = J();
+				SetJumpTarget(fast);
+				UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+				SetJumpTarget(exit);
+			}
+			else
+			{
+				TEST(32, opAddress, Imm32(mem_mask));
+				FixupBranch fast = J_CC(CC_Z);
+
+				switch (accessSize)
+				{
+				case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break;
+				case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break;
+				case 8:  ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break;
+				}
+				if (signExtend && accessSize < 32)
+				{
+					// Need to sign extend values coming from the Read_U* functions.
+					MOVSX(32, accessSize, EAX, R(EAX));
+				}
+
+				FixupBranch exit = J();
+				SetJumpTarget(fast);
+				UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+				SetJumpTarget(exit);
+			}
+		}
+	}
+}
+
 void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
 {
 	if (accessSize == 8 && reg_value >= 4) {
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
index 99ee795822..30848d7e97 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
@@ -27,8 +27,10 @@ public:
 	void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
 	void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
 	void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
+	void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
 	void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
 	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
+	void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);

 	// Trashes both inputs and EAX.
 	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);
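Note for reviewers (not part of the patch): the register-cache checkpointing idiom that this change applies at every conditional block exit can be sketched as below. It is a minimal illustrative sketch, not the patch's literal code; EmitExitPath() is a hypothetical stand-in for the dispatcher or exception jump, while RegCacheState, SaveState/LoadState and FLUSH_ALL are the interfaces introduced above.

	// Snapshot the cache bookkeeping, then flush: the emitted stores put every
	// guest register back in memory for the (rarely taken) exit path, but the
	// host registers still hold valid copies on the fall-through path.
	RegCacheState gprState, fprState;
	gpr.SaveState(gprState);
	fpr.SaveState(fprState);
	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);

	EmitExitPath(); // e.g. JMP(asm_routines.dispatcher, true)

	// Reinstate the snapshot so instructions compiled after this point keep
	// using the cached values instead of reloading them from memory.
	gpr.LoadState(gprState);
	fpr.LoadState(fprState);

Because the flush happens on the side path and the snapshot is restored on the common path, the unconditional Flush(FLUSH_ALL) calls previously scattered through the branch, compare and exit emitters could be deleted, which is the main point of this patch.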