From 54e26f64c6c610b3503e8d9f01781c0cca875abe Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 31 Aug 2014 20:08:51 -0700 Subject: [PATCH] JIT: use far code emitter in more places --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 6 ++- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 8 +-- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 4 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 52 +++++++++++++------ Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 7 ++- 5 files changed, 53 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index e0b1fdffd7..b243e6bb90 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -167,11 +167,13 @@ void Jit64::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); - farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE); - blocks.Init(); asm_routines.Init(); + // important: do this *after* generating the global asm routines, because we can't use farcode in them. + // it'll crash because the farcode functions get cleared on JIT clears. + farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE); + code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index a31da48fa9..853bae8702 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -279,18 +279,20 @@ void Jit64::dcbz(UGeckoInstruction inst) ADD(32, R(RSCRATCH), gpr.R(a)); AND(32, R(RSCRATCH), Imm32(~31)); TEST(32, R(RSCRATCH), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z, true); + FixupBranch slow = J_CC(CC_NZ, true); // Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but // supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure. + SwitchToFarCode(); + SetJumpTarget(slow); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, 0); + FixupBranch exit = J(true); - FixupBranch exit = J(); - SetJumpTarget(fast); + SwitchToNearCode(); PXOR(XMM0, R(XMM0)); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index da8612c046..81260249c7 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -271,11 +271,11 @@ void JitIL::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); - farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE); - blocks.Init(); asm_routines.Init(); + farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE); + code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index ae445d8575..c0041173ed 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -351,7 +351,14 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } TEST(32, addr_loc, Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z, true); + FixupBranch slow, exit; + slow = J_CC(CC_NZ, farcode.Enabled()); + UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend); + if (farcode.Enabled()) + SwitchToFarCode(); + else + exit = J(true); + SetJumpTarget(slow); size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0; ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment); @@ -385,10 +392,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } MEMCHECK_END - - FixupBranch exit = J(); - SetJumpTarget(fast); - UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend); + if (farcode.Enabled()) + { + exit = J(true); + SwitchToNearCode(); + } SetJumpTarget(exit); } } @@ -470,12 +478,21 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce } #endif + bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); + + FixupBranch slow, exit; TEST(32, R(reg_addr), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z, true); + slow = J_CC(CC_NZ, farcode.Enabled()); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (farcode.Enabled()) + SwitchToFarCode(); + else + exit = J(true); + SetJumpTarget(slow); // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); + size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0; - bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment); switch (accessSize) { @@ -493,9 +510,11 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); - FixupBranch exit = J(); - SetJumpTarget(fast); - UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); + if (farcode.Enabled()) + { + exit = J(true); + SwitchToNearCode(); + } SetJumpTarget(exit); } @@ -659,15 +678,17 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // to save an instruction, since diverting a few more floats to the slow path can't hurt much. SUB(8, R(RSCRATCH), Imm8(0x6D)); CMP(8, R(RSCRATCH), Imm8(0x3)); - FixupBranch x87Conversion = J_CC(CC_BE); + FixupBranch x87Conversion = J_CC(CC_BE, true); CVTSD2SS(dst, R(src)); - FixupBranch continue1 = J(); + SwitchToFarCode(); SetJumpTarget(x87Conversion); MOVSD(M(&temp64), src); FLD(64, M(&temp64)); FSTP(32, M(&temp32)); MOVSS(dst, M(&temp32)); + FixupBranch continue1 = J(true); + SwitchToNearCode(); SetJumpTarget(continue1); // We'd normally need to MOVDDUP here to put the single in the top half of the output register too, but @@ -696,16 +717,17 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr // through the slow path (0x00800000), but the performance effects of that should be negligible. SUB(32, R(gprsrc), Imm8(1)); TEST(32, R(gprsrc), Imm32(0x7f800000)); - - FixupBranch x87Conversion = J_CC(CC_Z); + FixupBranch x87Conversion = J_CC(CC_Z, true); CVTSS2SD(dst, R(dst)); - FixupBranch continue1 = J(); + SwitchToFarCode(); SetJumpTarget(x87Conversion); MOVSS(M(&temp32), dst); FLD(32, M(&temp32)); FSTP(64, M(&temp64)); MOVSD(dst, M(&temp64)); + FixupBranch continue1 = J(true); + SwitchToNearCode(); SetJumpTarget(continue1); MOVDDUP(dst, R(dst)); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 1eab197fda..0fed69871c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -36,9 +36,12 @@ namespace MMIO { class Mapping; } // exception branches. class FarCodeCache : public Gen::X64CodeBlock { +private: + bool m_enabled = false; public: - void Init(int size) { AllocCodeSpace(size); } - void Shutdown() { FreeCodeSpace(); } + bool Enabled() { return m_enabled; } + void Init(int size) { AllocCodeSpace(size); m_enabled = true; } + void Shutdown() { FreeCodeSpace(); m_enabled = false; } }; // Like XCodeBlock but has some utilities for memory access.