From a1655a0e3c771ab0d528249a4f9e9fde368c8029 Mon Sep 17 00:00:00 2001 From: Fiora Date: Thu, 14 Aug 2014 23:36:50 -0700 Subject: [PATCH] JIT: avoid saving the PC on every store Modify the backpatcher to store the PC in the trampolines. Should be ~3.5% faster overall (measured on POV-RAY benchmark). --- .../Core/Core/PowerPC/JitCommon/JitBackpatch.cpp | 15 +++++++++++++-- Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h | 2 +- Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 5 +++-- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 1 + 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 024198a303..047901ad46 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -95,7 +95,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re } // Extremely simplistic - just generate the requested trampoline. May reuse them in the future. -const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse) +const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -110,6 +110,9 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // Don't treat FIFO writes specially for now because they require a burst // check anyway. + // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs + MOV(32, M(&PC), Imm32(pc)); + if (dataReg == ABI_PARAM2) PanicAlert("Incorrect use of SafeWriteRegToReg"); if (addrReg != ABI_PARAM1) @@ -220,6 +223,14 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) else { // TODO: special case FIFO writes. Also, support 32-bit mode. 
+ it = pcAtLoc.find(codePtr); + if (it == pcAtLoc.end()) + { + PanicAlert("BackPatch: no pc entry for address %p", codePtr); + return nullptr; + } + + u32 pc = it->second; u8 *start; if (info.byteSwap) @@ -253,7 +264,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) start = codePtr - bswapSize; } XEmitter emitter(start); - const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse); + const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, pc); emitter.CALL((void *)trampoline); int padding = codePtr + info.instructionSize - emitter.GetCodePtr(); if (padding > 0) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h index c17f98b7e0..3a573f7531 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h @@ -175,5 +175,5 @@ public: void Shutdown(); const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); - const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse); + const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); }; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index f66aa387d5..cc233846a5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -411,7 +411,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce #endif ) { - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write const u8* backpatchStart = GetCodePtr(); u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP)); int padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart); @@ -421,6 +420,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce } 
registersInUseAtLoc[mov] = registersInUse; + pcAtLoc[mov] = jit->js.compilerPC; return; } @@ -441,9 +441,10 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce } #endif - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); + // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs + MOV(32, M(&PC), Imm32(jit->js.compilerPC)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 613b92c1db..af68493571 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -63,4 +63,5 @@ public: void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); protected: std::unordered_map<u8 *, u32> registersInUseAtLoc; + std::unordered_map<u8 *, u32> pcAtLoc; };