JIT: avoid saving the PC on every store

Modify the backpatcher to store the PC in the write trampolines instead of emitting a PC write before every backpatchable store.
Should be ~3.5% faster overall (measured on the POV-Ray benchmark).
This commit is contained in:
Fiora 2014-08-14 23:36:50 -07:00
parent 15a3b30e27
commit a1655a0e3c
4 changed files with 18 additions and 5 deletions

View File

@ -95,7 +95,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
} }
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. // Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse) const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
{ {
if (GetSpaceLeft() < 1024) if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
@ -110,6 +110,9 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// Don't treat FIFO writes specially for now because they require a burst // Don't treat FIFO writes specially for now because they require a burst
// check anyway. // check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, M(&PC), Imm32(pc));
if (dataReg == ABI_PARAM2) if (dataReg == ABI_PARAM2)
PanicAlert("Incorrect use of SafeWriteRegToReg"); PanicAlert("Incorrect use of SafeWriteRegToReg");
if (addrReg != ABI_PARAM1) if (addrReg != ABI_PARAM1)
@ -220,6 +223,14 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
else else
{ {
// TODO: special case FIFO writes. Also, support 32-bit mode. // TODO: special case FIFO writes. Also, support 32-bit mode.
it = pcAtLoc.find(codePtr);
if (it == pcAtLoc.end())
{
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
return nullptr;
}
u32 pc = it->second;
u8 *start; u8 *start;
if (info.byteSwap) if (info.byteSwap)
@ -253,7 +264,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
start = codePtr - bswapSize; start = codePtr - bswapSize;
} }
XEmitter emitter(start); XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse); const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, pc);
emitter.CALL((void *)trampoline); emitter.CALL((void *)trampoline);
int padding = codePtr + info.instructionSize - emitter.GetCodePtr(); int padding = codePtr + info.instructionSize - emitter.GetCodePtr();
if (padding > 0) if (padding > 0)

View File

@ -175,5 +175,5 @@ public:
void Shutdown(); void Shutdown();
const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse); const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
}; };

View File

@ -411,7 +411,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
#endif #endif
) )
{ {
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
const u8* backpatchStart = GetCodePtr(); const u8* backpatchStart = GetCodePtr();
u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP)); u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP));
int padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart); int padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
@ -421,6 +420,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
} }
registersInUseAtLoc[mov] = registersInUse; registersInUseAtLoc[mov] = registersInUse;
pcAtLoc[mov] = jit->js.compilerPC;
return; return;
} }
@ -441,9 +441,10 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
} }
#endif #endif
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
TEST(32, R(reg_addr), Imm32(mem_mask)); TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true); FixupBranch fast = J_CC(CC_Z, true);
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG));
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);

View File

@ -63,4 +63,5 @@ public:
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
protected: protected:
std::unordered_map<u8 *, u32> registersInUseAtLoc; std::unordered_map<u8 *, u32> registersInUseAtLoc;
std::unordered_map<u8 *, u32> pcAtLoc;
}; };