From d3aee2de0875d3a0fea312e06773f0a8692ce581 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 25 Jan 2017 00:59:36 +0100 Subject: [PATCH] JitCache: Split off JIT call from dispatcher. This avoids flushing the BLR optimization stack on fast_block_cache misses. --- .../CachedInterpreter/CachedInterpreter.cpp | 6 +++ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 24 ++++++++--- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 17 +++++--- .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 2 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 2 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 40 +++++++++---------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 2 +- 7 files changed, 58 insertions(+), 35 deletions(-) diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index ed4c13e2c6..e97d29ff9c 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -36,6 +36,12 @@ void CachedInterpreter::Shutdown() void CachedInterpreter::ExecuteOneBlock() { const u8* normal_entry = m_block_cache.Dispatch(); + if (!normal_entry) + { + Jit(PC); + return; + } + const Instruction* code = reinterpret_cast(normal_entry); for (; code->type != Instruction::INSTRUCTION_ABORT; ++code) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 81c02aadd0..1239570dbd 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -140,17 +140,14 @@ void Jit64AsmRoutineManager::Generate() // Failure, fallback to the C++ dispatcher for calling the JIT. } - // We reset the stack because Jit might clear the code cache. 
- // Also if we are in the middle of disabling BLR optimization on windows - // we need to reset the stack before _resetstkoflw() is called in Jit - // otherwise we will generate a second stack overflow exception during DoJit() - ResetStack(*this); - // Ok, no block, let's call the slow dispatcher ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(JitBase::Dispatch); ABI_PopRegistersAndAdjustStack({}, 0); + TEST(64, R(ABI_RETURN), R(ABI_RETURN)); + FixupBranch no_block_available = J_CC(CC_Z); + // Switch to the correct memory base, in case MSR.DR has changed. TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); FixupBranch physmem = J_CC(CC_Z); @@ -160,6 +157,21 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(RMEM), ImmPtr(Memory::physical_base)); JMPptr(R(ABI_RETURN)); + SetJumpTarget(no_block_available); + + // We reset the stack because Jit might clear the code cache. + // Also if we are in the middle of disabling BLR optimization on windows + // we need to reset the stack before _resetstkoflw() is called in Jit + // otherwise we will generate a second stack overflow exception during DoJit() + ResetStack(*this); + + ABI_PushRegistersAndAdjustStack({}, 0); + MOV(32, R(ABI_PARAM1), PPCSTATE(pc)); + ABI_CallFunction(JitTrampoline); + ABI_PopRegistersAndAdjustStack({}, 0); + + JMP(dispatcherNoCheck, true); + SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 3d2b236739..9daf386adb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -106,18 +106,25 @@ void JitArm64::GenerateAsm() MOVP2R(X30, reinterpret_cast(&JitBase::Dispatch)); BLR(X30); - // set the mem_base based on MSR flags + FixupBranch no_block_available = CBZ(X0); + + // set the mem_base based on MSR flags and jump to next block. 
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); MOVP2R(MEM_REG, Memory::physical_base); - FixupBranch membaseend = B(); + BR(X0); SetJumpTarget(physmem); MOVP2R(MEM_REG, Memory::logical_base); - SetJumpTarget(membaseend); - - // Jump to next block. BR(X0); + // Call JIT + SetJumpTarget(no_block_available); + MOV(W0, DISPATCHER_PC); + MOVP2R(X30, reinterpret_cast(&JitTrampoline)); + BLR(X30); + LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + B(dispatcherNoCheck); + SetJumpTarget(bail); doTiming = GetCodePtr(); // Write the current PC out to PPCSTATE diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index eba5152056..634331c15a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -12,7 +12,7 @@ JitBase* g_jit; -void Jit(u32 em_address) +void JitTrampoline(u32 em_address) { g_jit->Jit(em_address); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 8ee7990671..6a154922ff 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -125,7 +125,7 @@ public: virtual bool HandleStackFault() { return false; } }; -void Jit(u32 em_address); +void JitTrampoline(u32 em_address); // Merged routines that should be moved somewhere better u32 Helper_Mask(u8 mb, u8 me); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index f756328804..7faf3b1258 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -174,11 +174,11 @@ const u8* JitBaseBlockCache::Dispatch() { JitBlock* block = fast_block_map[FastLookupIndexForAddress(PC)]; - while (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JIT_CACHE_MSR_MASK)) - { - 
MoveBlockIntoFastCache(PC, MSR & JIT_CACHE_MSR_MASK); - block = fast_block_map[FastLookupIndexForAddress(PC)]; - } + if (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JIT_CACHE_MSR_MASK)) + block = MoveBlockIntoFastCache(PC, MSR & JIT_CACHE_MSR_MASK); + + if (!block) + return nullptr; return block->normalEntry; } @@ -349,25 +349,23 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block) WriteDestroyBlock(block); } -void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) +JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) { JitBlock* block = GetBlockFromStartAddress(addr, msr); - if (!block) - { - Jit(addr); - } - else - { - // Drop old fast block map entry - if (fast_block_map[block->fast_block_map_index] == block) - fast_block_map[block->fast_block_map_index] = nullptr; - // And create a new one - size_t index = FastLookupIndexForAddress(addr); - fast_block_map[index] = block; - block->fast_block_map_index = index; - LinkBlock(*block); - } + if (!block) + return nullptr; + + // Drop old fast block map entry + if (fast_block_map[block->fast_block_map_index] == block) + fast_block_map[block->fast_block_map_index] = nullptr; + + // And create a new one + size_t index = FastLookupIndexForAddress(addr); + fast_block_map[index] = block; + block->fast_block_map_index = index; + + return block; } size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index ea053a3dfa..4d281ed9c8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -161,7 +161,7 @@ private: void UnlinkBlock(const JitBlock& block); void DestroyBlock(JitBlock& block); - void MoveBlockIntoFastCache(u32 em_address, u32 msr); + JitBlock* MoveBlockIntoFastCache(u32 em_address, u32 msr); // Fast but risky block lookup based on fast_block_map. 
size_t FastLookupIndexForAddress(u32 address);