From c9c5437cb836c44e862f99fc5c4a879a5b4fb966 Mon Sep 17 00:00:00 2001 From: degasus Date: Tue, 17 Jan 2017 22:59:03 +0100 Subject: [PATCH 1/8] JitCache: Fix links_to updates on destroying blocks. --- Source/Core/Core/PowerPC/JitCommon/JitCache.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index aebff21af8..8da453eef9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -325,13 +325,16 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block, bool invalidate) UnlinkBlock(block); // Delete linking addresses - auto it = links_to.equal_range(block.effectiveAddress); - while (it.first != it.second) + for (const auto& e : block.linkData) { - if (it.first->second == &block) - it.first = links_to.erase(it.first); - else - it.first++; + auto it = links_to.equal_range(e.exitAddress); + while (it.first != it.second) + { + if (it.first->second == &block) + it.first = links_to.erase(it.first); + else + it.first++; + } } // Raise an signal if we are going to call this block again From 352909fc4c02573983aadb7cccd704b0aa3d51e4 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 12 Jan 2017 09:56:05 +0100 Subject: [PATCH 2/8] JitCache: Track the ICache entry of jit blocks. This guarantees that no invalidated jit block is still in the icache. 
--- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 25 +++++++++++++------ Source/Core/Core/PowerPC/JitCommon/JitCache.h | 6 ++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 8da453eef9..a108a3fdb6 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -118,6 +118,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK; b.linkData.clear(); + b.in_icache = 0; num_blocks++; // commit the current block return &b; } @@ -136,7 +137,9 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8 DestroyBlock(old_b, true); } start_block_map[block.physicalAddress] = &block; - FastLookupEntryForAddress(block.effectiveAddress) = &block; + size_t icache = FastLookupEntryForAddress(block.effectiveAddress); + iCache[icache] = &block; + block.in_icache = icache; u32 pAddr = block.physicalAddress; @@ -184,13 +187,13 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) const u8* JitBaseBlockCache::Dispatch() { - JitBlock* block = FastLookupEntryForAddress(PC); + JitBlock* block = iCache[FastLookupEntryForAddress(PC)]; while (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK)) { MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK); - block = FastLookupEntryForAddress(PC); + block = iCache[FastLookupEntryForAddress(PC)]; } return block->normalEntry; @@ -320,7 +323,8 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block, bool invalidate) } block.invalid = true; start_block_map.erase(block.physicalAddress); - FastLookupEntryForAddress(block.effectiveAddress) = nullptr; + if (iCache[block.in_icache] == &block) + iCache[block.in_icache] = nullptr; UnlinkBlock(block); @@ -350,12 +354,19 @@ 
void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) } else { - FastLookupEntryForAddress(addr) = block; + // Drop old icache entry + if (iCache[block->in_icache] == block) + iCache[block->in_icache] = nullptr; + + // And create a new one + size_t icache = FastLookupEntryForAddress(addr); + iCache[icache] = block; + block->in_icache = icache; LinkBlock(*block); } } -JitBlock*& JitBaseBlockCache::FastLookupEntryForAddress(u32 address) +size_t JitBaseBlockCache::FastLookupEntryForAddress(u32 address) { - return iCache[(address >> 2) & iCache_Mask]; + return (address >> 2) & iCache_Mask; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 8c3f141c90..d0bcd07bf2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -74,6 +74,10 @@ struct JitBlock u64 ticStart; // for profiling - time. u64 ticStop; // for profiling - time. u64 ticCounter; // for profiling - time. + + // This tracks the position if this block within the icache. + // We allow each block to have one icache entry. + size_t in_icache; }; typedef void (*CompiledCode)(); @@ -163,7 +167,7 @@ private: void MoveBlockIntoFastCache(u32 em_address, u32 msr); // Fast but risky block lookup based on iCache. - JitBlock*& FastLookupEntryForAddress(u32 address); + size_t FastLookupEntryForAddress(u32 address); // We store the metadata of all blocks in a linear way within this array. // Note: blocks[0] must not be used as it is referenced as invalid block in iCache. From 9d58127dec42c4bc57cdb746eb178abddc9a0f1e Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 15 Jan 2017 13:23:46 +0100 Subject: [PATCH 3/8] JitCache: Move JitBlock config variable. 
--- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 2 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 2 +- Source/Core/Core/PowerPC/JitCommon/JitCache.cpp | 9 ++++----- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 11 ++++------- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index d0db731de4..310c85089f 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate() // Check both block.effectiveAddress and block.msrBits. MOV(32, R(RSCRATCH2), PPCSTATE(msr)); - AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK)); + AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); SHL(64, R(RSCRATCH2), Imm8(32)); MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 20921b2b70..7f323ebe6d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -88,7 +88,7 @@ void JitArm64::GenerateAsm() FixupBranch pc_missmatch = B(CC_NEQ); LDR(INDEX_UNSIGNED, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr)); - ANDI2R(pc_and_msr2, pc_and_msr2, JitBlock::JIT_CACHE_MSR_MASK); + ANDI2R(pc_and_msr2, pc_and_msr2, JitBaseBlockCache::JIT_CACHE_MSR_MASK); LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, msrBits)); CMP(pc_and_msr, pc_and_msr2); FixupBranch msr_missmatch = B(CC_NEQ); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index a108a3fdb6..f8dabcef69 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -116,7 +116,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) b.invalid = false; b.effectiveAddress = em_address; b.physicalAddress = 
PowerPC::JitCache_TranslateAddress(em_address).address; - b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK; + b.msrBits = MSR & JIT_CACHE_MSR_MASK; b.linkData.clear(); b.in_icache = 0; num_blocks++; // commit the current block @@ -180,7 +180,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) JitBlock* b = map_result->second; if (b->invalid || b->effectiveAddress != addr || - b->msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK)) + b->msrBits != (msr & JIT_CACHE_MSR_MASK)) return nullptr; return b; } @@ -189,10 +189,9 @@ const u8* JitBaseBlockCache::Dispatch() { JitBlock* block = iCache[FastLookupEntryForAddress(PC)]; - while (!block || block->effectiveAddress != PC || - block->msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK)) + while (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JIT_CACHE_MSR_MASK)) { - MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK); + MoveBlockIntoFastCache(PC, MSR & JIT_CACHE_MSR_MASK); block = iCache[FastLookupEntryForAddress(PC)]; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index d0bcd07bf2..d9f3e9944d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -24,13 +24,6 @@ class JitBase; // address. struct JitBlock { - enum - { - // Mask for the MSR bits which determine whether a compiled block - // is valid (MSR.IR and MSR.DR, the address translation bits). - JIT_CACHE_MSR_MASK = 0x30, - }; - // A special entry point for block linking; usually used to check the // downcount. const u8* checkedEntry; @@ -115,6 +108,10 @@ public: class JitBaseBlockCache { public: + // Mask for the MSR bits which determine whether a compiled block + // is valid (MSR.IR and MSR.DR, the address translation bits). 
+ static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; + static constexpr int MAX_NUM_BLOCKS = 65536 * 2; static constexpr u32 iCache_Num_Elements = 0x10000; static constexpr u32 iCache_Mask = iCache_Num_Elements - 1; From 113d6b3b8469143f008f4459d035337ff8b4a5ce Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 16 Jan 2017 23:43:43 +0100 Subject: [PATCH 4/8] JitCache: Use a multimap for block_map and start_block_map. We may have duplicated entries here because of MSR mismatch. Just store both and validate the matching one on cache access. --- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 42 +++++++++---------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 6 +-- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index f8dabcef69..309f94993e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -9,6 +9,7 @@ // performance hit, it's not enabled by default, but it's useful for // locating performance issues. +#include #include #include #include @@ -125,18 +126,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) { - if (start_block_map.count(block.physicalAddress)) - { - // We already have a block at this address; invalidate the old block. - // This should be very rare. This will only happen if the same block - // is called both with DR/IR enabled or disabled. 
- WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", block.physicalAddress); - JitBlock& old_b = *start_block_map[block.physicalAddress]; - block_map.erase( - std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress)); - DestroyBlock(old_b, true); - } - start_block_map[block.physicalAddress] = &block; + start_block_map.emplace(block.physicalAddress, &block); size_t icache = FastLookupEntryForAddress(block.effectiveAddress); iCache[icache] = &block; block.in_icache = icache; @@ -146,7 +136,7 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8 for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) valid_block.Set(addr); - block_map[std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr)] = &block; + block_map.emplace(std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr), &block); if (block_link) { @@ -174,15 +164,15 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) translated_addr = translated.address; } - auto map_result = start_block_map.find(translated_addr); - if (map_result == start_block_map.end()) - return nullptr; + auto iter = start_block_map.equal_range(translated_addr); + for (; iter.first != iter.second; iter.first++) + { + JitBlock& b = *iter.first->second; + if (!b.invalid && b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK)) + return &b; + } - JitBlock* b = map_result->second; - if (b->invalid || b->effectiveAddress != addr || - b->msrBits != (msr & JIT_CACHE_MSR_MASK)) - return nullptr; - return b; + return nullptr; } const u8* JitBaseBlockCache::Dispatch() @@ -321,10 +311,18 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block, bool invalidate) return; } block.invalid = true; - start_block_map.erase(block.physicalAddress); if (iCache[block.in_icache] == &block) iCache[block.in_icache] = nullptr; + auto iter = start_block_map.equal_range(block.physicalAddress); + while (iter.first != iter.second) + 
{ + if (iter.first->second == &block) + iter.first = start_block_map.erase(iter.first); + else + iter.first++; + } + UnlinkBlock(block); // Delete linking addresses diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index d9f3e9944d..0fd6b84e85 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -177,12 +177,12 @@ private: // Map indexed by the physical memory location. // It is used to invalidate blocks based on memory location. - std::map, JitBlock*> block_map; // (end_addr, start_addr) -> block + std::multimap, JitBlock*> block_map; // (end_addr, start_addr) -> block // Map indexed by the physical address of the entry point. // This is used to query the block based on the current PC in a slow way. - // TODO: This is redundant with block_map, and both should be a multimap. - std::map start_block_map; // start_addr -> block + // TODO: This is redundant with block_map. + std::multimap start_block_map; // start_addr -> block // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. From 9b77a3976724d4afd94c7c21ab2c782d739a98a8 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 16 Jan 2017 22:55:21 +0100 Subject: [PATCH 5/8] JitCache: Only call DestroyBlock on valid blocks. 
--- Source/Core/Core/PowerPC/JitCommon/JitCache.cpp | 10 +++++----- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 309f94993e..12bbd5ab21 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -72,7 +72,8 @@ void JitBaseBlockCache::Clear() m_jit.js.pairedQuantizeAddresses.clear(); for (int i = 1; i < num_blocks; i++) { - DestroyBlock(blocks[i], false); + if (!block.invalid) + DestroyBlock(blocks[i]); } links_to.clear(); block_map.clear(); @@ -213,7 +214,7 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); while (it != block_map.end() && it->first.second < pAddr + length) { - DestroyBlock(*it->second, true); + DestroyBlock(*it->second); it = block_map.erase(it); } @@ -302,12 +303,11 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block) } } -void JitBaseBlockCache::DestroyBlock(JitBlock& block, bool invalidate) +void JitBaseBlockCache::DestroyBlock(JitBlock& block) { if (block.invalid) { - if (invalidate) - PanicAlert("Invalidating invalid block %p", &block); + PanicAlert("Invalidating invalid block %p", &block); return; } block.invalid = true; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 0fd6b84e85..341d7c8d6d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -159,7 +159,7 @@ private: void LinkBlockExits(JitBlock& block); void LinkBlock(JitBlock& block); void UnlinkBlock(const JitBlock& block); - void DestroyBlock(JitBlock& block, bool invalidate); + void DestroyBlock(JitBlock& block); void MoveBlockIntoFastCache(u32 em_address, u32 msr); From 43cdbab5154d7a95aafa11949dd159dc5aa1aad9 Mon Sep 17 00:00:00 2001 
From: degasus Date: Tue, 17 Jan 2017 22:03:51 +0100 Subject: [PATCH 6/8] Jit64: Add a switch for easier fallback to the C++ dispatcher. --- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 100 ++++++++++++---------- 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 310c85089f..ffff485d95 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -92,54 +92,53 @@ void Jit64AsmRoutineManager::Generate() dispatcherNoCheck = GetCodePtr(); - // Switch to the correct memory base, in case MSR.DR has changed. - // TODO: Is there a more efficient place to put this? We don't - // need to do this for indirect jumps, just exceptions etc. - TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); - FixupBranch physmem = J_CC(CC_NZ); - MOV(64, R(RMEM), ImmPtr(Memory::physical_base)); - FixupBranch membaseend = J(); - SetJumpTarget(physmem); - MOV(64, R(RMEM), ImmPtr(Memory::logical_base)); - SetJumpTarget(membaseend); - // The following is a translation of JitBaseBlockCache::Dispatch into assembly. - - // Fast block number lookup. - // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2 - MOV(32, R(RSCRATCH), PPCSTATE(pc)); - u64 icache = reinterpret_cast(g_jit->GetBlockCache()->GetICache()); - AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2)); - if (icache <= INT_MAX) + const bool assembly_dispatcher = true; + if (assembly_dispatcher) { - MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast(icache))); + // Fast block number lookup. 
+ // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2 + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + u64 icache = reinterpret_cast(g_jit->GetBlockCache()->GetICache()); + AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2)); + if (icache <= INT_MAX) + { + MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast(icache))); + } + else + { + MOV(64, R(RSCRATCH2), Imm64(icache)); + MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)); + } + + // Check if we found a block. + TEST(64, R(RSCRATCH), R(RSCRATCH)); + FixupBranch not_found = J_CC(CC_Z); + + // Check both block.effectiveAddress and block.msrBits. + MOV(32, R(RSCRATCH2), PPCSTATE(msr)); + AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); + SHL(64, R(RSCRATCH2), Imm8(32)); + MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast(offsetof(JitBlock, effectiveAddress)))); + FixupBranch state_mismatch = J_CC(CC_NE); + + // Success; branch to the block we found. + // Switch to the correct memory base, in case MSR.DR has changed. + TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); + FixupBranch physmem = J_CC(CC_Z); + MOV(64, R(RMEM), ImmPtr(Memory::logical_base)); + JMPptr(MDisp(RSCRATCH, static_cast(offsetof(JitBlock, normalEntry)))); + SetJumpTarget(physmem); + MOV(64, R(RMEM), ImmPtr(Memory::physical_base)); + JMPptr(MDisp(RSCRATCH, static_cast(offsetof(JitBlock, normalEntry)))); + + SetJumpTarget(not_found); + SetJumpTarget(state_mismatch); + + // Failure, fallback to the C++ dispatcher for calling the JIT. } - else - { - MOV(64, R(RSCRATCH2), Imm64(icache)); - MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)); - } - - // Check if we found a block. - TEST(64, R(RSCRATCH), R(RSCRATCH)); - FixupBranch not_found = J_CC(CC_Z); - - // Check both block.effectiveAddress and block.msrBits. 
- MOV(32, R(RSCRATCH2), PPCSTATE(msr)); - AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); - SHL(64, R(RSCRATCH2), Imm8(32)); - MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); - CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast(offsetof(JitBlock, effectiveAddress)))); - FixupBranch state_mismatch = J_CC(CC_NE); - // Success; branch to the block we found. - JMPptr(MDisp(RSCRATCH, static_cast(offsetof(JitBlock, normalEntry)))); - SetJumpTarget(not_found); - SetJumpTarget(state_mismatch); - - // Failure; call into the block cache to update the state, then try again. - // (We need to loop because Jit() might not actually generate a block - // if we hit an ISI.) // We reset the stack because Jit might clear the code cache. // Also if we are in the middle of disabling BLR optimization on windows @@ -151,8 +150,15 @@ void Jit64AsmRoutineManager::Generate() ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(JitBase::Dispatch); ABI_PopRegistersAndAdjustStack({}, 0); - // JMPptr(R(ABI_RETURN)); - JMP(dispatcherNoCheck, true); + + // Switch to the correct memory base, in case MSR.DR has changed. + TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); + FixupBranch physmem = J_CC(CC_Z); + MOV(64, R(RMEM), ImmPtr(Memory::logical_base)); + JMPptr(R(ABI_RETURN)); + SetJumpTarget(physmem); + MOV(64, R(RMEM), ImmPtr(Memory::physical_base)); + JMPptr(R(ABI_RETURN)); SetJumpTarget(bail); doTiming = GetCodePtr(); From 830ae6a2c181c9fadb0dec1cd7bbb17bafb99ad9 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 11 Jan 2017 22:41:30 +0100 Subject: [PATCH 7/8] JitCache: Store the JitBlock in the std::map. 
--- .../CachedInterpreter/CachedInterpreter.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 2 +- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 2 +- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 3 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 77 +++++++------------ Source/Core/Core/PowerPC/JitCommon/JitCache.h | 16 +--- 6 files changed, 34 insertions(+), 68 deletions(-) diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index 39006acf01..1e41dc63c2 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -123,7 +123,7 @@ static bool CheckDSI(u32 data) void CachedInterpreter::Jit(u32 address) { - if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 || m_block_cache.IsFull() || + if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 || SConfig::GetInstance().bJITNoBlockCache) { ClearCache(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 580bab2577..f7b75223e2 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -543,7 +543,7 @@ void Jit64::Jit(u32 em_address) } if (IsAlmostFull() || m_far_code.IsAlmostFull() || trampolines.IsAlmostFull() || - blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache) + SConfig::GetInstance().bJITNoBlockCache) { ClearCache(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 85c8b12cc0..dac66a2e4b 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -466,7 +466,7 @@ void JitIL::Trace() void JitIL::Jit(u32 em_address) { if (IsAlmostFull() || m_far_code.IsAlmostFull() || trampolines.IsAlmostFull() || - blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache) + SConfig::GetInstance().bJITNoBlockCache) { ClearCache(); 
} diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index a424166574..c0db4b4e76 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -368,8 +368,7 @@ void JitArm64::SingleStep() void JitArm64::Jit(u32) { - if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() || - SConfig::GetInstance().bJITNoBlockCache) + if (IsAlmostFull() || farcode.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache) { ClearCache(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 12bbd5ab21..134b3ebca4 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -47,14 +47,11 @@ void JitBaseBlockCache::Init() s_clear_jit_cache_thread_safe = CoreTiming::RegisterEvent("clearJitCache", ClearCacheThreadSafe); JitRegister::Init(SConfig::GetInstance().m_perfDir); - iCache.fill(nullptr); Clear(); } void JitBaseBlockCache::Shutdown() { - num_blocks = 1; - JitRegister::Shutdown(); } @@ -63,26 +60,21 @@ void JitBaseBlockCache::Shutdown() void JitBaseBlockCache::Clear() { #if defined(_DEBUG) || defined(DEBUGFAST) - if (IsFull()) - Core::DisplayMessage("Clearing block cache.", 3000); - else - Core::DisplayMessage("Clearing code cache.", 3000); + Core::DisplayMessage("Clearing code cache.", 3000); #endif m_jit.js.fifoWriteAddresses.clear(); m_jit.js.pairedQuantizeAddresses.clear(); - for (int i = 1; i < num_blocks; i++) + for (auto& e : start_block_map) { - if (!block.invalid) - DestroyBlock(blocks[i]); + DestroyBlock(e.second); } + start_block_map.clear(); links_to.clear(); block_map.clear(); valid_block.ClearAll(); - num_blocks = 1; - blocks[0].msrBits = 0xFFFFFFFF; - blocks[0].invalid = true; + iCache.fill(nullptr); } void JitBaseBlockCache::Reset() @@ -96,11 +88,6 @@ void JitBaseBlockCache::SchedulateClearCacheThreadSafe() CoreTiming::ScheduleEvent(0, 
s_clear_jit_cache_thread_safe, 0, CoreTiming::FromThread::NON_CPU); } -bool JitBaseBlockCache::IsFull() const -{ - return num_blocks >= MAX_NUM_BLOCKS - 1; -} - JitBlock** JitBaseBlockCache::GetICache() { return iCache.data(); @@ -108,26 +95,36 @@ JitBlock** JitBaseBlockCache::GetICache() void JitBaseBlockCache::RunOnBlocks(std::function f) { - for (int i = 0; i < num_blocks; i++) - f(blocks[i]); + for (const auto& e : start_block_map) + f(e.second); } JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) { - JitBlock& b = blocks[num_blocks]; - b.invalid = false; + u32 physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; + JitBlock& b = start_block_map.emplace(physicalAddress, JitBlock())->second; b.effectiveAddress = em_address; - b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; + b.physicalAddress = physicalAddress; b.msrBits = MSR & JIT_CACHE_MSR_MASK; b.linkData.clear(); b.in_icache = 0; - num_blocks++; // commit the current block return &b; } +void JitBaseBlockCache::FreeBlock(JitBlock* block) +{ + auto iter = start_block_map.equal_range(block->physicalAddress); + while (iter.first != iter.second) + { + if (&iter.first->second == block) + start_block_map.erase(iter.first); + else + iter.first++; + } +} + void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) { - start_block_map.emplace(block.physicalAddress, &block); size_t icache = FastLookupEntryForAddress(block.effectiveAddress); iCache[icache] = &block; block.in_icache = icache; @@ -168,8 +165,8 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) auto iter = start_block_map.equal_range(translated_addr); for (; iter.first != iter.second; iter.first++) { - JitBlock& b = *iter.first->second; - if (!b.invalid && b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK)) + JitBlock& b = iter.first->second; + if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK)) return &b; 
} @@ -214,7 +211,9 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); while (it != block_map.end() && it->first.second < pAddr + length) { - DestroyBlock(*it->second); + JitBlock* block = it->second; + DestroyBlock(*block); + FreeBlock(block); it = block_map.erase(it); } @@ -250,17 +249,12 @@ void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block) void JitBaseBlockCache::LinkBlockExits(JitBlock& block) { - if (block.invalid) - { - // This block is dead. Don't relink it. - return; - } for (auto& e : block.linkData) { if (!e.linkStatus) { JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits); - if (destinationBlock && !destinationBlock->invalid) + if (destinationBlock) { WriteLinkBlock(e, destinationBlock); e.linkStatus = true; @@ -305,24 +299,9 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block) void JitBaseBlockCache::DestroyBlock(JitBlock& block) { - if (block.invalid) - { - PanicAlert("Invalidating invalid block %p", &block); - return; - } - block.invalid = true; if (iCache[block.in_icache] == &block) iCache[block.in_icache] = nullptr; - auto iter = start_block_map.equal_range(block.physicalAddress); - while (iter.first != iter.second) - { - if (iter.first->second == &block) - iter.first = start_block_map.erase(iter.first); - else - iter.first++; - } - UnlinkBlock(block); // Delete linking addresses diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 341d7c8d6d..0f1c36efef 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -47,11 +47,6 @@ struct JitBlock u32 originalSize; int runCount; // for profiling. - // Whether this struct refers to a valid block. This is mostly useful as - // a debugging aid. - // FIXME: Change current users of invalid bit to assertions? 
- bool invalid; - // Information about exits to a known address from this block. // This is used to implement block linking. struct LinkData @@ -112,7 +107,6 @@ public: // is valid (MSR.IR and MSR.DR, the address translation bits). static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; - static constexpr int MAX_NUM_BLOCKS = 65536 * 2; static constexpr u32 iCache_Num_Elements = 0x10000; static constexpr u32 iCache_Mask = iCache_Num_Elements - 1; @@ -125,13 +119,12 @@ public: void Reset(); void SchedulateClearCacheThreadSafe(); - bool IsFull() const; - // Code Cache JitBlock** GetICache(); void RunOnBlocks(std::function f); JitBlock* AllocateBlock(u32 em_address); + void FreeBlock(JitBlock* block); void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr); // Look for the block in the slow but accurate way. @@ -166,11 +159,6 @@ private: // Fast but risky block lookup based on iCache. size_t FastLookupEntryForAddress(u32 address); - // We store the metadata of all blocks in a linear way within this array. - // Note: blocks[0] must not be used as it is referenced as invalid block in iCache. - std::array blocks; // number -> JitBlock - int num_blocks = 1; - // links_to hold all exit points of all valid blocks in a reverse way. // It is used to query all blocks which links to an address. std::multimap links_to; // destination_PC -> number @@ -182,7 +170,7 @@ private: // Map indexed by the physical address of the entry point. // This is used to query the block based on the current PC in a slow way. // TODO: This is redundant with block_map. - std::multimap start_block_map; // start_addr -> block + std::multimap start_block_map; // start_addr -> block // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. 
From 8e00c411a3b5fc57391ce505d36eb905b787c031 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 16:57:46 +0100 Subject: [PATCH 8/8] JitCache::Rename iCache to fast_block_map. iCache sounds too much like emulation. --- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 4 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 4 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 38 +++++++++---------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 20 +++++----- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index ffff485d95..81c02aadd0 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -99,8 +99,8 @@ void Jit64AsmRoutineManager::Generate() // Fast block number lookup. // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2 MOV(32, R(RSCRATCH), PPCSTATE(pc)); - u64 icache = reinterpret_cast(g_jit->GetBlockCache()->GetICache()); - AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2)); + u64 icache = reinterpret_cast(g_jit->GetBlockCache()->GetFastBlockMap()); + AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 2)); if (icache <= INT_MAX) { MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast(icache))); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 7f323ebe6d..3d2b236739 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -74,9 +74,9 @@ void JitArm64::GenerateAsm() ARM64Reg pc_masked = W25; ARM64Reg cache_base = X27; ARM64Reg block = X30; - ORRI2R(pc_masked, WZR, JitBaseBlockCache::iCache_Mask << 3); + ORRI2R(pc_masked, WZR, JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3); AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ST_LSL, 1)); - MOVP2R(cache_base, g_jit->GetBlockCache()->GetICache()); + MOVP2R(cache_base, 
g_jit->GetBlockCache()->GetFastBlockMap()); LDR(block, cache_base, EncodeRegTo64(pc_masked)); FixupBranch not_found = CBZ(block); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 134b3ebca4..f5853ee071 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -74,7 +74,7 @@ void JitBaseBlockCache::Clear() valid_block.ClearAll(); - iCache.fill(nullptr); + fast_block_map.fill(nullptr); } void JitBaseBlockCache::Reset() @@ -88,9 +88,9 @@ void JitBaseBlockCache::SchedulateClearCacheThreadSafe() CoreTiming::ScheduleEvent(0, s_clear_jit_cache_thread_safe, 0, CoreTiming::FromThread::NON_CPU); } -JitBlock** JitBaseBlockCache::GetICache() +JitBlock** JitBaseBlockCache::GetFastBlockMap() { - return iCache.data(); + return fast_block_map.data(); } void JitBaseBlockCache::RunOnBlocks(std::function f) @@ -107,7 +107,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) b.physicalAddress = physicalAddress; b.msrBits = MSR & JIT_CACHE_MSR_MASK; b.linkData.clear(); - b.in_icache = 0; + b.fast_block_map_index = 0; return &b; } @@ -125,9 +125,9 @@ void JitBaseBlockCache::FreeBlock(JitBlock* block) void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) { - size_t icache = FastLookupEntryForAddress(block.effectiveAddress); - iCache[icache] = &block; - block.in_icache = icache; + size_t index = FastLookupIndexForAddress(block.effectiveAddress); + fast_block_map[index] = &block; + block.fast_block_map_index = index; u32 pAddr = block.physicalAddress; @@ -175,12 +175,12 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) const u8* JitBaseBlockCache::Dispatch() { - JitBlock* block = iCache[FastLookupEntryForAddress(PC)]; + JitBlock* block = fast_block_map[FastLookupIndexForAddress(PC)]; while (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JIT_CACHE_MSR_MASK)) {
MoveBlockIntoFastCache(PC, MSR & JIT_CACHE_MSR_MASK); - block = iCache[FastLookupEntryForAddress(PC)]; + block = fast_block_map[FastLookupIndexForAddress(PC)]; } return block->normalEntry; @@ -299,8 +299,8 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block) void JitBaseBlockCache::DestroyBlock(JitBlock& block) { - if (iCache[block.in_icache] == &block) - iCache[block.in_icache] = nullptr; + if (fast_block_map[block.fast_block_map_index] == &block) + fast_block_map[block.fast_block_map_index] = nullptr; UnlinkBlock(block); @@ -330,19 +330,19 @@ void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) } else { - // Drop old icache entry - if (iCache[block->in_icache] == block) - iCache[block->in_icache] = nullptr; + // Drop old fast block map entry + if (fast_block_map[block->fast_block_map_index] == block) + fast_block_map[block->fast_block_map_index] = nullptr; // And create a new one - size_t icache = FastLookupEntryForAddress(addr); - iCache[icache] = block; - block->in_icache = icache; + size_t index = FastLookupIndexForAddress(addr); + fast_block_map[index] = block; + block->fast_block_map_index = index; LinkBlock(*block); } } -size_t JitBaseBlockCache::FastLookupEntryForAddress(u32 address) +size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address) { - return (address >> 2) & iCache_Mask; + return (address >> 2) & FAST_BLOCK_MAP_MASK; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 0f1c36efef..c5aaa8d1e8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -63,9 +63,9 @@ struct JitBlock u64 ticStop; // for profiling - time. u64 ticCounter; // for profiling - time. - // This tracks the position if this block within the icache. - // We allow each block to have one icache entry. - size_t in_icache; + // This tracks the position of this block within the fast block cache.
+ // We allow each block to have only one map entry. + size_t fast_block_map_index; }; typedef void (*CompiledCode)(); @@ -107,8 +107,8 @@ public: // is valid (MSR.IR and MSR.DR, the address translation bits). static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; - static constexpr u32 iCache_Num_Elements = 0x10000; - static constexpr u32 iCache_Mask = iCache_Num_Elements - 1; + static constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000; + static constexpr u32 FAST_BLOCK_MAP_MASK = FAST_BLOCK_MAP_ELEMENTS - 1; explicit JitBaseBlockCache(JitBase& jit); virtual ~JitBaseBlockCache(); @@ -120,7 +120,7 @@ public: void SchedulateClearCacheThreadSafe(); // Code Cache - JitBlock** GetICache(); + JitBlock** GetFastBlockMap(); void RunOnBlocks(std::function f); JitBlock* AllocateBlock(u32 em_address); @@ -128,7 +128,7 @@ public: void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr); // Look for the block in the slow but accurate way. - // This function shall be used if FastLookupEntryForAddress() failed. + // This function shall be used if FastLookupIndexForAddress() failed. // This might return nullptr if there is no such block. JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr); @@ -156,8 +156,8 @@ private: void MoveBlockIntoFastCache(u32 em_address, u32 msr); - // Fast but risky block lookup based on iCache. - size_t FastLookupEntryForAddress(u32 address); + // Fast but risky block lookup based on fast_block_map. + size_t FastLookupIndexForAddress(u32 address); // links_to hold all exit points of all valid blocks in a reverse way. // It is used to query all blocks which links to an address. @@ -178,5 +178,5 @@ private: // This array is indexed with the masked PC and likely holds the correct block id. // This is used as a fast cache of start_block_map used in the assembly dispatcher. - std::array iCache; // start_addr & mask -> number + std::array fast_block_map; // start_addr & mask -> number };