From f3ed993747f45ea2fbfa59bf72bc67cad22a2f76 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 12:58:57 +0100 Subject: [PATCH] JitCache: Use a map with macro blocks for the occupied memory regions. This also allows fast invalidation, without any restrictions on the blocks themselves. So we can now implement inlining. --- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 51 +++++++++++++++---- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 7 +++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 1a8d1c6df9..a3360a6dca 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -79,6 +79,7 @@ void JitBaseBlockCache::Clear() } block_map.clear(); links_to.clear(); + block_range_map.clear(); valid_block.ClearAll(); @@ -125,11 +126,16 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8 fast_block_map[index] = █ block.fast_block_map_index = index; - u32 pAddr = block.physicalAddress; + u32 block_start = block.physicalAddress; + u32 block_end = block_start + (block.originalSize - 1) * 4; - for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) + for (u32 addr = block_start / 32; addr <= block_end / 32; ++addr) valid_block.Set(addr); + u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) + block_range_map[addr].insert(&block); + if (block_link) { for (const auto& e : block.linkData) @@ -200,18 +206,43 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for // destroy JIT blocks if (destroy_block) { - auto iter = block_map.begin(); - while (iter != block_map.end()) + // Iterate over all macro blocks which overlap the given range. 
+ u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + auto start = block_range_map.lower_bound(pAddr & mask); + auto end = block_range_map.lower_bound(pAddr + length); + while (start != end) { - if (iter->second.Overlap(pAddr, length)) + // Iterate over all blocks in the macro block. + auto iter = start->second.begin(); + while (iter != start->second.end()) { - DestroyBlock(iter->second); - iter = block_map.erase(iter); + JitBlock* block = *iter; + if (block->Overlap(pAddr, length)) + { + // If the block overlaps, also remove all other occupied slots in the other macro blocks. + // This will leak empty macro blocks, but they may be reused or cleared later on. + u32 block_start = block->physicalAddress; + u32 block_end = block_start + (block->originalSize - 1) * 4; + for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) + if (addr != start->first) + block_range_map[addr].erase(block); + + // And remove the block. + DestroyBlock(*block); + block_map.erase(block->physicalAddress); + iter = start->second.erase(iter); + } + else + { + iter++; + } } + + // If the macro block is empty, drop it. + if (start->second.empty()) + start = block_range_map.erase(start); else - { - iter++; - } + start++; } // If the code was actually modified, we need to clear the relevant entries from the diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 28111437fb..1baab02444 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "Common/CommonTypes.h" @@ -168,6 +169,12 @@ private: // This is used to query the block based on the current PC in a slow way. std::multimap block_map; // start_addr -> block + // Range of overlapping code indexed by a masked physical address. + // This is used for invalidation of memory regions. 
The range is grouped + // in macro blocks of 0x100 bytes each. + static constexpr u32 BLOCK_RANGE_MAP_ELEMENTS = 0x100; + std::map> block_range_map; + // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. ValidBlockBitSet valid_block;