JitCache: Use a map with macro blocks for the occupied memory regions.

This also allows fast invalidation, without any restrictions on the blocks themselves.
So we can now implement inlining.
This commit is contained in:
degasus 2017-01-22 12:58:57 +01:00
parent dc0fbc15f0
commit f3ed993747
2 changed files with 48 additions and 10 deletions

View File

@ -79,6 +79,7 @@ void JitBaseBlockCache::Clear()
} }
block_map.clear(); block_map.clear();
links_to.clear(); links_to.clear();
block_range_map.clear();
valid_block.ClearAll(); valid_block.ClearAll();
@ -125,11 +126,16 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8
fast_block_map[index] = █ fast_block_map[index] = █
block.fast_block_map_index = index; block.fast_block_map_index = index;
u32 pAddr = block.physicalAddress; u32 block_start = block.physicalAddress;
u32 block_end = block_start + (block.originalSize - 1) * 4;
for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) for (u32 addr = block_start / 32; addr <= block_end / 32; ++addr)
valid_block.Set(addr); valid_block.Set(addr);
u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1);
for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS)
block_range_map[addr].insert(&block);
if (block_link) if (block_link)
{ {
for (const auto& e : block.linkData) for (const auto& e : block.linkData)
@ -200,18 +206,43 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
// destroy JIT blocks // destroy JIT blocks
if (destroy_block) if (destroy_block)
{ {
auto iter = block_map.begin(); // Iterate over all macro blocks which overlap the given range.
while (iter != block_map.end()) u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1);
auto start = block_range_map.lower_bound(pAddr & mask);
auto end = block_range_map.lower_bound(pAddr + length);
while (start != end)
{ {
if (iter->second.Overlap(pAddr, length)) // Iterate over all blocks in the macro block.
auto iter = start->second.begin();
while (iter != start->second.end())
{ {
DestroyBlock(iter->second); JitBlock* block = *iter;
iter = block_map.erase(iter); if (block->Overlap(pAddr, length))
{
// If the block overlaps, also remove all other occupied slots in the other macro blocks.
// This will leak empty macro blocks, but they may be reused or cleared later on.
u32 block_start = block->physicalAddress;
u32 block_end = block_start + (block->originalSize - 1) * 4;
for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS)
if (addr != start->first)
block_range_map[addr].erase(block);
// And remove the block.
DestroyBlock(*block);
block_map.erase(block->physicalAddress);
iter = start->second.erase(iter);
}
else
{
iter++;
}
} }
// If the macro block is empty, drop it.
if (start->second.empty())
start = block_range_map.erase(start);
else else
{ start++;
iter++;
}
} }
// If the code was actually modified, we need to clear the relevant entries from the // If the code was actually modified, we need to clear the relevant entries from the

View File

@ -9,6 +9,7 @@
#include <functional> #include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include <set>
#include <vector> #include <vector>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
@ -168,6 +169,12 @@ private:
// This is used to query the block based on the current PC in a slow way. // This is used to query the block based on the current PC in a slow way.
std::multimap<u32, JitBlock> block_map; // start_addr -> block std::multimap<u32, JitBlock> block_map; // start_addr -> block
// Range of overlapping code indexed by a masked physical address.
// This is used for invalidation of memory regions. The range is grouped
// into macro blocks of 0x100 bytes each.
static constexpr u32 BLOCK_RANGE_MAP_ELEMENTS = 0x100;
std::map<u32, std::set<JitBlock*>> block_range_map;
// This bitsets shows which cachelines overlap with any blocks. // This bitsets shows which cachelines overlap with any blocks.
// It is used to provide a fast way to query if no icache invalidation is needed. // It is used to provide a fast way to query if no icache invalidation is needed.
ValidBlockBitSet valid_block; ValidBlockBitSet valid_block;