Merge pull request #4716 from degasus/jitcache
JitCache: Use a container for overlapping blocks.
This commit is contained in:
commit
8d0ce8ea47
|
@ -211,7 +211,7 @@ void CachedInterpreter::Jit(u32 address)
|
|||
b->codeSize = (u32)(GetCodePtr() - b->checkedEntry);
|
||||
b->originalSize = code_block.m_num_instructions;
|
||||
|
||||
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, b->checkedEntry);
|
||||
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
}
|
||||
|
||||
void CachedInterpreter::ClearCache()
|
||||
|
|
|
@ -590,7 +590,8 @@ void Jit64::Jit(u32 em_address)
|
|||
}
|
||||
|
||||
JitBlock* b = blocks.AllocateBlock(em_address);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC));
|
||||
DoJit(em_address, &code_buffer, b, nextPC);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
}
|
||||
|
||||
const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC)
|
||||
|
|
|
@ -508,7 +508,8 @@ void JitIL::Jit(u32 em_address)
|
|||
}
|
||||
|
||||
JitBlock* b = blocks.AllocateBlock(em_address);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC));
|
||||
DoJit(em_address, &code_buffer, b, nextPC);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
}
|
||||
|
||||
const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC)
|
||||
|
|
|
@ -399,7 +399,7 @@ void JitArm64::Jit(u32)
|
|||
|
||||
JitBlock* b = blocks.AllocateBlock(em_address);
|
||||
const u8* BlockPtr = DoJit(em_address, &code_buffer, b, nextPC);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, BlockPtr);
|
||||
blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
}
|
||||
|
||||
const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC)
|
||||
|
|
|
@ -36,6 +36,12 @@ static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata)
|
|||
JitInterface::ClearCache();
|
||||
}
|
||||
|
||||
// Returns true if at least one instruction address stored in physical_addresses
// lies inside the half-open physical range [address, address + length).
// A zero length never overlaps anything.
bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const
{
  // physical_addresses is an ordered set, so the first entry that is not below
  // `address` is the only candidate that has to be inspected.
  const auto first = physical_addresses.lower_bound(address);
  if (first == physical_addresses.end())
    return false;

  // *first >= address holds here, so the subtraction cannot underflow.
  // Comparing the distance against `length` avoids evaluating address + length,
  // which wraps around in 32 bits for ranges reaching the top of the address
  // space and would make the comparison against the range end meaningless.
  return *first - address < length;
}
|
||||
|
||||
JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit}
|
||||
{
|
||||
}
|
||||
|
@ -64,13 +70,13 @@ void JitBaseBlockCache::Clear()
|
|||
#endif
|
||||
m_jit.js.fifoWriteAddresses.clear();
|
||||
m_jit.js.pairedQuantizeAddresses.clear();
|
||||
for (auto& e : start_block_map)
|
||||
for (auto& e : block_map)
|
||||
{
|
||||
DestroyBlock(e.second);
|
||||
}
|
||||
start_block_map.clear();
|
||||
links_to.clear();
|
||||
block_map.clear();
|
||||
links_to.clear();
|
||||
block_range_map.clear();
|
||||
|
||||
valid_block.ClearAll();
|
||||
|
||||
|
@ -95,14 +101,14 @@ JitBlock** JitBaseBlockCache::GetFastBlockMap()
|
|||
|
||||
void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
|
||||
{
|
||||
for (const auto& e : start_block_map)
|
||||
for (const auto& e : block_map)
|
||||
f(e.second);
|
||||
}
|
||||
|
||||
JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
|
||||
{
|
||||
u32 physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
|
||||
JitBlock& b = start_block_map.emplace(physicalAddress, JitBlock())->second;
|
||||
JitBlock& b = block_map.emplace(physicalAddress, JitBlock())->second;
|
||||
b.effectiveAddress = em_address;
|
||||
b.physicalAddress = physicalAddress;
|
||||
b.msrBits = MSR & JIT_CACHE_MSR_MASK;
|
||||
|
@ -111,30 +117,21 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
|
|||
return &b;
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::FreeBlock(JitBlock* block)
|
||||
{
|
||||
auto iter = start_block_map.equal_range(block->physicalAddress);
|
||||
while (iter.first != iter.second)
|
||||
{
|
||||
if (&iter.first->second == block)
|
||||
iter.first = start_block_map.erase(iter.first);
|
||||
else
|
||||
iter.first++;
|
||||
}
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr)
|
||||
void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
|
||||
const std::set<u32>& physical_addresses)
|
||||
{
|
||||
size_t index = FastLookupIndexForAddress(block.effectiveAddress);
|
||||
fast_block_map[index] = █
|
||||
block.fast_block_map_index = index;
|
||||
|
||||
u32 pAddr = block.physicalAddress;
|
||||
block.physical_addresses = physical_addresses;
|
||||
|
||||
for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr)
|
||||
valid_block.Set(addr);
|
||||
|
||||
block_map.emplace(std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr), &block);
|
||||
u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1);
|
||||
for (u32 addr : physical_addresses)
|
||||
{
|
||||
valid_block.Set(addr / 32);
|
||||
block_range_map[addr & range_mask].insert(&block);
|
||||
}
|
||||
|
||||
if (block_link)
|
||||
{
|
||||
|
@ -162,7 +159,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
|
|||
translated_addr = translated.address;
|
||||
}
|
||||
|
||||
auto iter = start_block_map.equal_range(translated_addr);
|
||||
auto iter = block_map.equal_range(translated_addr);
|
||||
for (; iter.first != iter.second; iter.first++)
|
||||
{
|
||||
JitBlock& b = iter.first->second;
|
||||
|
@ -186,7 +183,7 @@ const u8* JitBaseBlockCache::Dispatch()
|
|||
return block->normalEntry;
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
|
||||
void JitBaseBlockCache::InvalidateICache(u32 address, u32 length, bool forced)
|
||||
{
|
||||
auto translated = PowerPC::JitCache_TranslateAddress(address);
|
||||
if (!translated.valid)
|
||||
|
@ -203,19 +200,10 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
|
|||
valid_block.Clear(pAddr / 32);
|
||||
}
|
||||
|
||||
// destroy JIT blocks
|
||||
// !! this works correctly under assumption that any two overlapping blocks end at the same
|
||||
// address
|
||||
if (destroy_block)
|
||||
{
|
||||
auto it = block_map.lower_bound(std::make_pair(pAddr, 0));
|
||||
while (it != block_map.end() && it->first.second < pAddr + length)
|
||||
{
|
||||
JitBlock* block = it->second;
|
||||
DestroyBlock(*block);
|
||||
FreeBlock(block);
|
||||
it = block_map.erase(it);
|
||||
}
|
||||
// destroy JIT blocks
|
||||
ErasePhysicalRange(pAddr, length);
|
||||
|
||||
// If the code was actually modified, we need to clear the relevant entries from the
|
||||
// FIFO write address cache, so we don't end up with FIFO checks in places they shouldn't
|
||||
|
@ -232,6 +220,46 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
|
|||
}
|
||||
}
|
||||
|
||||
// Destroys every JIT block that has at least one instruction inside the
// physical range [address, address + length) and removes it from block_map and
// from every macro block of block_range_map it is registered in.
//
// address: first physical address of the range to erase.
// length:  size of the range in bytes.
void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length)
{
  // Iterate over all macro blocks which overlap the given range.
  const u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1);
  auto start = block_range_map.lower_bound(address & range_mask);

  // Compute the exclusive upper bound in 64 bits. In 32 bits address + length
  // wraps for ranges reaching the top of the address space, which would place
  // `end` before `start` and let the loop below run past the end of the map.
  const u64 range_end = static_cast<u64>(address) + length;
  const bool wraps = static_cast<u64>(static_cast<u32>(range_end)) != range_end;
  auto end = wraps ? block_range_map.end() :
                     block_range_map.lower_bound(static_cast<u32>(range_end));

  while (start != end)
  {
    // Iterate over all blocks in the macro block.
    auto iter = start->second.begin();
    while (iter != start->second.end())
    {
      JitBlock* block = *iter;
      if (block->OverlapsPhysicalRange(address, length))
      {
        // If the block overlaps, also remove all other occupied slots in the other macro blocks.
        // This will leak empty macro blocks, but they may be reused or cleared later on.
        for (u32 addr : block->physical_addresses)
        {
          const u32 macro_addr = addr & range_mask;
          if (macro_addr != start->first)
            block_range_map[macro_addr].erase(block);
        }

        // And remove the block.
        DestroyBlock(*block);

        // block_map is a multimap keyed by the start address, so several
        // blocks may share block->physicalAddress. Erasing by key would also
        // drop blocks that were never destroyed and are still referenced from
        // block_range_map; erase only the element that actually is `block`.
        auto candidates = block_map.equal_range(block->physicalAddress);
        for (auto it = candidates.first; it != candidates.second; ++it)
        {
          if (&it->second == block)
          {
            // `block` dangles after this erase; it is not dereferenced again.
            block_map.erase(it);
            break;
          }
        }

        iter = start->second.erase(iter);
      }
      else
      {
        ++iter;
      }
    }

    // If the macro block is empty, drop it.
    if (start->second.empty())
      start = block_range_map.erase(start);
    else
      ++start;
  }
}
|
||||
|
||||
u32* JitBaseBlockCache::GetBlockBitSet() const
|
||||
{
|
||||
return valid_block.m_valid_block.get();
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
@ -24,6 +25,8 @@ class JitBase;
|
|||
// address.
|
||||
struct JitBlock
|
||||
{
|
||||
bool OverlapsPhysicalRange(u32 address, u32 length) const;
|
||||
|
||||
// A special entry point for block linking; usually used to check the
|
||||
// downcount.
|
||||
const u8* checkedEntry;
|
||||
|
@ -35,8 +38,8 @@ struct JitBlock
|
|||
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
|
||||
u32 msrBits;
|
||||
// The physical address of the code represented by this block.
|
||||
// Various maps in the cache are indexed by this (start_block_map,
|
||||
// block_map, and valid_block in particular). This is useful because of
|
||||
// Various maps in the cache are indexed by this (block_map
|
||||
// and valid_block in particular). This is useful because of
|
||||
// the way the instruction cache works on PowerPC.
|
||||
u32 physicalAddress;
|
||||
// The number of bytes of JIT'ed code contained in this block. Mostly
|
||||
|
@ -57,6 +60,9 @@ struct JitBlock
|
|||
};
|
||||
std::vector<LinkData> linkData;
|
||||
|
||||
// This set stores all physical addresses of all occupied instructions.
|
||||
std::set<u32> physical_addresses;
|
||||
|
||||
// we don't really need to save start and stop
|
||||
// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
|
||||
u64 ticStart; // for profiling - time.
|
||||
|
@ -124,8 +130,7 @@ public:
|
|||
void RunOnBlocks(std::function<void(const JitBlock&)> f);
|
||||
|
||||
JitBlock* AllocateBlock(u32 em_address);
|
||||
void FreeBlock(JitBlock* block);
|
||||
void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr);
|
||||
void FinalizeBlock(JitBlock& block, bool block_link, const std::set<u32>& physical_addresses);
|
||||
|
||||
// Look for the block in the slow but accurate way.
|
||||
// This function shall be used if FastLookupIndexForAddress() failed.
|
||||
|
@ -138,7 +143,8 @@ public:
|
|||
// assembly version.)
|
||||
const u8* Dispatch();
|
||||
|
||||
void InvalidateICache(u32 address, const u32 length, bool forced);
|
||||
void InvalidateICache(u32 address, u32 length, bool forced);
|
||||
void ErasePhysicalRange(u32 address, u32 length);
|
||||
|
||||
u32* GetBlockBitSet() const;
|
||||
|
||||
|
@ -163,20 +169,21 @@ private:
|
|||
// It is used to query all blocks which links to an address.
|
||||
std::multimap<u32, JitBlock*> links_to; // destination_PC -> number
|
||||
|
||||
// Map indexed by the physical memory location.
|
||||
// It is used to invalidate blocks based on memory location.
|
||||
std::multimap<std::pair<u32, u32>, JitBlock*> block_map; // (end_addr, start_addr) -> block
|
||||
|
||||
// Map indexed by the physical address of the entry point.
|
||||
// This is used to query the block based on the current PC in a slow way.
|
||||
// TODO: This is redundant with block_map.
|
||||
std::multimap<u32, JitBlock> start_block_map; // start_addr -> block
|
||||
std::multimap<u32, JitBlock> block_map; // start_addr -> block
|
||||
|
||||
// Range of overlapping code indexed by a masked physical address.
|
||||
// This is used for invalidation of memory regions. The range is grouped
|
||||
// in macro blocks of 0x100 bytes each.
|
||||
static constexpr u32 BLOCK_RANGE_MAP_ELEMENTS = 0x100;
|
||||
std::map<u32, std::set<JitBlock*>> block_range_map;
|
||||
|
||||
// This bitset shows which cache lines overlap with any blocks.
|
||||
// It is used to provide a fast way to query if no icache invalidation is needed.
|
||||
ValidBlockBitSet valid_block;
|
||||
|
||||
// This array is indexed with the masked PC and likely holds the correct block id.
|
||||
// This is used as a fast cache of start_block_map used in the assembly dispatcher.
|
||||
// This is used as a fast cache of block_map used in the assembly dispatcher.
|
||||
std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map; // start_addr & mask -> number
|
||||
};
|
||||
|
|
|
@ -384,7 +384,7 @@ TryReadInstResult TryReadInstruction(u32 address)
|
|||
auto tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
|
||||
if (!tlb_addr.Success())
|
||||
{
|
||||
return TryReadInstResult{false, false, 0};
|
||||
return TryReadInstResult{false, false, 0, 0};
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -403,7 +403,7 @@ TryReadInstResult TryReadInstruction(u32 address)
|
|||
{
|
||||
hex = PowerPC::ppcState.iCache.ReadInstruction(address);
|
||||
}
|
||||
return TryReadInstResult{true, from_bat, hex};
|
||||
return TryReadInstResult{true, from_bat, hex, address};
|
||||
}
|
||||
|
||||
u32 HostRead_Instruction(const u32 address)
|
||||
|
|
|
@ -646,6 +646,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
block->m_memory_exception = false;
|
||||
block->m_num_instructions = 0;
|
||||
block->m_gqr_used = BitSet8(0);
|
||||
block->m_physical_addresses.clear();
|
||||
|
||||
CodeOp* code = buffer->codebuffer;
|
||||
|
||||
|
@ -653,7 +654,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
u32 return_address = 0;
|
||||
u32 numFollows = 0;
|
||||
u32 num_inst = 0;
|
||||
bool prev_inst_from_bat = true;
|
||||
|
||||
for (u32 i = 0; i < blockSize; ++i)
|
||||
{
|
||||
|
@ -666,16 +666,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
}
|
||||
UGeckoInstruction inst = result.hex;
|
||||
|
||||
// Slight hack: the JIT block cache currently assumes all blocks end at the same place,
|
||||
// but broken blocks due to page faults break this assumption. Avoid this by just ending
|
||||
// all virtual memory instruction blocks at page boundaries.
|
||||
// FIXME: improve the JIT block cache so we don't need to do this.
|
||||
if ((!result.from_bat || !prev_inst_from_bat) && i > 0 && (address & 0xfff) == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
prev_inst_from_bat = result.from_bat;
|
||||
|
||||
num_inst++;
|
||||
memset(&code[i], 0, sizeof(CodeOp));
|
||||
GekkoOPInfo* opinfo = GetOpInfo(inst);
|
||||
|
@ -687,6 +677,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
code[i].branchToIndex = -1;
|
||||
code[i].skip = false;
|
||||
block->m_stats->numCycles += opinfo->numCycles;
|
||||
block->m_physical_addresses.insert(result.physical_address);
|
||||
|
||||
SetInstructionStats(block, &code[i], opinfo, i);
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -157,6 +158,9 @@ struct CodeBlock
|
|||
|
||||
// Which GPRs this block reads from before defining, if any.
|
||||
BitSet32 m_gpr_inputs;
|
||||
|
||||
// Which memory locations are occupied by this block.
|
||||
std::set<u32> m_physical_addresses;
|
||||
};
|
||||
|
||||
class PPCAnalyzer
|
||||
|
|
|
@ -232,6 +232,7 @@ struct TryReadInstResult
|
|||
bool valid;
|
||||
bool from_bat;
|
||||
u32 hex;
|
||||
u32 physical_address;
|
||||
};
|
||||
TryReadInstResult TryReadInstruction(const u32 address);
|
||||
|
||||
|
|
Loading…
Reference in New Issue