Merge pull request #4678 from degasus/jitcache

JitCache: Store the JIT blocks in the std::map.
Matthew Parlane 2017-01-23 11:49:46 +13:00, committed by GitHub
commit ef7a809fad
8 changed files with 149 additions and 165 deletions


@@ -123,7 +123,7 @@ static bool CheckDSI(u32 data)
 void CachedInterpreter::Jit(u32 address)
 {
-  if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 || m_block_cache.IsFull() ||
+  if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 ||
       SConfig::GetInstance().bJITNoBlockCache)
   {
     ClearCache();

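The dropped IsFull() guards here and in the other JITs below are the visible payoff of this PR: once blocks live in a node-based std::multimap instead of a fixed-size std::array, the block cache itself can no longer run out of slots, and only the emitted-code buffers still need a fullness check. A minimal standalone sketch of the idea (names here are invented for illustration, not taken from Dolphin):

#include <cstdint>
#include <map>

using u32 = std::uint32_t;

struct Block
{
  u32 effectiveAddress = 0;
  u32 msrBits = 0;
};

class BlockMap
{
public:
  // A std::multimap never "fills up" the way a fixed array of blocks did,
  // so there is no IsFull() to consult before compiling.
  Block* Allocate(u32 physical_address)
  {
    // multimap::emplace returns a plain iterator (no <iterator, bool> pair).
    return &m_blocks.emplace(physical_address, Block())->second;
  }

private:
  std::multimap<u32, Block> m_blocks;  // physical start address -> block
};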

@@ -543,7 +543,7 @@ void Jit64::Jit(u32 em_address)
   }
   if (IsAlmostFull() || m_far_code.IsAlmostFull() || trampolines.IsAlmostFull() ||
-      blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache)
+      SConfig::GetInstance().bJITNoBlockCache)
   {
     ClearCache();
   }


@@ -92,54 +92,53 @@ void Jit64AsmRoutineManager::Generate()
   dispatcherNoCheck = GetCodePtr();
-  // Switch to the correct memory base, in case MSR.DR has changed.
-  // TODO: Is there a more efficient place to put this? We don't
-  // need to do this for indirect jumps, just exceptions etc.
-  TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
-  FixupBranch physmem = J_CC(CC_NZ);
-  MOV(64, R(RMEM), ImmPtr(Memory::physical_base));
-  FixupBranch membaseend = J();
-  SetJumpTarget(physmem);
-  MOV(64, R(RMEM), ImmPtr(Memory::logical_base));
-  SetJumpTarget(membaseend);
+  // The following is a translation of JitBaseBlockCache::Dispatch into assembly.
-  // Fast block number lookup.
-  // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
-  MOV(32, R(RSCRATCH), PPCSTATE(pc));
-  u64 icache = reinterpret_cast<u64>(g_jit->GetBlockCache()->GetICache());
-  AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2));
-  if (icache <= INT_MAX)
+  const bool assembly_dispatcher = true;
+  if (assembly_dispatcher)
   {
-    MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
+    // Fast block number lookup.
+    // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
+    MOV(32, R(RSCRATCH), PPCSTATE(pc));
+    u64 icache = reinterpret_cast<u64>(g_jit->GetBlockCache()->GetFastBlockMap());
+    AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 2));
+    if (icache <= INT_MAX)
+    {
+      MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
+    }
+    else
+    {
+      MOV(64, R(RSCRATCH2), Imm64(icache));
+      MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
+    }
+    // Check if we found a block.
+    TEST(64, R(RSCRATCH), R(RSCRATCH));
+    FixupBranch not_found = J_CC(CC_Z);
+    // Check both block.effectiveAddress and block.msrBits.
+    MOV(32, R(RSCRATCH2), PPCSTATE(msr));
+    AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
+    SHL(64, R(RSCRATCH2), Imm8(32));
+    MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
+    OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
+    CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
+    FixupBranch state_mismatch = J_CC(CC_NE);
+    // Success; branch to the block we found.
+    // Switch to the correct memory base, in case MSR.DR has changed.
+    TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
+    FixupBranch physmem = J_CC(CC_Z);
+    MOV(64, R(RMEM), ImmPtr(Memory::logical_base));
+    JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
+    SetJumpTarget(physmem);
+    MOV(64, R(RMEM), ImmPtr(Memory::physical_base));
+    JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
+    SetJumpTarget(not_found);
+    SetJumpTarget(state_mismatch);
+    // Failure, fallback to the C++ dispatcher for calling the JIT.
   }
-  else
-  {
-    MOV(64, R(RSCRATCH2), Imm64(icache));
-    MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
-  }
-  // Check if we found a block.
-  TEST(64, R(RSCRATCH), R(RSCRATCH));
-  FixupBranch not_found = J_CC(CC_Z);
-  // Check both block.effectiveAddress and block.msrBits.
-  MOV(32, R(RSCRATCH2), PPCSTATE(msr));
-  AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK));
-  SHL(64, R(RSCRATCH2), Imm8(32));
-  MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
-  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
-  CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
-  FixupBranch state_mismatch = J_CC(CC_NE);
-  // Success; branch to the block we found.
-  JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
-  SetJumpTarget(not_found);
-  SetJumpTarget(state_mismatch);
-  // Failure; call into the block cache to update the state, then try again.
-  // (We need to loop because Jit() might not actually generate a block
-  // if we hit an ISI.)
   // We reset the stack because Jit might clear the code cache.
   // Also if we are in the middle of disabling BLR optimization on windows
@@ -151,8 +150,15 @@ void Jit64AsmRoutineManager::Generate()
   ABI_PushRegistersAndAdjustStack({}, 0);
   ABI_CallFunction(JitBase::Dispatch);
   ABI_PopRegistersAndAdjustStack({}, 0);
-  // JMPptr(R(ABI_RETURN));
-  JMP(dispatcherNoCheck, true);
+  // Switch to the correct memory base, in case MSR.DR has changed.
+  TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
+  FixupBranch physmem = J_CC(CC_Z);
+  MOV(64, R(RMEM), ImmPtr(Memory::logical_base));
+  JMPptr(R(ABI_RETURN));
+  SetJumpTarget(physmem);
+  MOV(64, R(RMEM), ImmPtr(Memory::physical_base));
+  JMPptr(R(ABI_RETURN));
   SetJumpTarget(bail);
   doTiming = GetCodePtr();

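Two details of the rewritten x64 dispatcher above are worth spelling out. First, the memory-base switch (the MSR.DR test) moved from the top of dispatcherNoCheck to the exit paths, so it only runs once a block is actually about to execute. Second, the single 64-bit CMP against offsetof(JitBlock, effectiveAddress) checks the PC and the masked MSR at once; that works because msrBits sits directly after effectiveAddress in JitBlock, so on a little-endian host one 64-bit load yields (msrBits << 32) | effectiveAddress. A hedged C++ rendering of that comparison (BlockKey is a stand-in for the relevant slice of JitBlock, not the real struct):

#include <cstdint>
#include <cstring>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Stand-in for the two adjacent JitBlock fields the dispatcher compares
// with a single 64-bit CMP.
struct BlockKey
{
  u32 effectiveAddress;
  u32 msrBits;
};

// Mirrors the MOV/AND/SHL/OR sequence above: the masked MSR goes into
// the upper half, the PC into the lower half.
u64 PackKey(u32 pc, u32 msr_masked)
{
  return (static_cast<u64>(msr_masked) << 32) | pc;
}

bool Matches(const BlockKey& key, u32 pc, u32 msr_masked)
{
  u64 stored;
  std::memcpy(&stored, &key, sizeof(stored));  // one 64-bit load, like the CMP
  return stored == PackKey(pc, msr_masked);    // little-endian layout assumed
}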

@@ -466,7 +466,7 @@ void JitIL::Trace()
 void JitIL::Jit(u32 em_address)
 {
   if (IsAlmostFull() || m_far_code.IsAlmostFull() || trampolines.IsAlmostFull() ||
-      blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache)
+      SConfig::GetInstance().bJITNoBlockCache)
   {
     ClearCache();
   }


@@ -368,8 +368,7 @@ void JitArm64::SingleStep()
 void JitArm64::Jit(u32)
 {
-  if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() ||
-      SConfig::GetInstance().bJITNoBlockCache)
+  if (IsAlmostFull() || farcode.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
   {
     ClearCache();
   }


@@ -74,9 +74,9 @@ void JitArm64::GenerateAsm()
   ARM64Reg pc_masked = W25;
   ARM64Reg cache_base = X27;
   ARM64Reg block = X30;
-  ORRI2R(pc_masked, WZR, JitBaseBlockCache::iCache_Mask << 3);
+  ORRI2R(pc_masked, WZR, JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3);
   AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ST_LSL, 1));
-  MOVP2R(cache_base, g_jit->GetBlockCache()->GetICache());
+  MOVP2R(cache_base, g_jit->GetBlockCache()->GetFastBlockMap());
   LDR(block, cache_base, EncodeRegTo64(pc_masked));
   FixupBranch not_found = CBZ(block);
@@ -88,7 +88,7 @@ void JitArm64::GenerateAsm()
   FixupBranch pc_missmatch = B(CC_NEQ);
   LDR(INDEX_UNSIGNED, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
-  ANDI2R(pc_and_msr2, pc_and_msr2, JitBlock::JIT_CACHE_MSR_MASK);
+  ANDI2R(pc_and_msr2, pc_and_msr2, JitBaseBlockCache::JIT_CACHE_MSR_MASK);
   LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, msrBits));
   CMP(pc_and_msr, pc_and_msr2);
   FixupBranch msr_missmatch = B(CC_NEQ);

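The AArch64 lookup reaches the same fast-map element as the x64 one; it just folds the scaling differently. Instead of masking (PC >> 2) and then multiplying by sizeof(JitBlock*) == 8, it ANDs (PC << 1) with (FAST_BLOCK_MAP_MASK << 3), which is what the ORRI2R/AND pair above emits. A small self-checking sketch of that identity (standalone, not Dolphin code):

#include <cassert>
#include <cstdint>

using u64 = std::uint64_t;

constexpr u64 FAST_BLOCK_MAP_MASK = 0x10000 - 1;

int main()
{
  // ((PC >> 2) & mask) * 8 selects a JitBlock* slot by byte offset;
  // (PC << 1) & (mask << 3) computes the same value with a single AND.
  for (u64 pc = 0; pc < 0x400000; pc += 4)
  {
    u64 scaled = ((pc >> 2) & FAST_BLOCK_MAP_MASK) * 8;
    u64 folded = (pc << 1) & (FAST_BLOCK_MAP_MASK << 3);
    assert(scaled == folded);
  }
  return 0;
}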

@@ -9,6 +9,7 @@
 // performance hit, it's not enabled by default, but it's useful for
 // locating performance issues.
 #include <algorithm>
+#include <cstring>
 #include <map>
 #include <utility>
@@ -46,14 +47,11 @@ void JitBaseBlockCache::Init()
   s_clear_jit_cache_thread_safe = CoreTiming::RegisterEvent("clearJitCache", ClearCacheThreadSafe);
   JitRegister::Init(SConfig::GetInstance().m_perfDir);
-  iCache.fill(nullptr);
   Clear();
 }
 void JitBaseBlockCache::Shutdown()
 {
-  num_blocks = 1;
   JitRegister::Shutdown();
 }
@@ -62,25 +60,21 @@ void JitBaseBlockCache::Shutdown()
 void JitBaseBlockCache::Clear()
 {
 #if defined(_DEBUG) || defined(DEBUGFAST)
-  if (IsFull())
-    Core::DisplayMessage("Clearing block cache.", 3000);
-  else
-    Core::DisplayMessage("Clearing code cache.", 3000);
+  Core::DisplayMessage("Clearing code cache.", 3000);
 #endif
   m_jit.js.fifoWriteAddresses.clear();
   m_jit.js.pairedQuantizeAddresses.clear();
-  for (int i = 1; i < num_blocks; i++)
+  for (auto& e : start_block_map)
   {
-    DestroyBlock(blocks[i], false);
+    DestroyBlock(e.second);
   }
+  start_block_map.clear();
   links_to.clear();
   block_map.clear();
   valid_block.ClearAll();
-  num_blocks = 1;
-  blocks[0].msrBits = 0xFFFFFFFF;
-  blocks[0].invalid = true;
+  fast_block_map.fill(nullptr);
 }
 void JitBaseBlockCache::Reset()
@@ -94,56 +88,53 @@ void JitBaseBlockCache::SchedulateClearCacheThreadSafe()
   CoreTiming::ScheduleEvent(0, s_clear_jit_cache_thread_safe, 0, CoreTiming::FromThread::NON_CPU);
 }
-bool JitBaseBlockCache::IsFull() const
+JitBlock** JitBaseBlockCache::GetFastBlockMap()
 {
-  return num_blocks >= MAX_NUM_BLOCKS - 1;
-}
-JitBlock** JitBaseBlockCache::GetICache()
-{
-  return iCache.data();
+  return fast_block_map.data();
 }
 void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
 {
-  for (int i = 0; i < num_blocks; i++)
-    f(blocks[i]);
+  for (const auto& e : start_block_map)
+    f(e.second);
 }
 JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
 {
-  JitBlock& b = blocks[num_blocks];
-  b.invalid = false;
+  u32 physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
+  JitBlock& b = start_block_map.emplace(physicalAddress, JitBlock())->second;
   b.effectiveAddress = em_address;
-  b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
-  b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK;
+  b.physicalAddress = physicalAddress;
+  b.msrBits = MSR & JIT_CACHE_MSR_MASK;
   b.linkData.clear();
-  num_blocks++;  // commit the current block
+  b.fast_block_map_index = 0;
   return &b;
 }
+void JitBaseBlockCache::FreeBlock(JitBlock* block)
+{
+  auto iter = start_block_map.equal_range(block->physicalAddress);
+  while (iter.first != iter.second)
+  {
+    if (&iter.first->second == block)
+      iter.first = start_block_map.erase(iter.first);
+    else
+      iter.first++;
+  }
+}
 void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr)
 {
-  if (start_block_map.count(block.physicalAddress))
-  {
-    // We already have a block at this address; invalidate the old block.
-    // This should be very rare. This will only happen if the same block
-    // is called both with DR/IR enabled or disabled.
-    WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", block.physicalAddress);
-    JitBlock& old_b = *start_block_map[block.physicalAddress];
-    block_map.erase(
-        std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress));
-    DestroyBlock(old_b, true);
-  }
-  start_block_map[block.physicalAddress] = &block;
-  FastLookupEntryForAddress(block.effectiveAddress) = &block;
+  size_t index = FastLookupIndexForAddress(block.effectiveAddress);
+  fast_block_map[index] = &block;
+  block.fast_block_map_index = index;
   u32 pAddr = block.physicalAddress;
   for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr)
     valid_block.Set(addr);
-  block_map[std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr)] = &block;
+  block_map.emplace(std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr), &block);
   if (block_link)
   {
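AllocateBlock now constructs the block directly inside start_block_map: std::multimap::emplace (unlike std::map's) returns a bare iterator, which is why ->second can be taken immediately, and duplicate physical addresses are allowed, so a block compiled under a different MSR translation state no longer clobbers an existing one. FreeBlock then has to erase exactly the right node out of a bucket that may hold several. A reduced sketch of that erase pattern (names invented for the example); note the iterator must be refreshed from erase() rather than advanced after the fact:

#include <cstdint>
#include <map>

using u32 = std::uint32_t;

struct Block
{
  u32 msrBits;
};

// Remove exactly one block from a bucket that may hold several entries
// with the same physical address (same code compiled under different
// MSR translation states). erase() returns the next valid iterator;
// advancing an already-erased iterator would be undefined behaviour.
void EraseOne(std::multimap<u32, Block>& map, u32 key, const Block* target)
{
  auto range = map.equal_range(key);
  for (auto it = range.first; it != range.second;)
  {
    if (&it->second == target)
      it = map.erase(it);
    else
      ++it;
  }
}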
@@ -171,26 +162,25 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
     translated_addr = translated.address;
   }
-  auto map_result = start_block_map.find(translated_addr);
-  if (map_result == start_block_map.end())
-    return nullptr;
+  auto iter = start_block_map.equal_range(translated_addr);
+  for (; iter.first != iter.second; iter.first++)
+  {
+    JitBlock& b = iter.first->second;
+    if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK))
+      return &b;
+  }
-  JitBlock* b = map_result->second;
-  if (b->invalid || b->effectiveAddress != addr ||
-      b->msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK))
-    return nullptr;
-  return b;
+  return nullptr;
 }
 const u8* JitBaseBlockCache::Dispatch()
 {
-  JitBlock* block = FastLookupEntryForAddress(PC);
+  JitBlock* block = fast_block_map[FastLookupIndexForAddress(PC)];
-  while (!block || block->effectiveAddress != PC ||
-         block->msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK))
+  while (!block || block->effectiveAddress != PC || block->msrBits != (MSR & JIT_CACHE_MSR_MASK))
   {
-    MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK);
-    block = FastLookupEntryForAddress(PC);
+    MoveBlockIntoFastCache(PC, MSR & JIT_CACHE_MSR_MASK);
+    block = fast_block_map[FastLookupIndexForAddress(PC)];
   }
   return block->normalEntry;
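Dispatch() is now a two-level lookup: fast_block_map acts as a direct-mapped cache in front of the authoritative start_block_map, and MoveBlockIntoFastCache() promotes the winner on a miss. Roughly, under invented names and with the JIT-compile path left out:

#include <array>
#include <cstddef>
#include <cstdint>
#include <map>

using u32 = std::uint32_t;

constexpr u32 MAP_MASK = 0x10000 - 1;
constexpr u32 MSR_MASK = 0x30;

struct Block
{
  u32 effectiveAddress;
  u32 msrBits;
  std::size_t fast_index;
};

struct TwoLevelCache
{
  std::array<Block*, MAP_MASK + 1> fast{};  // direct-mapped, checked first
  std::multimap<u32, Block> slow;           // authoritative store

  Block* Lookup(u32 pc, u32 physical, u32 msr)
  {
    Block*& entry = fast[(pc >> 2) & MAP_MASK];
    if (entry && entry->effectiveAddress == pc && entry->msrBits == (msr & MSR_MASK))
      return entry;

    // Miss or collision: fall back to the multimap, then promote.
    auto range = slow.equal_range(physical);
    for (auto it = range.first; it != range.second; ++it)
    {
      if (it->second.effectiveAddress == pc && it->second.msrBits == (msr & MSR_MASK))
      {
        entry = &it->second;
        entry->fast_index = (pc >> 2) & MAP_MASK;
        return entry;
      }
    }
    return nullptr;  // the real code would JIT a new block here and retry
  }
};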
@@ -221,7 +211,9 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
   auto it = block_map.lower_bound(std::make_pair(pAddr, 0));
   while (it != block_map.end() && it->first.second < pAddr + length)
   {
-    DestroyBlock(*it->second, true);
+    JitBlock* block = it->second;
+    DestroyBlock(*block);
+    FreeBlock(block);
     it = block_map.erase(it);
   }
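The block_map key is the pair (end_addr, start_addr), ordered lexicographically, which is what makes the loop above work: lower_bound({pAddr, 0}) skips every block that already ended before the invalidated range, and the walk stops at the first visited block whose start address lies past it. With DestroyBlock() no longer touching the other containers itself, the loop now explicitly calls FreeBlock() and erases the block_map node. A condensed sketch of the range scan (destruction elided, names invented):

#include <cstdint>
#include <map>
#include <utility>

using u32 = std::uint32_t;

struct Block
{
};

// Keys are (end_addr, start_addr), compared lexicographically.
void InvalidateRange(std::multimap<std::pair<u32, u32>, Block*>& block_map, u32 addr, u32 length)
{
  // Skip blocks that end before [addr, addr + length) ...
  auto it = block_map.lower_bound(std::make_pair(addr, u32{0}));
  // ... and visit candidates until one starts past the range.
  while (it != block_map.end() && it->first.second < addr + length)
  {
    // The real code destroys and frees *it->second before dropping the node.
    it = block_map.erase(it);
  }
}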
@@ -257,17 +249,12 @@ void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block)
 void JitBaseBlockCache::LinkBlockExits(JitBlock& block)
 {
-  if (block.invalid)
-  {
-    // This block is dead. Don't relink it.
-    return;
-  }
   for (auto& e : block.linkData)
   {
     if (!e.linkStatus)
     {
       JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits);
-      if (destinationBlock && !destinationBlock->invalid)
+      if (destinationBlock)
       {
         WriteLinkBlock(e, destinationBlock);
         e.linkStatus = true;
@@ -310,28 +297,24 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
-void JitBaseBlockCache::DestroyBlock(JitBlock& block, bool invalidate)
+void JitBaseBlockCache::DestroyBlock(JitBlock& block)
 {
-  if (block.invalid)
-  {
-    if (invalidate)
-      PanicAlert("Invalidating invalid block %p", &block);
-    return;
-  }
-  block.invalid = true;
-  start_block_map.erase(block.physicalAddress);
-  FastLookupEntryForAddress(block.effectiveAddress) = nullptr;
+  if (fast_block_map[block.fast_block_map_index] == &block)
+    fast_block_map[block.fast_block_map_index] = nullptr;
   UnlinkBlock(block);
   // Delete linking addresses
-  auto it = links_to.equal_range(block.effectiveAddress);
-  while (it.first != it.second)
+  for (const auto& e : block.linkData)
   {
-    if (it.first->second == &block)
-      it.first = links_to.erase(it.first);
-    else
-      it.first++;
+    auto it = links_to.equal_range(e.exitAddress);
+    while (it.first != it.second)
+    {
+      if (it.first->second == &block)
+        it.first = links_to.erase(it.first);
+      else
+        it.first++;
+    }
   }
   // Raise a signal if we are going to call this block again
@@ -347,12 +330,19 @@ void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
   }
   else
   {
-    FastLookupEntryForAddress(addr) = block;
+    // Drop old fast block map entry
+    if (fast_block_map[block->fast_block_map_index] == block)
+      fast_block_map[block->fast_block_map_index] = nullptr;
+    // And create a new one
+    size_t index = FastLookupIndexForAddress(addr);
+    fast_block_map[index] = block;
+    block->fast_block_map_index = index;
     LinkBlock(*block);
   }
 }
-JitBlock*& JitBaseBlockCache::FastLookupEntryForAddress(u32 address)
+size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address)
 {
-  return iCache[(address >> 2) & iCache_Mask];
+  return (address >> 2) & FAST_BLOCK_MAP_MASK;
 }

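Since fast_block_map entries are never scanned when a block dies, each block records the single slot it occupies (fast_block_map_index), and both DestroyBlock() and MoveBlockIntoFastCache() guard the clear with a pointer comparison: another block may have claimed the slot in the meantime, and blindly nulling it would evict that block's live entry. The invariant in isolation (a sketch, not the real method):

#include <array>
#include <cstddef>

struct Block
{
  std::size_t fast_block_map_index = 0;
};

constexpr std::size_t ELEMENTS = 0x10000;

// A block owns at most one slot. The pointer comparison matters: a newer
// block may already occupy the old slot, and clearing it unconditionally
// would evict that block instead of this one's stale entry.
void MoveToIndex(std::array<Block*, ELEMENTS>& fast_map, Block* block, std::size_t new_index)
{
  if (fast_map[block->fast_block_map_index] == block)
    fast_map[block->fast_block_map_index] = nullptr;

  fast_map[new_index] = block;
  block->fast_block_map_index = new_index;
}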

@@ -24,13 +24,6 @@ class JitBase;
 // address.
 struct JitBlock
 {
-  enum
-  {
-    // Mask for the MSR bits which determine whether a compiled block
-    // is valid (MSR.IR and MSR.DR, the address translation bits).
-    JIT_CACHE_MSR_MASK = 0x30,
-  };
   // A special entry point for block linking; usually used to check the
   // downcount.
   const u8* checkedEntry;
@@ -54,11 +47,6 @@ struct JitBlock
   u32 originalSize;
   int runCount;  // for profiling.
-  // Whether this struct refers to a valid block. This is mostly useful as
-  // a debugging aid.
-  // FIXME: Change current users of invalid bit to assertions?
-  bool invalid;
   // Information about exits to a known address from this block.
   // This is used to implement block linking.
   struct LinkData
@@ -74,6 +62,10 @@ struct JitBlock
   u64 ticStart;    // for profiling - time.
   u64 ticStop;     // for profiling - time.
   u64 ticCounter;  // for profiling - time.
+  // This tracks the position of this block within the fast block cache.
+  // We allow each block to have only one map entry.
+  size_t fast_block_map_index;
 };
 typedef void (*CompiledCode)();
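The mask that moves from the JitBlock enum into JitBaseBlockCache below encodes the two PowerPC address-translation bits. PowerPC numbers MSR bits from the most significant end, which is why the dispatcher code above tests Imm32(1 << (31 - 27)) for MSR.DR; IR is the neighbouring bit. Spelled out:

#include <cstdint>

using u32 = std::uint32_t;

// PowerPC numbers MSR bits from the most significant end, so bit n maps
// to the mask 1 << (31 - n). IR (instruction translation) is bit 26,
// DR (data translation) is bit 27.
constexpr u32 MSR_IR = 1 << (31 - 26);  // 0x20
constexpr u32 MSR_DR = 1 << (31 - 27);  // 0x10, the bit the dispatcher tests

static_assert((MSR_IR | MSR_DR) == 0x30, "matches JIT_CACHE_MSR_MASK");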
@@ -111,9 +103,12 @@ public:
 class JitBaseBlockCache
 {
 public:
-  static constexpr int MAX_NUM_BLOCKS = 65536 * 2;
-  static constexpr u32 iCache_Num_Elements = 0x10000;
-  static constexpr u32 iCache_Mask = iCache_Num_Elements - 1;
+  // Mask for the MSR bits which determine whether a compiled block
+  // is valid (MSR.IR and MSR.DR, the address translation bits).
+  static constexpr u32 JIT_CACHE_MSR_MASK = 0x30;
+  static constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000;
+  static constexpr u32 FAST_BLOCK_MAP_MASK = FAST_BLOCK_MAP_ELEMENTS - 1;
   explicit JitBaseBlockCache(JitBase& jit);
   virtual ~JitBaseBlockCache();
@@ -124,17 +119,16 @@ public:
   void Reset();
   void SchedulateClearCacheThreadSafe();
-  bool IsFull() const;
   // Code Cache
-  JitBlock** GetICache();
+  JitBlock** GetFastBlockMap();
   void RunOnBlocks(std::function<void(const JitBlock&)> f);
   JitBlock* AllocateBlock(u32 em_address);
+  void FreeBlock(JitBlock* block);
   void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr);
   // Look for the block in the slow but accurate way.
-  // This function shall be used if FastLookupEntryForAddress() failed.
+  // This function shall be used if FastLookupIndexForAddress() failed.
   // This might return nullptr if there is no such block.
   JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr);
@@ -158,17 +152,12 @@ private:
   void LinkBlockExits(JitBlock& block);
   void LinkBlock(JitBlock& block);
   void UnlinkBlock(const JitBlock& block);
-  void DestroyBlock(JitBlock& block, bool invalidate);
+  void DestroyBlock(JitBlock& block);
   void MoveBlockIntoFastCache(u32 em_address, u32 msr);
-  // Fast but risky block lookup based on iCache.
-  JitBlock*& FastLookupEntryForAddress(u32 address);
-  // We store the metadata of all blocks in a linear way within this array.
-  // Note: blocks[0] must not be used as it is referenced as invalid block in iCache.
-  std::array<JitBlock, MAX_NUM_BLOCKS> blocks;  // number -> JitBlock
-  int num_blocks = 1;
+  // Fast but risky block lookup based on fast_block_map.
+  size_t FastLookupIndexForAddress(u32 address);
   // links_to holds all exit points of all valid blocks in a reverse way.
   // It is used to query all blocks which link to an address.
@@ -176,12 +165,12 @@ private:
   // Map indexed by the physical memory location.
   // It is used to invalidate blocks based on memory location.
-  std::map<std::pair<u32, u32>, JitBlock*> block_map;  // (end_addr, start_addr) -> block
+  std::multimap<std::pair<u32, u32>, JitBlock*> block_map;  // (end_addr, start_addr) -> block
   // Map indexed by the physical address of the entry point.
   // This is used to query the block based on the current PC in a slow way.
-  // TODO: This is redundant with block_map, and both should be a multimap.
-  std::map<u32, JitBlock*> start_block_map;  // start_addr -> block
+  // TODO: This is redundant with block_map.
+  std::multimap<u32, JitBlock> start_block_map;  // start_addr -> block
   // This bitset shows which cachelines overlap with any blocks.
   // It is used to provide a fast way to query if no icache invalidation is needed.
@@ -189,5 +178,5 @@ private:
   // This array is indexed with the masked PC and likely holds the correct block.
   // It is used as a fast cache of start_block_map for the assembly dispatcher.
-  std::array<JitBlock*, iCache_Num_Elements> iCache;  // start_addr & mask -> number
+  std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map;  // start_addr & mask -> block
 };
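Taken together, the header now describes a cache with one owning container and three indexes over it; the central change of this PR is that start_block_map's value type is JitBlock itself, so the multimap owns the block metadata and everything else holds non-owning pointers into it. Condensed (a sketch of the layout, not the real class):

#include <array>
#include <cstdint>
#include <map>
#include <utility>

using u32 = std::uint32_t;

struct JitBlock
{
};  // stand-in for the struct above

constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000;

struct BlockCacheLayout
{
  // Owns the blocks; keyed by physical start address, duplicates allowed.
  std::multimap<u32, JitBlock> start_block_map;
  // (end_addr, start_addr) -> block, for range invalidation.
  std::multimap<std::pair<u32, u32>, JitBlock*> block_map;
  // exit address -> blocks that link to it, for unlinking.
  std::multimap<u32, JitBlock*> links_to;
  // Direct-mapped lookup cache consulted first by the dispatchers.
  std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map;
};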