Merge pull request #11737 from krnlyng/block_map
Jit: Improve block lookup performance through a shm memory segment.
commit 8fd61d0b54
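The change replaces the fixed 0x10000-entry `fast_block_map` with a shared-memory mapping large enough to give every possible 4-byte-aligned guest PC its own `JitBlock*` slot (`FAST_BLOCK_MAP_SIZE = 0x2'0000'0000`, i.e. 8 GiB of virtual address space, committed lazily page by page). The dispatcher can then find a candidate block with a single load, and because the slot index already encodes the address, it only needs to verify `msrBits`. If the shm segment or view cannot be allocated, the old masked 0x10000-entry array remains as a fallback, and `m_fast_block_map_ptr` points at whichever table is in use. A minimal sketch of the two lookup schemes (names follow the diff; the `MemArena` plumbing is elided):

```cpp
#include <array>
#include <cstdint>

struct JitBlock;

// Full map: one slot per possible 4-byte-aligned guest PC, backed by a
// lazily-committed shm view, so untouched pages cost no physical memory.
JitBlock** fast_block_map = nullptr;  // 2^30 slots when available

// Fallback: small dense array indexed by the masked PC; collisions possible.
constexpr uint32_t kFallbackElements = 0x10000;
constexpr uint32_t kFallbackMask = kFallbackElements - 1;
std::array<JitBlock*, kFallbackElements> fallback{};

JitBlock* Lookup(uint32_t pc)
{
  if (fast_block_map)
    return fast_block_map[pc >> 2];  // exact slot, no mask needed
  return fallback[(pc >> 2) & kFallbackMask];  // may alias other addresses
}
```

First, the x64 dispatcher (`Jit64AsmRoutineManager`):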
```diff
@@ -20,6 +20,10 @@
 
 using namespace Gen;
 
+// These need to be next to each other so that the assembly
+// code can compare them easily.
+static_assert(offsetof(JitBlockData, effectiveAddress) + 4 == offsetof(JitBlockData, msrBits));
+
 Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
 {
 }
```
```diff
@@ -102,14 +106,26 @@ void Jit64AsmRoutineManager::Generate()
   // The following is a translation of JitBaseBlockCache::Dispatch into assembly.
   const bool assembly_dispatcher = true;
   if (assembly_dispatcher)
+  {
+    if (m_jit.GetBlockCache()->GetFastBlockMap())
+    {
+      u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap());
+      MOV(32, R(RSCRATCH), PPCSTATE(pc));
+
+      MOV(64, R(RSCRATCH2), Imm64(icache));
+      // Each 4-byte offset of the PC register corresponds to an 8-byte offset
+      // in the lookup table due to host pointers being 8 bytes long.
+      MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
+    }
+    else
     {
       // Fast block number lookup.
       // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
       MOV(32, R(RSCRATCH), PPCSTATE(pc));
       // Keep a copy for later.
       MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
-      u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap());
-      AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 2));
+      u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMapFallback());
+      AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 2));
       if (icache <= INT_MAX)
       {
         MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
```
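Both dispatcher paths use the same addressing trick: the guest PC is 4-byte aligned and each table slot is an 8-byte host pointer, so the byte offset into the table is `(pc >> 2) * 8 == pc * 2`, which the emitters encode directly as a scale-2 addressing mode (`SCALE_2` above, `LSL #1` on ARM64 below). As plain C++ (a sketch, not code from the PR):

```cpp
#include <cstdint>

struct JitBlock;

// Equivalent of MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)):
// table[pc >> 2] without materializing pc >> 2, valid because pc % 4 == 0.
inline JitBlock* LoadEntry(JitBlock** table, uint32_t pc)
{
  static_assert(sizeof(JitBlock*) == 8, "host pointers are 8 bytes");
  const auto base = reinterpret_cast<uintptr_t>(table);
  return *reinterpret_cast<JitBlock**>(base + uintptr_t{pc} * 2);
}
```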
```diff
@@ -119,19 +135,30 @@ void Jit64AsmRoutineManager::Generate()
         MOV(64, R(RSCRATCH2), Imm64(icache));
         MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
       }
+    }
 
     // Check if we found a block.
     TEST(64, R(RSCRATCH), R(RSCRATCH));
     FixupBranch not_found = J_CC(CC_Z);
 
-    // Check both block.effectiveAddress and block.msrBits.
+    // Check block.msrBits.
     MOV(32, R(RSCRATCH2), PPCSTATE(msr));
     AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
+
+    if (m_jit.GetBlockCache()->GetFastBlockMap())
+    {
+      CMP(32, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits))));
+    }
+    else
+    {
+      // Also check the block.effectiveAddress
      SHL(64, R(RSCRATCH2), Imm8(32));
      // RSCRATCH_EXTRA still has the PC.
      OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
      CMP(64, R(RSCRATCH2),
          MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress))));
+    }
 
     FixupBranch state_mismatch = J_CC(CC_NE);
 
     // Success; branch to the block we found.
```
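The shm path only needs to compare `msrBits` because the table index already pins down the effective address; the fallback's slots can alias many addresses, so it packs the masked MSR into the upper half of a 64-bit value and checks `effectiveAddress` and `msrBits` with one comparison — which is exactly what the new `static_assert` about field adjacency makes safe. A hedged C++ restatement of the fallback compare:

```cpp
#include <cstdint>
#include <cstring>

// Only the two fields the compare touches; the real JitBlockData has more.
struct JitBlockData
{
  uint32_t effectiveAddress;
  uint32_t msrBits;  // the static_assert: exactly 4 bytes after effectiveAddress
};

bool StateMatches(const JitBlockData& b, uint32_t pc, uint32_t masked_msr)
{
  // SHL(64, msr, 32); OR(64, msr, pc); then CMP against one 64-bit load.
  const uint64_t want = (uint64_t{masked_msr} << 32) | pc;
  uint64_t have;  // little-endian: effectiveAddress low, msrBits high
  std::memcpy(&have, &b.effectiveAddress, sizeof(have));
  return have == want;
}
```

The ARM64 dispatcher gets the same split: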
```diff
@@ -110,11 +110,41 @@ void JitArm64::GenerateAsm()
       jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
   SetJumpTarget(membaseend);
 
+  if (GetBlockCache()->GetFastBlockMap())
+  {
+    // Check if there is a block
+    ARM64Reg pc_masked = ARM64Reg::X25;
+    ARM64Reg cache_base = ARM64Reg::X27;
+    ARM64Reg block = ARM64Reg::X30;
+    LSL(pc_masked, DISPATCHER_PC, 1);
+    MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
+    LDR(block, cache_base, pc_masked);
+    FixupBranch not_found = CBZ(block);
+
+    // b.msrBits != msr
+    ARM64Reg msr = ARM64Reg::W25;
+    ARM64Reg msr2 = ARM64Reg::W24;
+    LDR(IndexType::Unsigned, msr, PPC_REG, PPCSTATE_OFF(msr));
+    AND(msr, msr, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
+    LDR(IndexType::Unsigned, msr2, block, offsetof(JitBlockData, msrBits));
+    CMP(msr, msr2);
+
+    FixupBranch msr_missmatch = B(CC_NEQ);
+
+    // return blocks[block_num].normalEntry;
+    LDR(IndexType::Unsigned, block, block, offsetof(JitBlockData, normalEntry));
+    BR(block);
+    SetJumpTarget(not_found);
+    SetJumpTarget(msr_missmatch);
+  }
+  else
+  {
     // iCache[(address >> 2) & iCache_Mask];
     ARM64Reg pc_masked = ARM64Reg::W25;
     ARM64Reg cache_base = ARM64Reg::X27;
     ARM64Reg block = ARM64Reg::X30;
-    ORR(pc_masked, ARM64Reg::WZR, LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3, 32));
+    ORR(pc_masked, ARM64Reg::WZR,
+        LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 3, 32));
     AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
     MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
     LDR(block, cache_base, EncodeRegTo64(pc_masked));
@@ -131,6 +161,7 @@ void JitArm64::GenerateAsm()
     AND(pc_and_msr2, pc_and_msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
     LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, msrBits));
     CMP(pc_and_msr, pc_and_msr2);
 
     FixupBranch msr_missmatch = B(CC_NEQ);
 
     // return blocks[block_num].normalEntry;
@@ -140,6 +171,7 @@ void JitArm64::GenerateAsm()
     SetJumpTarget(pc_missmatch);
     SetJumpTarget(msr_missmatch);
   }
+  }
 
   // Call C version of Dispatch().
   STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
```
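One detail worth noting in the ARM64 fast path: the full table spans 8 GiB, so the byte offset `pc * 2` can exceed 32 bits, and the index therefore lives in a 64-bit register (`X25`), whereas the fallback's masked index fits a 32-bit one (`W25`). A quick check of the bound (hypothetical values, not from the PR):

```cpp
#include <cstdint>

// Worst-case byte offset for a pc near the top of the 32-bit address space.
constexpr uint64_t MaxTableOffset(uint32_t pc = 0xFFFF'FFFCu)
{
  return uint64_t{pc} * 2;  // LSL(pc_masked /* X25, 64-bit */, DISPATCHER_PC, 1)
}
static_assert(MaxTableOffset() == 0x1'FFFF'FFF8, "does not fit in 32 bits");
```

On the cache side, `JitBaseBlockCache` now owns the segment: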
```diff
@@ -42,12 +42,21 @@ void JitBaseBlockCache::Init()
 {
   Common::JitRegister::Init(Config::Get(Config::MAIN_PERF_MAP_DIR));
 
+  m_block_map_arena.GrabSHMSegment(FAST_BLOCK_MAP_SIZE, "dolphin-emu-jitblock");
+
   Clear();
 }
 
 void JitBaseBlockCache::Shutdown()
 {
   Common::JitRegister::Shutdown();
+
+  if (m_fast_block_map)
+  {
+    m_block_map_arena.ReleaseView(m_fast_block_map, FAST_BLOCK_MAP_SIZE);
+  }
+
+  m_block_map_arena.ReleaseSHMSegment();
 }
 
 // This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
```
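`GrabSHMSegment`/`CreateView` and their release counterparts are the `Common::MemArena` calls this diff introduces; roughly, they correspond to the following POSIX pattern (a Linux-only sketch using `memfd_create`, not the actual MemArena backend, which is per-OS):

```cpp
#include <sys/mman.h>
#include <unistd.h>

struct JitBlock;

int GrabSHMSegment(size_t size)  // ~ m_block_map_arena.GrabSHMSegment(size, name)
{
  const int fd = memfd_create("dolphin-emu-jitblock", 0);
  if (fd >= 0)
    ftruncate(fd, static_cast<off_t>(size));  // a fresh segment reads as zeroes
  return fd;
}

JitBlock** CreateView(int fd, size_t size)  // ~ m_block_map_arena.CreateView(0, size)
{
  void* p = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  return p == MAP_FAILED ? nullptr : static_cast<JitBlock**>(p);  // nullptr -> fallback
}
```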
```diff
@@ -70,7 +79,24 @@ void JitBaseBlockCache::Clear()
 
   valid_block.ClearAll();
 
-  fast_block_map.fill(nullptr);
+  if (m_fast_block_map)
+  {
+    m_block_map_arena.ReleaseView(m_fast_block_map, FAST_BLOCK_MAP_SIZE);
+    m_block_map_arena.ReleaseSHMSegment();
+    m_block_map_arena.GrabSHMSegment(FAST_BLOCK_MAP_SIZE, "dolphin-emu-jitblock");
+  }
+
+  m_fast_block_map =
+      reinterpret_cast<JitBlock**>(m_block_map_arena.CreateView(0, FAST_BLOCK_MAP_SIZE));
+
+  if (m_fast_block_map)
+  {
+    m_fast_block_map_ptr = m_fast_block_map;
+  }
+  else
+  {
+    m_fast_block_map_ptr = m_fast_block_map_fallback.data();
+  }
 }
 
 void JitBaseBlockCache::Reset()
```
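Note that `Clear()` no longer fills the table: `fill(nullptr)` on the full map would fault in and dirty every page of the 8 GiB view, so it instead releases the view and segment and grabs a fresh one, handing all committed pages back to the OS and starting from zeroes. This is also where the fallback is selected — if `CreateView` fails, `m_fast_block_map_ptr` is pointed at the static array. The page math that makes the remap worthwhile:

```cpp
// fill(nullptr) over the full table would commit 2^21 four-KiB pages (8 GiB);
// remapping costs roughly the number of pages actually touched since last Clear.
static_assert(0x2'0000'0000ULL / 4096 == (1ULL << 21));
```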
```diff
@@ -81,7 +107,12 @@ void JitBaseBlockCache::Reset()
 
 JitBlock** JitBaseBlockCache::GetFastBlockMap()
 {
-  return fast_block_map.data();
+  return m_fast_block_map;
+}
+
+JitBlock** JitBaseBlockCache::GetFastBlockMapFallback()
+{
+  return m_fast_block_map_fallback.data();
 }
 
 void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
```
```diff
@@ -106,7 +137,7 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
                                       const std::set<u32>& physical_addresses)
 {
   size_t index = FastLookupIndexForAddress(block.effectiveAddress);
-  fast_block_map[index] = &block;
+  m_fast_block_map_ptr[index] = &block;
   block.fast_block_map_index = index;
 
   block.physical_addresses = physical_addresses;
```
```diff
@@ -169,7 +200,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
 const u8* JitBaseBlockCache::Dispatch()
 {
   const auto& ppc_state = m_jit.m_ppc_state;
-  JitBlock* block = fast_block_map[FastLookupIndexForAddress(ppc_state.pc)];
+  JitBlock* block = m_fast_block_map_ptr[FastLookupIndexForAddress(ppc_state.pc)];
 
   if (!block || block->effectiveAddress != ppc_state.pc ||
       block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK))
```
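The C++ `Dispatch()` keeps the combined `effectiveAddress`/`msrBits` check in both modes: with the shm map the address comparison can never fail (the index implies the address, since blocks are stored at `effectiveAddress >> 2`), but it is cheap here and keeps a single code path, while the fallback genuinely relies on it because distinct PCs share slots.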
```diff
@@ -390,8 +421,8 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
 
 void JitBaseBlockCache::DestroyBlock(JitBlock& block)
 {
-  if (fast_block_map[block.fast_block_map_index] == &block)
-    fast_block_map[block.fast_block_map_index] = nullptr;
+  if (m_fast_block_map_ptr[block.fast_block_map_index] == &block)
+    m_fast_block_map_ptr[block.fast_block_map_index] = nullptr;
 
   UnlinkBlock(block);
 
@@ -418,12 +449,12 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
     return nullptr;
 
   // Drop old fast block map entry
-  if (fast_block_map[block->fast_block_map_index] == block)
-    fast_block_map[block->fast_block_map_index] = nullptr;
+  if (m_fast_block_map_ptr[block->fast_block_map_index] == block)
+    m_fast_block_map_ptr[block->fast_block_map_index] = nullptr;
 
   // And create a new one
   size_t index = FastLookupIndexForAddress(addr);
-  fast_block_map[index] = block;
+  m_fast_block_map_ptr[index] = block;
   block->fast_block_map_index = index;
 
   return block;
```
```diff
@@ -431,5 +462,12 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
 
 size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address)
 {
-  return (address >> 2) & FAST_BLOCK_MAP_MASK;
+  if (m_fast_block_map)
+  {
+    return address >> 2;
+  }
+  else
+  {
+    return (address >> 2) & FAST_BLOCK_MAP_FALLBACK_MASK;
+  }
 }
```
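`FastLookupIndexForAddress` is the C++ twin of the two dispatcher paths: an unmasked `address >> 2` for the full map, masked for the fallback. A quick check of why only the fallback needs the address re-verified — two plausible block addresses that collide in the 0x10000-entry table but not in the full map:

```cpp
#include <cstdint>

constexpr uint32_t pc_a = 0x8000'0000, pc_b = 0x8004'0000;
static_assert((pc_a >> 2) != (pc_b >> 2));                        // full map: distinct slots
static_assert(((pc_a >> 2) & 0xFFFF) == ((pc_b >> 2) & 0xFFFF));  // fallback: same slot
```

Finally, the header: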
```diff
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "Common/CommonTypes.h"
+#include "Core/HW/Memmap.h"
 
 class JitBase;
```
```diff
@@ -131,8 +132,11 @@ public:
   // is valid (MSR.IR and MSR.DR, the address translation bits).
   static constexpr u32 JIT_CACHE_MSR_MASK = 0x30;
 
-  static constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000;
-  static constexpr u32 FAST_BLOCK_MAP_MASK = FAST_BLOCK_MAP_ELEMENTS - 1;
+  // The value for the map is determined like this:
+  // ((4 GB guest memory space) / (4 bytes per address)) * sizeof(JitBlock*)
+  static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x2'0000'0000;
+  static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
+  static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;
 
   explicit JitBaseBlockCache(JitBase& jit);
   virtual ~JitBaseBlockCache();
```
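The sizing comment checks out on a 64-bit host: 2^32 bytes of guest address space, one potential block start every 4 bytes, 8 bytes per `JitBlock*` slot:

```cpp
static_assert((0x1'0000'0000ULL / 4) * 8 == 0x2'0000'0000ULL,
              "8 GiB of virtual address space; almost none of it ever committed");
```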
```diff
@@ -144,6 +148,7 @@ public:
 
   // Code Cache
   JitBlock** GetFastBlockMap();
+  JitBlock** GetFastBlockMapFallback();
   void RunOnBlocks(std::function<void(const JitBlock&)> f);
 
   JitBlock* AllocateBlock(u32 em_address);
```
```diff
@@ -203,7 +208,16 @@ private:
   // It is used to provide a fast way to query if no icache invalidation is needed.
   ValidBlockBitSet valid_block;
 
-  // This array is indexed with the masked PC and likely holds the correct block id.
+  // This array is indexed with the shifted PC and likely holds the correct block id.
   // This is used as a fast cache of block_map used in the assembly dispatcher.
-  std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map{};  // start_addr & mask -> number
+  // It is implemented via a shm segment using m_block_map_arena.
+  JitBlock** m_fast_block_map = 0;
+  Common::MemArena m_block_map_arena;
+
+  // An alternative for the above fast_block_map but without a shm segment
+  // in case the shm memory region couldn't be allocated.
+  std::array<JitBlock*, FAST_BLOCK_MAP_FALLBACK_ELEMENTS>
+      m_fast_block_map_fallback{};  // start_addr & mask -> number
+
+  JitBlock** m_fast_block_map_ptr = 0;
 };
```