diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index 44e2a3d875..d989d33fd5 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -20,6 +20,10 @@
 
 using namespace Gen;
 
+// These need to be next to each other so that the assembly
+// code can compare them easily.
+static_assert(offsetof(JitBlockData, effectiveAddress) + 4 == offsetof(JitBlockData, msrBits));
+
 Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
 {
 }
@@ -103,35 +107,58 @@ void Jit64AsmRoutineManager::Generate()
   const bool assembly_dispatcher = true;
   if (assembly_dispatcher)
   {
-    // Fast block number lookup.
-    // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
-    MOV(32, R(RSCRATCH), PPCSTATE(pc));
-    // Keep a copy for later.
-    MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
-    u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap());
-    AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 2));
-    if (icache <= INT_MAX)
+    if (m_jit.GetBlockCache()->GetFastBlockMap())
     {
-      MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
+      u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap());
+      MOV(32, R(RSCRATCH), PPCSTATE(pc));
+
+      MOV(64, R(RSCRATCH2), Imm64(icache));
+      // Each 4-byte offset of the PC register corresponds to an 8-byte offset
+      // in the lookup table due to host pointers being 8 bytes long.
+      MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
     }
     else
     {
-      MOV(64, R(RSCRATCH2), Imm64(icache));
-      MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
+      // Fast block number lookup.
+      // ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
+      MOV(32, R(RSCRATCH), PPCSTATE(pc));
+      // Keep a copy for later.
+      MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
+      u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMapFallback());
+      AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 2));
+      if (icache <= INT_MAX)
+      {
+        MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
+      }
+      else
+      {
+        MOV(64, R(RSCRATCH2), Imm64(icache));
+        MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
+      }
     }
 
     // Check if we found a block.
     TEST(64, R(RSCRATCH), R(RSCRATCH));
     FixupBranch not_found = J_CC(CC_Z);
 
-    // Check both block.effectiveAddress and block.msrBits.
+    // Check block.msrBits.
     MOV(32, R(RSCRATCH2), PPCSTATE(msr));
     AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
-    SHL(64, R(RSCRATCH2), Imm8(32));
-    // RSCRATCH_EXTRA still has the PC.
-    OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
-    CMP(64, R(RSCRATCH2),
-        MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress))));
+
+    if (m_jit.GetBlockCache()->GetFastBlockMap())
+    {
+      CMP(32, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits))));
+    }
+    else
+    {
+      // Also check block.effectiveAddress.
+      SHL(64, R(RSCRATCH2), Imm8(32));
+      // RSCRATCH_EXTRA still has the PC.
+      OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
+      CMP(64, R(RSCRATCH2),
+          MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress))));
+    }
+
     FixupBranch state_mismatch = J_CC(CC_NE);
 
     // Success; branch to the block we found.
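
The static_assert above is what makes the fallback path's single 64-bit CMP legal: on a little-endian host, an 8-byte load starting at `effectiveAddress` yields the address in the low half and `msrBits` in the high half. A minimal sketch of that invariant, using a trimmed stand-in for `JitBlockData` (only the two fields the dispatcher compares):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Trimmed stand-in for JitBlockData; the real struct has more fields.
struct JitBlockData
{
  uint32_t effectiveAddress;
  uint32_t msrBits;
};
static_assert(offsetof(JitBlockData, effectiveAddress) + 4 == offsetof(JitBlockData, msrBits));

int main()
{
  JitBlockData b{0x80003100, 0x30};

  // What the emitted code builds in RSCRATCH2: msrBits in the high half,
  // the PC in the low half.
  const uint64_t expected = (uint64_t{0x30} << 32) | 0x80003100;

  // What a single 64-bit load from &b.effectiveAddress yields on a
  // little-endian host such as x86-64.
  uint64_t actual;
  std::memcpy(&actual, &b, sizeof(actual));

  assert(actual == expected);  // one CMP(64, ...) replaces two 32-bit compares
}
```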
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index ff5fd713f2..6f1066188e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -110,35 +110,67 @@ void JitArm64::GenerateAsm()
                jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
   SetJumpTarget(membaseend);
 
-  // iCache[(address >> 2) & iCache_Mask];
-  ARM64Reg pc_masked = ARM64Reg::W25;
-  ARM64Reg cache_base = ARM64Reg::X27;
-  ARM64Reg block = ARM64Reg::X30;
-  ORR(pc_masked, ARM64Reg::WZR, LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3, 32));
-  AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
-  MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
-  LDR(block, cache_base, EncodeRegTo64(pc_masked));
-  FixupBranch not_found = CBZ(block);
+  if (GetBlockCache()->GetFastBlockMap())
+  {
+    // Check if there is a block.
+    ARM64Reg pc_masked = ARM64Reg::X25;
+    ARM64Reg cache_base = ARM64Reg::X27;
+    ARM64Reg block = ARM64Reg::X30;
+    LSL(pc_masked, DISPATCHER_PC, 1);
+    MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
+    LDR(block, cache_base, pc_masked);
+    FixupBranch not_found = CBZ(block);
 
-  // b.effectiveAddress != addr || b.msrBits != msr
-  ARM64Reg pc_and_msr = ARM64Reg::W25;
-  ARM64Reg pc_and_msr2 = ARM64Reg::W24;
-  LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, effectiveAddress));
-  CMP(pc_and_msr, DISPATCHER_PC);
-  FixupBranch pc_missmatch = B(CC_NEQ);
+    // b.msrBits != msr
+    ARM64Reg msr = ARM64Reg::W25;
+    ARM64Reg msr2 = ARM64Reg::W24;
+    LDR(IndexType::Unsigned, msr, PPC_REG, PPCSTATE_OFF(msr));
+    AND(msr, msr, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
+    LDR(IndexType::Unsigned, msr2, block, offsetof(JitBlockData, msrBits));
+    CMP(msr, msr2);
 
-  LDR(IndexType::Unsigned, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
-  AND(pc_and_msr2, pc_and_msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
-  LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, msrBits));
-  CMP(pc_and_msr, pc_and_msr2);
-  FixupBranch msr_missmatch = B(CC_NEQ);
+    FixupBranch msr_missmatch = B(CC_NEQ);
 
-  // return blocks[block_num].normalEntry;
-  LDR(IndexType::Unsigned, block, block, offsetof(JitBlockData, normalEntry));
-  BR(block);
-  SetJumpTarget(not_found);
-  SetJumpTarget(pc_missmatch);
-  SetJumpTarget(msr_missmatch);
+    // return blocks[block_num].normalEntry;
+    LDR(IndexType::Unsigned, block, block, offsetof(JitBlockData, normalEntry));
+    BR(block);
+    SetJumpTarget(not_found);
+    SetJumpTarget(msr_missmatch);
+  }
+  else
+  {
+    // iCache[(address >> 2) & iCache_Mask];
+    ARM64Reg pc_masked = ARM64Reg::W25;
+    ARM64Reg cache_base = ARM64Reg::X27;
+    ARM64Reg block = ARM64Reg::X30;
+    ORR(pc_masked, ARM64Reg::WZR,
+        LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 3, 32));
+    AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
+    MOVP2R(cache_base, GetBlockCache()->GetFastBlockMapFallback());
+    LDR(block, cache_base, EncodeRegTo64(pc_masked));
+    FixupBranch not_found = CBZ(block);
+
+    // b.effectiveAddress != addr || b.msrBits != msr
+    ARM64Reg pc_and_msr = ARM64Reg::W25;
+    ARM64Reg pc_and_msr2 = ARM64Reg::W24;
+    LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, effectiveAddress));
+    CMP(pc_and_msr, DISPATCHER_PC);
+    FixupBranch pc_missmatch = B(CC_NEQ);
+
+    LDR(IndexType::Unsigned, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
+    AND(pc_and_msr2, pc_and_msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
+    LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, msrBits));
+    CMP(pc_and_msr, pc_and_msr2);
+
+    FixupBranch msr_missmatch = B(CC_NEQ);
+
+    // return blocks[block_num].normalEntry;
+    LDR(IndexType::Unsigned, block, block, offsetof(JitBlockData, normalEntry));
+    BR(block);
+    SetJumpTarget(not_found);
+    SetJumpTarget(pc_missmatch);
+    SetJumpTarget(msr_missmatch);
+  }
 }
 
 // Call C version of Dispatch().
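
Both dispatchers now implement the same two-tier lookup, just in different instruction sets. A C-level sketch of the logic the emitted fast path performs (hypothetical helper for illustration; `0xFFFF` stands in for FAST_BLOCK_MAP_FALLBACK_MASK):

```cpp
#include <cstdint>

struct JitBlockData
{
  uint32_t effectiveAddress;
  uint32_t msrBits;
  // normalEntry etc. omitted
};

// Hypothetical model of the generated dispatcher fast path.
JitBlockData* LookupBlock(JitBlockData** full_map, JitBlockData** fallback,
                          uint32_t pc, uint32_t msr_bits)
{
  if (full_map)
  {
    // One slot per 4-byte-aligned address: scaling the PC by 2 turns the
    // 4-byte instruction stride into an 8-byte pointer stride, matching
    // MComplex(..., SCALE_2, 0) on x64 and LSL(pc, 1) on ARM64.
    JitBlockData* b = full_map[pc >> 2];
    // The slot is unique to this PC, so only msrBits needs checking.
    return (b && b->msrBits == msr_bits) ? b : nullptr;
  }
  // Masked fallback table: many PCs alias one slot, so both fields must match.
  JitBlockData* b = fallback[(pc >> 2) & 0xFFFF];
  return (b && b->effectiveAddress == pc && b->msrBits == msr_bits) ? b : nullptr;
}
```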
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
index b01647b70f..6b746d94d1 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
@@ -42,12 +42,21 @@ void JitBaseBlockCache::Init()
 {
   Common::JitRegister::Init(Config::Get(Config::MAIN_PERF_MAP_DIR));
 
+  m_block_map_arena.GrabSHMSegment(FAST_BLOCK_MAP_SIZE, "dolphin-emu-jitblock");
+
   Clear();
 }
 
 void JitBaseBlockCache::Shutdown()
 {
   Common::JitRegister::Shutdown();
+
+  if (m_fast_block_map)
+  {
+    m_block_map_arena.ReleaseView(m_fast_block_map, FAST_BLOCK_MAP_SIZE);
+  }
+
+  m_block_map_arena.ReleaseSHMSegment();
 }
 
 // This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
@@ -70,7 +79,24 @@ void JitBaseBlockCache::Clear()
 
   valid_block.ClearAll();
 
-  fast_block_map.fill(nullptr);
+  if (m_fast_block_map)
+  {
+    m_block_map_arena.ReleaseView(m_fast_block_map, FAST_BLOCK_MAP_SIZE);
+    m_block_map_arena.ReleaseSHMSegment();
+    m_block_map_arena.GrabSHMSegment(FAST_BLOCK_MAP_SIZE, "dolphin-emu-jitblock");
+  }
+
+  m_fast_block_map =
+      reinterpret_cast<JitBlock**>(m_block_map_arena.CreateView(0, FAST_BLOCK_MAP_SIZE));
+
+  if (m_fast_block_map)
+  {
+    m_fast_block_map_ptr = m_fast_block_map;
+  }
+  else
+  {
+    m_fast_block_map_ptr = m_fast_block_map_fallback.data();
+  }
 }
 
 void JitBaseBlockCache::Reset()
@@ -81,7 +107,12 @@ void JitBaseBlockCache::Reset()
 
 JitBlock** JitBaseBlockCache::GetFastBlockMap()
 {
-  return fast_block_map.data();
+  return m_fast_block_map;
+}
+
+JitBlock** JitBaseBlockCache::GetFastBlockMapFallback()
+{
+  return m_fast_block_map_fallback.data();
 }
 
 void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
@@ -106,7 +137,7 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
                                       const std::set<u32>& physical_addresses)
 {
   size_t index = FastLookupIndexForAddress(block.effectiveAddress);
-  fast_block_map[index] = &block;
+  m_fast_block_map_ptr[index] = &block;
   block.fast_block_map_index = index;
 
   block.physical_addresses = physical_addresses;
@@ -169,7 +200,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
 const u8* JitBaseBlockCache::Dispatch()
 {
   const auto& ppc_state = m_jit.m_ppc_state;
-  JitBlock* block = fast_block_map[FastLookupIndexForAddress(ppc_state.pc)];
+  JitBlock* block = m_fast_block_map_ptr[FastLookupIndexForAddress(ppc_state.pc)];
 
   if (!block || block->effectiveAddress != ppc_state.pc ||
       block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK))
@@ -390,8 +421,8 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
 
 void JitBaseBlockCache::DestroyBlock(JitBlock& block)
 {
-  if (fast_block_map[block.fast_block_map_index] == &block)
-    fast_block_map[block.fast_block_map_index] = nullptr;
+  if (m_fast_block_map_ptr[block.fast_block_map_index] == &block)
+    m_fast_block_map_ptr[block.fast_block_map_index] = nullptr;
 
   UnlinkBlock(block);
 
@@ -418,12 +449,12 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
     return nullptr;
 
   // Drop old fast block map entry
-  if (fast_block_map[block->fast_block_map_index] == block)
-    fast_block_map[block->fast_block_map_index] = nullptr;
+  if (m_fast_block_map_ptr[block->fast_block_map_index] == block)
+    m_fast_block_map_ptr[block->fast_block_map_index] = nullptr;
 
   // And create a new one
   size_t index = FastLookupIndexForAddress(addr);
-  fast_block_map[index] = block;
+  m_fast_block_map_ptr[index] = block;
   block->fast_block_map_index = index;
 
   return block;
@@ -431,5 +462,12 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
 
 size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address)
 {
-  return (address >> 2) & FAST_BLOCK_MAP_MASK;
+  if (m_fast_block_map)
+  {
+    return address >> 2;
+  }
+  else
+  {
+    return (address >> 2) & FAST_BLOCK_MAP_FALLBACK_MASK;
+  }
 }
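
Note the Clear() strategy: instead of zeroing the map with a fill (which would touch every committed page of an up-to-8-GiB view), the view and segment are released and re-grabbed, so the fresh mapping reads as zero and is backed by physical pages only on demand. MemArena's exact behavior is platform-specific; as a rough POSIX analogue of what GrabSHMSegment/CreateView provide here (an illustrative sketch, not Dolphin's actual MemArena API):

```cpp
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>

int main()
{
  // FAST_BLOCK_MAP_SIZE: 8 GiB of virtual address space, one JitBlock* slot
  // per 4-byte-aligned guest address. MAP_NORESERVE asks for a pure
  // reservation; pages are committed only when a slot is first written.
  constexpr uint64_t kFastBlockMapSize = 0x200000000;
  void* view = mmap(nullptr, kFastBlockMapSize, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (view == MAP_FAILED)
  {
    // Mirrors the diff's fallback: drop to the small masked table.
    std::puts("reservation failed; fall back to m_fast_block_map_fallback");
    return 1;
  }

  void** slots = static_cast<void**>(view);
  slots[0x80003100 >> 2] = slots;  // touches a single page, not 8 GiB

  // "Clearing" is cheap: unmap and map again; fresh pages read as zero.
  munmap(view, kFastBlockMapSize);
  return 0;
}
```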
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h
index f2c0719109..61b5966a14 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "Common/CommonTypes.h"
+#include "Core/HW/Memmap.h"
 
 class JitBase;
 
@@ -131,8 +132,11 @@ public:
   // is valid (MSR.IR and MSR.DR, the address translation bits).
   static constexpr u32 JIT_CACHE_MSR_MASK = 0x30;
 
-  static constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000;
-  static constexpr u32 FAST_BLOCK_MAP_MASK = FAST_BLOCK_MAP_ELEMENTS - 1;
+  // The size of the fast block map is determined like this:
+  // ((4 GiB guest address space) / (4 bytes per instruction)) * sizeof(JitBlock*)
+  static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x2'0000'0000;
+  static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
+  static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;
 
   explicit JitBaseBlockCache(JitBase& jit);
   virtual ~JitBaseBlockCache();
@@ -144,6 +148,7 @@ public:
 
   // Code Cache
   JitBlock** GetFastBlockMap();
+  JitBlock** GetFastBlockMapFallback();
   void RunOnBlocks(std::function<void(const JitBlock&)> f);
 
   JitBlock* AllocateBlock(u32 em_address);
@@ -203,7 +208,16 @@ private:
   // It is used to provide a fast way to query if no icache invalidation is needed.
   ValidBlockBitSet valid_block;
 
-  // This array is indexed with the masked PC and likely holds the correct block id.
+  // This map is indexed with the shifted PC and likely holds the correct block.
   // This is used as a fast cache of block_map used in the assembly dispatcher.
-  std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map{};  // start_addr & mask -> number
+  // It is implemented via a shm segment using m_block_map_arena.
+  JitBlock** m_fast_block_map = nullptr;
+  Common::MemArena m_block_map_arena;
+
+  // An alternative to the above fast block map that does not use a shm segment,
+  // for the case where the shm memory region couldn't be allocated.
+  std::array<JitBlock*, FAST_BLOCK_MAP_FALLBACK_ELEMENTS>
+      m_fast_block_map_fallback{};  // start_addr & mask -> block
+
+  JitBlock** m_fast_block_map_ptr = nullptr;
 };
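
As a sanity check on the new constants (assuming a 64-bit host with 8-byte pointers, which both JIT backends here require):

```cpp
#include <cstdint>

// One JitBlock* slot per 4-byte-aligned address across the 32-bit guest space.
static_assert((uint64_t{1} << 32) / 4 * sizeof(void*) == 0x2'0000'0000,
              "FAST_BLOCK_MAP_SIZE is 8 GiB of virtual address space");
// The fallback keeps the old footprint: 64K pointers, i.e. 512 KiB.
static_assert(0x10000 * sizeof(void*) == 512 * 1024);

int main()
{
}
```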