diff --git a/Source/Core/Core/PowerPC/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter.cpp index 8bdc6bd23e..41ba13998c 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter.cpp @@ -41,34 +41,29 @@ void CachedInterpreter::Run() void CachedInterpreter::SingleStep() { - int block = GetBlockNumberFromStartAddress(PC); - if (block >= 0) + const u8* normalEntry = jit->GetBlockCache()->Dispatch(); + const Instruction* code = reinterpret_cast(normalEntry); + + while (true) { - Instruction* code = (Instruction*)GetCompiledCodeFromBlock(block); - - while (true) + switch (code->type) { - switch (code->type) - { - case Instruction::INSTRUCTION_ABORT: + case Instruction::INSTRUCTION_ABORT: + return; + + case Instruction::INSTRUCTION_TYPE_COMMON: + code->common_callback(UGeckoInstruction(code->data)); + code++; + break; + + case Instruction::INSTRUCTION_TYPE_CONDITIONAL: + bool ret = code->conditional_callback(code->data); + code++; + if (ret) return; - - case Instruction::INSTRUCTION_TYPE_COMMON: - code->common_callback(UGeckoInstruction(code->data)); - code++; - break; - - case Instruction::INSTRUCTION_TYPE_CONDITIONAL: - bool ret = code->conditional_callback(code->data); - code++; - if (ret) - return; - break; - } + break; } } - - Jit(PC); } static void EndBlock(UGeckoInstruction data) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index f6d3b3849d..a1ef3c409c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -396,29 +396,12 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after) linkData.exitAddress = destination; linkData.linkStatus = false; - // Link opportunity! - int block; - if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) - { - // It exists! Joy of joy! 
- JitBlock* jb = blocks.GetBlock(block); - const u8* addr = jb->checkedEntry; - linkData.exitPtrs = GetWritableCodePtr(); - if (bl) - CALL(addr); - else - JMP(addr, true); - linkData.linkStatus = true; - } + MOV(32, PPCSTATE(pc), Imm32(destination)); + linkData.exitPtrs = GetWritableCodePtr(); + if (bl) + CALL(asm_routines.dispatcher); else - { - MOV(32, PPCSTATE(pc), Imm32(destination)); - linkData.exitPtrs = GetWritableCodePtr(); - if (bl) - CALL(asm_routines.dispatcher); - else - JMP(asm_routines.dispatcher, true); - } + JMP(asm_routines.dispatcher, true); b->linkData.push_back(linkData); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index fbc3eb584e..7c030d8e77 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -58,13 +58,13 @@ void Jit64AsmRoutineManager::Generate() AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC)); #if 0 // debug mispredicts - MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc - ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0); - CALL(reinterpret_cast(&ReportMispredict)); - ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0); + MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc + ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0); + CALL(reinterpret_cast(&ReportMispredict)); + ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0); #endif - ResetStack(); + ResetStack(*this); SUB(32, PPCSTATE(downcount), R(RSCRATCH2)); @@ -102,31 +102,15 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(RMEM), Imm64((u64)Memory::logical_base)); SetJumpTarget(membaseend); + // The following is a translation of JitBaseBlockCache::Dispatch into assembly. + + // Fast block number lookup. MOV(32, R(RSCRATCH), PPCSTATE(pc)); - - // TODO: We need to handle code which executes the same PC with - // different values of MSR.IR. 
It probably makes sense to handle - // MSR.DR here too, to allow IsOptimizableRAMAddress-based - // optimizations safe, because IR and DR are usually set/cleared together. - // TODO: Branching based on the 20 most significant bits of instruction - // addresses without translating them is wrong. - u64 icache = (u64)jit->GetBlockCache()->iCache.data(); - u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data(); - u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data(); - u32 mask = 0; - FixupBranch no_mem; - FixupBranch exit_mem; - FixupBranch exit_vmem; - if (SConfig::GetInstance().bWii) - mask = JIT_ICACHE_EXRAM_BIT; - mask |= JIT_ICACHE_VMEM_BIT; - TEST(32, R(RSCRATCH), Imm32(mask)); - no_mem = J_CC(CC_NZ); - AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); - + u64 icache = reinterpret_cast(jit->GetBlockCache()->GetICache()); + AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2)); if (icache <= INT_MAX) { - MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache)); + MOV(32, R(RSCRATCH), MDisp(RSCRATCH, static_cast(icache))); } else { @@ -134,73 +118,46 @@ void Jit64AsmRoutineManager::Generate() MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH)); } - exit_mem = J(); - SetJumpTarget(no_mem); - TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT)); - FixupBranch no_vmem = J_CC(CC_Z); - AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); - if (icacheVmem <= INT_MAX) + // Check whether the block we found matches the current state. 
+ u64 blocks = reinterpret_cast(jit->GetBlockCache()->GetBlocks()); + IMUL(32, RSCRATCH, R(RSCRATCH), Imm32(sizeof(JitBlock))); + if (blocks <= INT_MAX) { - MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem)); + ADD(64, R(RSCRATCH), Imm32(static_cast(blocks))); } else { - MOV(64, R(RSCRATCH2), Imm64(icacheVmem)); - MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH)); - } - - if (SConfig::GetInstance().bWii) - exit_vmem = J(); - SetJumpTarget(no_vmem); - if (SConfig::GetInstance().bWii) - { - TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT)); - FixupBranch no_exram = J_CC(CC_Z); - AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK)); - - if (icacheEx <= INT_MAX) - { - MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx)); - } - else - { - MOV(64, R(RSCRATCH2), Imm64(icacheEx)); - MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH)); - } - - SetJumpTarget(no_exram); - } - SetJumpTarget(exit_mem); - if (SConfig::GetInstance().bWii) - SetJumpTarget(exit_vmem); - - TEST(32, R(RSCRATCH), R(RSCRATCH)); - FixupBranch notfound = J_CC(CC_L); - // grab from list and jump to it - u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers(); - if (codePointers <= INT_MAX) - { - JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers)); - } - else - { - MOV(64, R(RSCRATCH2), Imm64(codePointers)); - JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0)); + MOV(64, R(RSCRATCH2), Imm64(blocks)); + ADD(64, R(RSCRATCH), R(RSCRATCH2)); } + // Check both block.effectiveAddress and block.msrBits. + MOV(32, R(RSCRATCH2), PPCSTATE(msr)); + AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK)); + SHL(64, R(RSCRATCH2), Imm8(32)); + MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast(offsetof(JitBlock, effectiveAddress)))); + FixupBranch notfound = J_CC(CC_NE); + // Success; branch to the block we found. 
+ JMPptr(MDisp(RSCRATCH, static_cast(offsetof(JitBlock, normalEntry)))); SetJumpTarget(notfound); + // Failure; call into the block cache to update the state, then try again. + // (We need to loop because Jit() might not actually generate a block + // if we hit an ISI.) + // We reset the stack because Jit might clear the code cache. // Also if we are in the middle of disabling BLR optimization on windows // we need to reset the stack before _resetstkoflw() is called in Jit // otherwise we will generate a second stack overflow exception during DoJit() - ResetStack(); + ResetStack(*this); - // Ok, no block, let's jit + // Ok, no block, let's call the slow dispatcher ABI_PushRegistersAndAdjustStack({}, 0); - ABI_CallFunctionA(32, (void*)&Jit, PPCSTATE(pc)); + ABI_CallFunction(reinterpret_cast(&JitBase::Dispatch)); ABI_PopRegistersAndAdjustStack({}, 0); - - JMP(dispatcherNoCheck, true); // no point in special casing this + // JMPptr(R(ABI_RETURN)); + JMP(dispatcherNoCheck, true); SetJumpTarget(bail); doTiming = GetCodePtr(); @@ -217,7 +174,7 @@ void Jit64AsmRoutineManager::Generate() // Landing pad for drec space if (SConfig::GetInstance().bEnableDebugging) SetJumpTarget(dbg_exit); - ResetStack(); + ResetStack(*this); if (m_stack_top) { ADD(64, R(RSP), Imm8(0x18)); @@ -232,12 +189,12 @@ void Jit64AsmRoutineManager::Generate() GenerateCommon(); } -void Jit64AsmRoutineManager::ResetStack() +void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter) { if (m_stack_top) - MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); + emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); else - MOV(64, R(RSP), M(&s_saved_rsp)); + emitter.MOV(64, R(RSP), M(&s_saved_rsp)); } void Jit64AsmRoutineManager::GenerateCommon() diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index 006d38741d..3166f7931d 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -25,7 +25,6 @@ class 
Jit64AsmRoutineManager : public CommonAsmRoutines { private: void Generate(); - void ResetStack(); void GenerateCommon(); u8* m_stack_top; @@ -41,4 +40,5 @@ public: } void Shutdown() { FreeCodeSpace(); } + void ResetStack(X64CodeBlock& emitter); }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 32a19b7a2b..a67dad0274 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -310,6 +310,7 @@ void Jit64::dcbx(UGeckoInstruction inst) XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); ABI_CallFunction((void*)JitInterface::InvalidateICache); ABI_PopRegistersAndAdjustStack(registersInUse, 0); + asm_routines.ResetStack(*this); c = J(true); SwitchToNearCode(); SetJumpTarget(c); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index eeeb91024f..78f8e190e2 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -391,6 +391,10 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); + // Our jit cache also stores some MSR bits, as they have changed, we either + // have to validate them in the BLR/RET check, or just flush the stack here. + asm_routines.ResetStack(*this); + // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 9d526cf3bc..03d8253493 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -373,19 +373,9 @@ void JitIL::WriteExit(u32 destination) linkData.exitPtrs = GetWritableCodePtr(); linkData.linkStatus = false; - // Link opportunity! 
- int block; - if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) - { - // It exists! Joy of joy! - JMP(blocks.GetBlock(block)->checkedEntry, true); - linkData.linkStatus = true; - } - else - { - MOV(32, PPCSTATE(pc), Imm32(destination)); - JMP(asm_routines.dispatcher, true); - } + MOV(32, PPCSTATE(pc), Imm32(destination)); + JMP(asm_routines.dispatcher, true); + b->linkData.push_back(linkData); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index fdacec2c4a..7a52d869f3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -17,7 +17,7 @@ void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block) { emit.B(CC_LE, block.normalEntry); - // We can't write DISPATCHER_PC here, as blink linking is only for 8bytes. + // We can't write DISPATCHER_PC here, as block linking may only use 8 bytes. // So we'll hit two jumps when calling Advance. 
emit.B(block.checkedEntry); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 670cfdc98c..1876ff454e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -791,11 +791,6 @@ void JitArm64::dcbz(UGeckoInstruction inst) int a = inst.RA, b = inst.RB; - u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; - - // The following masks the region used by the GC/Wii virtual memory lib - mem_mask |= Memory::ADDR_MASK_MEM1; - gpr.Lock(W0); ARM64Reg addr_reg = W0; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 2626ee42f4..b42cfba2ba 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -48,54 +48,54 @@ void JitArm64::GenerateAsm() dispatcherNoCheck = GetCodePtr(); - FixupBranch exram, vmem, not_exram, not_vmem; - ARM64Reg pc_masked = W25; - ARM64Reg cache_base = X27; + bool assembly_dispatcher = true; - // VMEM - not_vmem = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_VMEM_BIT)); - ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK); - MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheVMEM.data()); - vmem = B(); - SetJumpTarget(not_vmem); - - if (SConfig::GetInstance().bWii) + if (assembly_dispatcher) { - // Wii EX-RAM - not_exram = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_EXRAM_BIT)); - ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHEEX_MASK); - MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheEx.data()); - exram = B(); - SetJumpTarget(not_exram); + // iCache[(address >> 2) & iCache_Mask]; + ARM64Reg pc_masked = W25; + ARM64Reg cache_base = X27; + ARM64Reg block_num = W27; + ANDI2R(pc_masked, DISPATCHER_PC, JitBaseBlockCache::iCache_Mask << 2); + MOVP2R(cache_base, jit->GetBlockCache()->GetICache()); + LDR(block_num, cache_base, EncodeRegTo64(pc_masked)); + + // blocks[block_num] + ARM64Reg block = X30; + 
ARM64Reg jit_block_size = W24; + MOVI2R(jit_block_size, sizeof(JitBlock)); + MUL(block_num, block_num, jit_block_size); + MOVP2R(block, jit->GetBlockCache()->GetBlocks()); + ADD(block, block, EncodeRegTo64(block_num)); + + // b.effectiveAddress != addr || b.msrBits != msr + ARM64Reg pc_and_msr = W25; + ARM64Reg pc_and_msr2 = W24; + LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, effectiveAddress)); + CMP(pc_and_msr, DISPATCHER_PC); + FixupBranch pc_missmatch = B(CC_NEQ); + + LDR(INDEX_UNSIGNED, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr)); + ANDI2R(pc_and_msr2, pc_and_msr2, JitBlock::JIT_CACHE_MSR_MASK); + LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, msrBits)); + CMP(pc_and_msr, pc_and_msr2); + FixupBranch msr_missmatch = B(CC_NEQ); + + // return blocks[block_num].normalEntry; + LDR(INDEX_UNSIGNED, block, block, offsetof(JitBlock, normalEntry)); + BR(block); + SetJumpTarget(pc_missmatch); + SetJumpTarget(msr_missmatch); } - // Common memory - ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK); - MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCache.data()); - - SetJumpTarget(vmem); - if (SConfig::GetInstance().bWii) - SetJumpTarget(exram); - - LDR(W27, cache_base, EncodeRegTo64(pc_masked)); - - FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit - // Success, it is our Jitblock. - MOVI2R(X30, (u64)jit->GetBlockCache()->GetCodePointers()); - UBFM(X27, X27, 61, 60); // Same as X27 << 3 - LDR(X30, X30, X27); // Load the block address in to R14 - BR(X30); - // No need to jump anywhere after here, the block will go back to dispatcher start - - SetJumpTarget(JitBlock); - + // Call C version of Dispatch(). + // FIXME: Implement this in inline assembly. STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVI2R(X30, (u64) & ::Jit); + MOVP2R(X30, reinterpret_cast(&JitBase::Dispatch)); BLR(X30); - LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - - B(dispatcherNoCheck); + // Jump to next block. 
+ BR(X0); SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 2367f01cc7..7b96a6b739 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -55,6 +55,10 @@ #define JITDISABLE(setting) \ FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting) +class JitBase; + +extern JitBase* jit; + class JitBase : public CPUCoreBase { protected: @@ -125,6 +129,7 @@ public: JitOptions jo; JitState js; + static const u8* Dispatch() { return jit->GetBlockCache()->Dispatch(); }; virtual JitBaseBlockCache* GetBlockCache() = 0; virtual void Jit(u32 em_address) = 0; @@ -147,8 +152,6 @@ public: bool HandleFault(uintptr_t access_address, SContext* ctx) override; }; -extern JitBase* jit; - void Jit(u32 em_address); // Merged routines that should be moved somewhere better diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 91c9e10504..659ec20d31 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -34,26 +34,15 @@ bool JitBaseBlockCache::IsFull() const void JitBaseBlockCache::Init() { - if (m_initialized) - { - PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized"); - return; - } - JitRegister::Init(SConfig::GetInstance().m_perfDir); - iCache.fill(JIT_ICACHE_INVALID_BYTE); - iCacheEx.fill(JIT_ICACHE_INVALID_BYTE); - iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE); + iCache.fill(0); Clear(); - - m_initialized = true; } void JitBaseBlockCache::Shutdown() { num_blocks = 0; - m_initialized = false; JitRegister::Shutdown(); } @@ -80,7 +69,8 @@ void JitBaseBlockCache::Clear() valid_block.ClearAll(); num_blocks = 0; - blockCodePointers.fill(nullptr); + blocks[0].msrBits = 0xFFFFFFFF; + blocks[0].invalid = true; } void JitBaseBlockCache::Reset() @@ -103,7 +93,9 @@ int 
JitBaseBlockCache::AllocateBlock(u32 em_address) { JitBlock& b = blocks[num_blocks]; b.invalid = false; - b.originalAddress = em_address; + b.effectiveAddress = em_address; + b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; + b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK; b.linkData.clear(); num_blocks++; // commit the current block return num_blocks - 1; @@ -111,13 +103,23 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address) void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr) { - blockCodePointers[block_num] = code_ptr; JitBlock& b = blocks[block_num]; + if (start_block_map.count(b.physicalAddress)) + { + // We already have a block at this address; invalidate the old block. + // This should be very rare. This will only happen if the same block + // is called both with DR/IR enabled or disabled. + WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", b.physicalAddress); + int old_block_num = start_block_map[b.physicalAddress]; + const JitBlock& old_b = blocks[old_block_num]; + block_map.erase( + std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress)); + DestroyBlock(old_block_num, true); + } + start_block_map[b.physicalAddress] = block_num; + FastLookupEntryForAddress(b.effectiveAddress) = block_num; - std::memcpy(GetICachePtr(b.originalAddress), &block_num, sizeof(u32)); - - // Convert the logical address to a physical address for the block map - u32 pAddr = b.originalAddress & 0x1FFFFFFF; + u32 pAddr = b.physicalAddress; for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block) valid_block.Set(block); @@ -135,46 +137,62 @@ void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* LinkBlockExits(block_num); } - JitRegister::Register(blockCodePointers[block_num], b.codeSize, "JIT_PPC_%08x", - b.originalAddress); + JitRegister::Register(b.checkedEntry, b.codeSize, "JIT_PPC_%08x", 
b.physicalAddress); } -const u8** JitBaseBlockCache::GetCodePointers() +int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr, u32 msr) { - return blockCodePointers.data(); -} + u32 translated_addr = addr; + if (UReg_MSR(msr).IR) + { + auto translated = PowerPC::JitCache_TranslateAddress(addr); + if (!translated.valid) + { + return -1; + } + translated_addr = translated.address; + } -u8* JitBaseBlockCache::GetICachePtr(u32 addr) -{ - if (addr & JIT_ICACHE_VMEM_BIT) - return &jit->GetBlockCache()->iCacheVMEM[addr & JIT_ICACHE_MASK]; - - if (addr & JIT_ICACHE_EXRAM_BIT) - return &jit->GetBlockCache()->iCacheEx[addr & JIT_ICACHEEX_MASK]; - - return &jit->GetBlockCache()->iCache[addr & JIT_ICACHE_MASK]; -} - -int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr) -{ - u32 inst; - std::memcpy(&inst, GetICachePtr(addr), sizeof(u32)); - - if (inst & 0xfc000000) // definitely not a JIT block + auto map_result = start_block_map.find(translated_addr); + if (map_result == start_block_map.end()) return -1; - - if ((int)inst >= num_blocks) + int block_num = map_result->second; + const JitBlock& b = blocks[block_num]; + if (b.invalid) return -1; - - if (blocks[inst].originalAddress != addr) + if (b.effectiveAddress != addr) return -1; - - return inst; + if (b.msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK)) + return -1; + return block_num; } -CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num) +void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) { - return (CompiledCode)blockCodePointers[block_num]; + int block_num = GetBlockNumberFromStartAddress(addr, msr); + if (block_num < 0) + { + Jit(addr); + } + else + { + FastLookupEntryForAddress(addr) = block_num; + LinkBlock(block_num); + } +} + +const u8* JitBaseBlockCache::Dispatch() +{ + int block_num = FastLookupEntryForAddress(PC); + + while (blocks[block_num].effectiveAddress != PC || + blocks[block_num].msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK)) + { + 
MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK); + block_num = FastLookupEntryForAddress(PC); + } + + return blocks[block_num].normalEntry; } // Block linker @@ -195,7 +213,7 @@ void JitBaseBlockCache::LinkBlockExits(int i) { if (!e.linkStatus) { - int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress); + int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress, b.msrBits); if (destinationBlock != -1) { WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]); @@ -208,39 +226,35 @@ void JitBaseBlockCache::LinkBlockExits(int i) void JitBaseBlockCache::LinkBlock(int i) { LinkBlockExits(i); - JitBlock& b = blocks[i]; - // equal_range(b) returns pair representing the range - // of element with key b - auto ppp = links_to.equal_range(b.originalAddress); - - if (ppp.first == ppp.second) - return; + const JitBlock& b = blocks[i]; + auto ppp = links_to.equal_range(b.effectiveAddress); for (auto iter = ppp.first; iter != ppp.second; ++iter) { - // PanicAlert("Linking block %i to block %i", iter->second, i); - LinkBlockExits(iter->second); + const JitBlock& b2 = blocks[iter->second]; + if (b.msrBits == b2.msrBits) + LinkBlockExits(iter->second); } } void JitBaseBlockCache::UnlinkBlock(int i) { JitBlock& b = blocks[i]; - auto ppp = links_to.equal_range(b.originalAddress); - - if (ppp.first == ppp.second) - return; + auto ppp = links_to.equal_range(b.effectiveAddress); for (auto iter = ppp.first; iter != ppp.second; ++iter) { JitBlock& sourceBlock = blocks[iter->second]; + if (sourceBlock.msrBits != b.msrBits) + continue; + for (auto& e : sourceBlock.linkData) { - if (e.exitAddress == b.originalAddress) + if (e.exitAddress == b.effectiveAddress) e.linkStatus = false; } } - links_to.erase(b.originalAddress); + links_to.erase(b.effectiveAddress); } void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate) @@ -258,20 +272,23 @@ void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate) return; } b.invalid = true; - 
std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32)); + start_block_map.erase(b.physicalAddress); + FastLookupEntryForAddress(b.effectiveAddress) = 0; UnlinkBlock(block_num); // Send anyone who tries to run this block back to the dispatcher. // Not entirely ideal, but .. pretty good. // Spurious entrances from previously linked blocks can only come through checkedEntry - WriteDestroyBlock(b.checkedEntry, b.originalAddress); + WriteDestroyBlock(b.checkedEntry, b.effectiveAddress); } void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced) { - // Convert the logical address to a physical address for the block map - u32 pAddr = address & 0x1FFFFFFF; + auto translated = PowerPC::JitCache_TranslateAddress(address); + if (!translated.valid) + return; + u32 pAddr = translated.address; // Optimize the common case of length == 32 which is used by Interpreter::dcb* bool destroy_block = true; @@ -288,20 +305,11 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for // address if (destroy_block) { - std::map, u32>::iterator it1 = block_map.lower_bound( - std::make_pair(pAddr, 0)), - it2 = it1; - while (it2 != block_map.end() && it2->first.second < pAddr + length) + auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); + while (it != block_map.end() && it->first.second < pAddr + length) { - JitBlock& b = blocks[it2->second]; - std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32)); - - DestroyBlock(it2->second, true); - ++it2; - } - if (it1 != it2) - { - block_map.erase(it1, it2); + DestroyBlock(it->second, true); + it = block_map.erase(it); } // If the code was actually modified, we need to clear the relevant entries from the diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 131e0e2d3f..91c4f8b78c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ 
b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -12,32 +12,55 @@ #include "Common/CommonTypes.h" -static const u32 JIT_ICACHE_SIZE = 0x2000000; -static const u32 JIT_ICACHE_MASK = 0x1ffffff; -static const u32 JIT_ICACHEEX_SIZE = 0x4000000; -static const u32 JIT_ICACHEEX_MASK = 0x3ffffff; -static const u32 JIT_ICACHE_EXRAM_BIT = 0x10000000; -static const u32 JIT_ICACHE_VMEM_BIT = 0x20000000; - -// This corresponds to opcode 5 which is invalid in PowerPC -static const u32 JIT_ICACHE_INVALID_BYTE = 0x80; -static const u32 JIT_ICACHE_INVALID_WORD = 0x80808080; - +// A JitBlock is a block of compiled code which corresponds to the PowerPC +// code at a given address. +// +// The notion of the address of a block is a bit complicated because of the +// way address translation works, but basically it's the combination of an +// effective address, the address translation bits in MSR, and the physical +// address. struct JitBlock { + enum + { + // Mask for the MSR bits which determine whether a compiled block + // is valid (MSR.IR and MSR.DR, the address translation bits). + JIT_CACHE_MSR_MASK = 0x30, + }; + + // A special entry point for block linking; usually used to check the + // downcount. const u8* checkedEntry; + // The normal entry point for the block, returned by Dispatch(). const u8* normalEntry; - u32 originalAddress; + // The effective address (PC) for the beginning of the block. + u32 effectiveAddress; + // The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK. + u32 msrBits; + // The physical address of the code represented by this block. + // Various maps in the cache are indexed by this (start_block_map, + // block_map, and valid_block in particular). This is useful because of + // the way the instruction cache works on PowerPC. + u32 physicalAddress; + // The number of bytes of JIT'ed code contained in this block. Mostly + // useful for logging. u32 codeSize; + // The number of PPC instructions represented by this block. 
Mostly + // useful for logging. u32 originalSize; int runCount; // for profiling. + // Whether this struct refers to a valid block. This is mostly useful as + // a debugging aid. + // FIXME: Change current users of invalid bit to assertions? bool invalid; + // Information about exits to a known address from this block. + // This is used to implement block linking. struct LinkData { - u8* exitPtrs; // to be able to rewrite the exit jum + u8* exitPtrs; // to be able to rewrite the exit jump u32 exitAddress; bool linkStatus; // is it already linked? }; @@ -59,7 +82,12 @@ class ValidBlockBitSet final public: enum { - VALID_BLOCK_MASK_SIZE = 0x20000000 / 32, + // ValidBlockBitSet covers the whole 32-bit address-space in 32-byte + // chunks. + // FIXME: Maybe we can get away with less? There isn't any actual + // RAM in most of this space. + VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32, + // The number of elements in the allocated array. Each u32 contains 32 bits. VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32 }; // Directly accessed by Jit64. @@ -79,33 +107,53 @@ public: class JitBaseBlockCache { - enum - { - MAX_NUM_BLOCKS = 65536 * 2, - }; +public: + static constexpr int MAX_NUM_BLOCKS = 65536 * 2; + static constexpr u32 iCache_Num_Elements = 0x10000; + static constexpr u32 iCache_Mask = iCache_Num_Elements - 1; - std::array blockCodePointers; - std::array blocks; +private: + // We store the metadata of all blocks in a linear way within this array. + std::array blocks; // number -> JitBlock int num_blocks; - std::multimap links_to; + + // links_to holds all exit points of all valid blocks in a reverse way. + // It is used to query all blocks which link to an address. + std::multimap links_to; // destination_PC -> number + + // Map indexed by the physical memory location. + // It is used to invalidate blocks based on memory location. std::map, u32> block_map; // (end_addr, start_addr) -> number + + // Map indexed by the physical address of the entry point. 
+ // This is used to query the block based on the current PC in a slow way. + // TODO: This is redundant with block_map, and both should be a multimap. + std::map start_block_map; // start_addr -> number + + // This bitset shows which cachelines overlap with any blocks. + // It is used to provide a fast way to query if no icache invalidation is needed. ValidBlockBitSet valid_block; - bool m_initialized; + // This array is indexed with the masked PC and likely holds the correct block id. + // This is used as a fast cache of start_block_map used in the assembly dispatcher. + std::array iCache; // start_addr & mask -> number void LinkBlockExits(int i); void LinkBlock(int i); void UnlinkBlock(int i); - u8* GetICachePtr(u32 addr); void DestroyBlock(int block_num, bool invalidate); + void MoveBlockIntoFastCache(u32 em_address, u32 msr); + + // Fast but risky block lookup based on iCache. + int& FastLookupEntryForAddress(u32 address) { return iCache[(address >> 2) & iCache_Mask]; } // Virtual for overloaded virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0; virtual void WriteDestroyBlock(const u8* location, u32 address) = 0; public: - JitBaseBlockCache() : num_blocks(0), m_initialized(false) {} + JitBaseBlockCache() : num_blocks(0) {} virtual ~JitBaseBlockCache() {} int AllocateBlock(u32 em_address); void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr); @@ -119,18 +167,20 @@ public: // Code Cache JitBlock* GetBlock(int block_num); + JitBlock* GetBlocks() { return blocks.data(); } + int* GetICache() { return iCache.data(); } int GetNumBlocks() const; - const u8** GetCodePointers(); - std::array iCache; - std::array iCacheEx; - std::array iCacheVMEM; - // Fast way to get a block. Only works on the first ppc instruction of a block. - int GetBlockNumberFromStartAddress(u32 em_address); + // Look for the block in the slow but accurate way. + // This function shall be used if FastLookupEntryForAddress() failed. 
+ int GetBlockNumberFromStartAddress(u32 em_address, u32 msr); - CompiledCode GetCompiledCodeFromBlock(int block_num); + // Get the normal entry for the block associated with the current program + // counter. This will JIT code if necessary. (This is the reference + // implementation; high-performance JITs will want to use a custom + // assembly version.) + const u8* Dispatch(); - // DOES NOT WORK CORRECTLY WITH INLINING void InvalidateICache(u32 address, const u32 length, bool forced); u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); } diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 73d64013c5..8ace00aa0e 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -150,7 +150,7 @@ void GetProfileResults(ProfileStats* prof_stats) u64 timecost = block->ticCounter; // Todo: tweak. if (block->runCount >= 1) - prof_stats->block_stats.emplace_back(i, block->originalAddress, cost, timecost, + prof_stats->block_stats.emplace_back(i, block->effectiveAddress, cost, timecost, block->runCount, block->codeSize); prof_stats->cost_sum += cost; prof_stats->timecost_sum += timecost; @@ -169,12 +169,12 @@ int GetHostCode(u32* address, const u8** code, u32* code_size) return 1; } - int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address); + int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address, MSR); if (block_num < 0) { for (int i = 0; i < 500; i++) { - block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i); + block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i, MSR); if (block_num >= 0) break; } @@ -182,8 +182,8 @@ int GetHostCode(u32* address, const u8** code, u32* code_size) if (block_num >= 0) { JitBlock* block = jit->GetBlockCache()->GetBlock(block_num); - if (!(block->originalAddress <= *address && - block->originalSize + block->originalAddress >= 
*address)) + if (!(block->effectiveAddress <= *address && + block->originalSize + block->effectiveAddress >= *address)) block_num = -1; } @@ -199,7 +199,7 @@ int GetHostCode(u32* address, const u8** code, u32* code_size) *code = block->checkedEntry; *code_size = block->codeSize; - *address = block->originalAddress; + *address = block->effectiveAddress; return 0; } diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 6cb0b43dc6..c6a65d64ba 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -76,7 +76,19 @@ enum XCheckTLBFlag FLAG_READ, FLAG_WRITE, FLAG_OPCODE, + FLAG_OPCODE_NO_EXCEPTION }; + +static bool IsOpcodeFlag(XCheckTLBFlag flag) +{ + return flag == FLAG_OPCODE || flag == FLAG_OPCODE_NO_EXCEPTION; +} + +static bool IsNoExceptionFlag(XCheckTLBFlag flag) +{ + return flag == FLAG_NO_EXCEPTION || flag == FLAG_OPCODE_NO_EXCEPTION; +} + template static u32 TranslateAddress(const u32 address); @@ -836,6 +848,43 @@ bool IsOptimizableGatherPipeWrite(u32 address) return address == 0xCC008000; } +TranslateResult JitCache_TranslateAddress(u32 address) +{ + if (!UReg_MSR(MSR).IR) + return TranslateResult{true, true, address}; + + bool from_bat = true; + + int segment = address >> 28; + + if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1)) + { + u32 tlb_addr = TranslateAddress(address); + if (tlb_addr == 0) + { + return TranslateResult{false, false, 0}; + } + else + { + address = tlb_addr; + from_bat = false; + } + } + else + { + if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE) + address = address & 0x3FFFFFFF; + else if (Memory::m_pEXRAM && segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE) + address = address & 0x3FFFFFFF; + else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4)) + address = 0x7E000000 | (address & Memory::FAKEVMEM_MASK); + else + return TranslateResult{false, false, 0}; + } + + return 
TranslateResult{true, from_bat, address}; +} + // ********************************************************************************* // Warning: Test Area // @@ -990,7 +1039,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl u32* paddr) { u32 tag = vpa >> HW_PAGE_INDEX_SHIFT; - PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK]; + PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK]; if (tlbe->tag[0] == tag) { // Check if C bit requires updating @@ -1006,7 +1055,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl } } - if (flag != FLAG_NO_EXCEPTION) + if (!IsNoExceptionFlag(flag)) tlbe->recent = 0; *paddr = tlbe->paddr[0] | (vpa & 0xfff); @@ -1028,7 +1077,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl } } - if (flag != FLAG_NO_EXCEPTION) + if (!IsNoExceptionFlag(flag)) tlbe->recent = 1; *paddr = tlbe->paddr[1] | (vpa & 0xfff); @@ -1040,11 +1089,11 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address) { - if (flag == FLAG_NO_EXCEPTION) + if (IsNoExceptionFlag(flag)) return; int tag = address >> HW_PAGE_INDEX_SHIFT; - PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK]; + PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK]; int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID; tlbe->recent = index; tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT; @@ -1110,6 +1159,7 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB switch (flag) { case FLAG_NO_EXCEPTION: + case FLAG_OPCODE_NO_EXCEPTION: break; case FLAG_READ: PTE2.R = 1; @@ -1123,7 +1173,7 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const 
XCheckTLB break; } - if (flag != FLAG_NO_EXCEPTION) + if (!IsNoExceptionFlag(flag)) *(u32*)&Memory::physical_base[pteg_addr + 4] = bswap(PTE2.Hex); // We already updated the TLB entry if this was caused by a C bit. diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 33f1f159ad..98d281e851 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -272,6 +272,13 @@ bool IsOptimizableRAMAddress(const u32 address); u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize); bool IsOptimizableGatherPipeWrite(u32 address); +struct TranslateResult +{ + bool valid; + bool from_bat; + u32 address; +}; +TranslateResult JitCache_TranslateAddress(u32 address); } // namespace enum CRBits