diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index f907b85647..5be2905096 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -190,12 +190,6 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume, INFO_LOG_FMT(BOOT, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset, ram_address, length); DVDRead(volume, dvd_offset, ram_address, length, partition); - for (u32 i = 0; i < length; i += 32) - { - if (PowerPC::ppcState.m_enable_dcache) - PowerPC::ppcState.dCache.Invalidate(ram_address + i); - PowerPC::ppcState.iCache.Invalidate(ram_address + i); - } DiscIO::Riivolution::ApplyApploaderMemoryPatches(riivolution_patches, ram_address, length); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 401e98e8f1..1c1a341c85 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -448,7 +448,7 @@ void Interpreter::dcbf(UGeckoInstruction inst) return; } - PowerPC::FlushCacheLine(address); + PowerPC::FlushDCacheLine(address); } void Interpreter::dcbi(UGeckoInstruction inst) @@ -469,7 +469,7 @@ void Interpreter::dcbi(UGeckoInstruction inst) return; } - PowerPC::InvalidateCacheLine(address); + PowerPC::InvalidateDCacheLine(address); } void Interpreter::dcbst(UGeckoInstruction inst) @@ -484,7 +484,7 @@ void Interpreter::dcbst(UGeckoInstruction inst) return; } - PowerPC::StoreCacheLine(address); + PowerPC::StoreDCacheLine(address); } // These instructions hint that it might be optimal to prefetch the specified cache line into the @@ -520,7 +520,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) } } - PowerPC::ClearCacheLine(dcbz_addr & (~31)); + PowerPC::ClearDCacheLine(dcbz_addr & (~31)); } void Interpreter::dcbz_l(UGeckoInstruction inst) @@ -539,7 +539,7 @@ void 
Interpreter::dcbz_l(UGeckoInstruction inst) return; } - PowerPC::ClearCacheLine(address & (~31)); + PowerPC::ClearDCacheLine(address & (~31)); } // eciwx/ecowx technically should access the specified device @@ -594,7 +594,6 @@ void Interpreter::icbi(UGeckoInstruction inst) { // TODO: Raise DSI if translation fails (except for direct-store segments). const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - JitInterface::InvalidateICacheLine(address); PowerPC::ppcState.iCache.Invalidate(address); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 6e01e1ecae..09a96aafeb 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -272,8 +272,9 @@ void Interpreter::mfspr(UGeckoInstruction inst) break; case SPR_IABR: - // A strange quirk: reading back this register on hardware will always have this bit set to 0 - // (despite the bit appearing to function normally when set). This does not apply to the DABR. + // A strange quirk: reading back this register on hardware will always have the TE (Translation + // enabled) bit set to 0 (despite the bit appearing to function normally when set). This does + // not apply to the DABR. 
rGPR[inst.RD] = rSPR(index) & ~1; return; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 969eb1bf16..fe95271136 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -446,7 +446,7 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); - ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH); + ABI_CallFunctionR(PowerPC::ClearDCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (emit_fast_path) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 386c0fb69b..e32d166be2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -231,7 +231,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, } else if (flags & BackPatchInfo::FLAG_ZERO_256) { - MOVP2R(ARM64Reg::X8, &PowerPC::ClearCacheLine); + MOVP2R(ARM64Reg::X8, &PowerPC::ClearDCacheLine); BLR(ARM64Reg::X8); } else diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 1ea8397f96..1ccc81d959 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1137,7 +1137,7 @@ void DMA_MemoryToLC(const u32 cache_address, const u32 mem_address, const u32 nu memcpy(dst, src, 32 * num_blocks); } -void ClearCacheLine(u32 address) +void ClearDCacheLine(u32 address) { DEBUG_ASSERT((address & 0x1F) == 0); if (MSR.DR) @@ -1168,7 +1168,7 @@ void ClearCacheLine(u32 address) WriteToHardware(memory, address + i, 0, 4); } -void StoreCacheLine(u32 address) +void StoreDCacheLine(u32 address) { address &= ~0x1F; @@ -1192,7 +1192,7 @@ void StoreCacheLine(u32 address) 
ppcState.dCache.Store(address); } -void InvalidateCacheLine(u32 address) +void InvalidateDCacheLine(u32 address) { address &= ~0x1F; @@ -1214,7 +1214,7 @@ void InvalidateCacheLine(u32 address) ppcState.dCache.Invalidate(address); } -void FlushCacheLine(u32 address) +void FlushDCacheLine(u32 address) { address &= ~0x1F; @@ -1238,7 +1238,7 @@ void FlushCacheLine(u32 address) ppcState.dCache.Flush(address); } -void TouchCacheLine(u32 address, bool store) +void TouchDCacheLine(u32 address, bool store) { address &= ~0x1F; diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index bfb855478d..44b9785611 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -165,11 +165,11 @@ void Write_F64(double var, u32 address); void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks); void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks); -void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned -void StoreCacheLine(u32 address); -void InvalidateCacheLine(u32 address); -void FlushCacheLine(u32 address); -void TouchCacheLine(u32 address, bool store); +void ClearDCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned +void StoreDCacheLine(u32 address); +void InvalidateDCacheLine(u32 address); +void FlushDCacheLine(u32 address); +void TouchDCacheLine(u32 address, bool store); // TLB functions void SDRUpdated(); diff --git a/Source/Core/Core/PowerPC/PPCCache.cpp b/Source/Core/Core/PowerPC/PPCCache.cpp index acdcfaf0c5..b13793522e 100644 --- a/Source/Core/Core/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/PowerPC/PPCCache.cpp @@ -98,7 +98,7 @@ void Cache::Reset() { valid.fill(0); plru.fill(0); - wrote.fill(0); + modified.fill(0); lookup_table.fill(0xFF); lookup_table_ex.fill(0xFF); lookup_table_vmem.fill(0xFF); @@ -113,7 +113,6 @@ void InstructionCache::Reset() void Cache::Init() { data.fill({}); - tags.fill({}); addrs.fill({}); Reset(); } @@ -137,9 
+136,9 @@ void Cache::Store(u32 addr) if (way == 0xff) return; - if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + if (valid[set] & (1U << way) && modified[set] & (1U << way)) memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } void Cache::FlushAll() @@ -151,7 +150,7 @@ void Cache::FlushAll() { for (size_t way = 0; way < CACHE_WAYS; way++) { - if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + if (valid[set] & (1U << way) && modified[set] & (1U << way)) memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); } } @@ -168,15 +167,15 @@ void Cache::Invalidate(u32 addr) if (valid[set] & (1U << way)) { - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; valid[set] &= ~(1U << way); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } } @@ -192,18 +191,18 @@ void Cache::Flush(u32 addr) if (valid[set] & (1U << way)) { - if (wrote[set] & (1U << way)) + if (modified[set] & (1U << way)) memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 
0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; valid[set] &= ~(1U << way); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } } @@ -237,8 +236,6 @@ std::pair Cache::GetCache(u32 addr, bool locked) // load to the cache if (!locked && way == 0xff) { - u32 tag = addr >> 12; - // select a way if (valid[set] != 0xff) way = s_way_from_valid[valid[set]]; @@ -248,15 +245,15 @@ std::pair Cache::GetCache(u32 addr, bool locked) if (valid[set] & (1 << way)) { // store the cache back to main memory - if (wrote[set] & (1 << way)) + if (modified[set] & (1 << way)) memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; } // load @@ -268,15 +265,14 @@ std::pair Cache::GetCache(u32 addr, bool locked) lookup_table_ex[(addr >> 5) & 0x1fffff] = way; else lookup_table[(addr >> 5) & 0xfffff] = way; - tags[set][way] = tag; addrs[set][way] = addr; valid[set] |= (1 << way); - wrote[set] &= ~(1 << way); - } + modified[set] &= ~(1 << way); - // update plru - if (way != 0xff) - plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way]; + // update plru + if (way != 0xff) + plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way]; + } return {set, way}; } 
@@ -329,7 +325,7 @@ void Cache::Write(u32 addr, const void* buffer, u32 len, bool locked) { std::memcpy(reinterpret_cast(data[set][way].data()) + offset_in_block, value, len_in_block); - wrote[set] |= (1 << way); + modified[set] |= (1 << way); } else { @@ -354,24 +350,22 @@ void Cache::DoState(PointerWrap& p) { if ((valid[set] & (1 << way)) != 0) { - const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & CACHE_VMEM_BIT) - lookup_table_vmem[(addr >> 5) & 0xfffff] = 0xff; - else if (addr & CACHE_EXRAM_BIT) - lookup_table_ex[(addr >> 5) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[(addr >> 5) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; } } } } p.DoArray(data); - p.DoArray(tags); p.DoArray(plru); p.DoArray(valid); p.DoArray(addrs); - p.DoArray(wrote); + p.DoArray(modified); if (p.IsReadMode()) { @@ -382,13 +376,12 @@ void Cache::DoState(PointerWrap& p) { if ((valid[set] & (1 << way)) != 0) { - const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & CACHE_VMEM_BIT) - lookup_table_vmem[(addr >> 5) & 0xfffff] = way; - else if (addr & CACHE_EXRAM_BIT) - lookup_table_ex[(addr >> 5) & 0x1fffff] = way; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = way; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = way; else - lookup_table[(addr >> 5) & 0xfffff] = way; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = way; } } } @@ -413,7 +406,22 @@ void InstructionCache::Invalidate(u32 addr) if (!HID0.ICE || m_disable_icache) return; - Cache::Invalidate(addr); + // Invalidates the whole set + const u32 set = (addr >> 5) & 0x7f; + for (size_t way = 0; way < 8; way++) + { + if
(valid[set] & (1U << way)) + { + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + else + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + } + } + valid[set] = 0; + modified[set] = 0; JitInterface::InvalidateICacheLine(addr); } diff --git a/Source/Core/Core/PowerPC/PPCCache.h b/Source/Core/Core/PowerPC/PPCCache.h index 8aa6c4c811..6e0d1dc33e 100644 --- a/Source/Core/Core/PowerPC/PPCCache.h +++ b/Source/Core/Core/PowerPC/PPCCache.h @@ -23,11 +23,10 @@ constexpr u32 CACHE_VMEM_BIT = 0x20000000; struct Cache { std::array, CACHE_WAYS>, CACHE_SETS> data{}; - std::array, CACHE_SETS> tags{}; + std::array, CACHE_SETS> addrs{}; std::array plru{}; std::array valid{}; - std::array, CACHE_SETS> addrs{}; - std::array wrote{}; + std::array modified{}; // Note: This is only for performance purposes; this same data could be computed at runtime // from the tags and valid fields (and that's how it's done on the actual cache) diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index f05ed37065..411705d833 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -141,6 +141,10 @@ void DoState(PointerWrap& p) INFO_LOG_FMT(POWERPC, "Flushing data cache"); ppcState.dCache.FlushAll(); } + else + { + ppcState.dCache.Reset(); + } RoundingModeUpdated(); IBATUpdated(); @@ -275,14 +279,7 @@ void Init(CPUCore cpu_core) ppcState.iCache.Init(); ppcState.dCache.Init(); - if (Config::Get(Config::MAIN_ACCURATE_CPU_CACHE)) - { - ppcState.m_enable_dcache = true; - } - else - { - ppcState.m_enable_dcache = false; - } + ppcState.m_enable_dcache = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE); if (Config::Get(Config::MAIN_ENABLE_DEBUGGING)) breakpoints.ClearAllTemporary();