Implement PPC write-back data cache

This commit is contained in:
TheLordScruffy 2022-10-17 15:28:29 -04:00
parent abf08b5869
commit e97d380437
22 changed files with 559 additions and 132 deletions

View File

@ -190,6 +190,12 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume,
INFO_LOG_FMT(BOOT, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset,
ram_address, length);
DVDRead(volume, dvd_offset, ram_address, length, partition);
for (u32 i = 0; i < length; i += 32)
{
if (PowerPC::ppcState.m_enable_dcache)
PowerPC::ppcState.dCache.Invalidate(ram_address + i);
PowerPC::ppcState.iCache.Invalidate(ram_address + i);
}
DiscIO::Riivolution::ApplyApploaderMemoryPatches(riivolution_patches, ram_address, length);

View File

@ -37,6 +37,7 @@ const Info<PowerPC::CPUCore> MAIN_CPU_CORE{{System::Main, "Core", "CPUCore"},
PowerPC::DefaultCPUCore()};
const Info<bool> MAIN_JIT_FOLLOW_BRANCH{{System::Main, "Core", "JITFollowBranch"}, true};
const Info<bool> MAIN_FASTMEM{{System::Main, "Core", "Fastmem"}, true};
const Info<bool> MAIN_ACCURATE_CPU_CACHE{{System::Main, "Core", "AccurateCPUCache"}, false};
const Info<bool> MAIN_DSP_HLE{{System::Main, "Core", "DSPHLE"}, true};
const Info<int> MAIN_TIMING_VARIANCE{{System::Main, "Core", "TimingVariance"}, 40};
const Info<bool> MAIN_CPU_THREAD{{System::Main, "Core", "CPUThread"}, true};

View File

@ -55,6 +55,7 @@ extern const Info<bool> MAIN_SKIP_IPL;
extern const Info<PowerPC::CPUCore> MAIN_CPU_CORE;
extern const Info<bool> MAIN_JIT_FOLLOW_BRANCH;
extern const Info<bool> MAIN_FASTMEM;
extern const Info<bool> MAIN_ACCURATE_CPU_CACHE;
// Should really be in the DSP section, but we're kind of stuck with bad decisions made in the past.
extern const Info<bool> MAIN_DSP_HLE;
extern const Info<int> MAIN_TIMING_VARIANCE;

View File

@ -127,6 +127,7 @@ bool IsSettingSaveable(const Config::Location& config_location)
&Config::MAIN_CPU_THREAD.GetLocation(),
&Config::MAIN_MMU.GetLocation(),
&Config::MAIN_PAUSE_ON_PANIC.GetLocation(),
&Config::MAIN_ACCURATE_CPU_CACHE.GetLocation(),
&Config::MAIN_BB_DUMP_PORT.GetLocation(),
&Config::MAIN_SYNC_GPU.GetLocation(),
&Config::MAIN_SYNC_GPU_MAX_DISTANCE.GetLocation(),

View File

@ -438,14 +438,17 @@ void Interpreter::dcba(UGeckoInstruction inst)
void Interpreter::dcbf(UGeckoInstruction inst)
{
// TODO: Implement some sort of L2 emulation.
// TODO: Raise DSI if translation fails (except for direct-store segments).
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
JitInterface::InvalidateICacheLine(address);
if (!PowerPC::ppcState.m_enable_dcache)
{
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
JitInterface::InvalidateICacheLine(address);
return;
}
PowerPC::FlushCacheLine(address);
}
void Interpreter::dcbi(UGeckoInstruction inst)
@ -456,42 +459,44 @@ void Interpreter::dcbi(UGeckoInstruction inst)
return;
}
// TODO: Implement some sort of L2 emulation.
// TODO: Raise DSI if translation fails (except for direct-store segments).
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
JitInterface::InvalidateICacheLine(address);
if (!PowerPC::ppcState.m_enable_dcache)
{
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
JitInterface::InvalidateICacheLine(address);
return;
}
PowerPC::InvalidateCacheLine(address);
}
void Interpreter::dcbst(UGeckoInstruction inst)
{
// TODO: Implement some sort of L2 emulation.
// TODO: Raise DSI if translation fails (except for direct-store segments).
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
JitInterface::InvalidateICacheLine(address);
if (!PowerPC::ppcState.m_enable_dcache)
{
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
JitInterface::InvalidateICacheLine(address);
return;
}
PowerPC::StoreCacheLine(address);
}
// These instructions hint that it might be optimal to prefetch the specified cache line into the
// data cache. But the CPU is never guaranteed to do this fetch, and in practice it's not more
// performant to emulate it.
void Interpreter::dcbt(UGeckoInstruction inst)
{
  // Both paths are currently no-ops: the function leaves early when HID0.NOOPTI is set and
  // otherwise falls through to the TODO below without touching any cache state.
  const bool touch_disabled = HID0.NOOPTI;
  if (touch_disabled)
  {
    return;
  }

  // TODO: Implement some sort of L2 emulation.
}
void Interpreter::dcbtst(UGeckoInstruction inst)
{
  // Both paths are currently no-ops: the function leaves early when HID0.NOOPTI is set and
  // otherwise falls through to the TODO below without touching any cache state.
  const bool touch_disabled = HID0.NOOPTI;
  if (touch_disabled)
  {
    return;
  }

  // TODO: Implement some sort of L2 emulation.
}
void Interpreter::dcbz(UGeckoInstruction inst)
@ -504,14 +509,17 @@ void Interpreter::dcbz(UGeckoInstruction inst)
return;
}
// Hack to stop dcbz/dcbi over low MEM1 trashing memory.
if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) &&
Config::Get(Config::MAIN_LOW_DCBZ_HACK))
if (!PowerPC::ppcState.m_enable_dcache)
{
return;
// Hack to stop dcbz/dcbi over low MEM1 trashing memory. This is not needed if data cache
// emulation is enabled.
if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) &&
Config::Get(Config::MAIN_LOW_DCBZ_HACK))
{
return;
}
}
// TODO: Implement some sort of L2 emulation.
PowerPC::ClearCacheLine(dcbz_addr & (~31));
}
@ -531,7 +539,6 @@ void Interpreter::dcbz_l(UGeckoInstruction inst)
return;
}
// FAKE: clear memory instead of clearing the cache block
PowerPC::ClearCacheLine(address & (~31));
}
@ -587,6 +594,7 @@ void Interpreter::icbi(UGeckoInstruction inst)
{
// TODO: Raise DSI if translation fails (except for direct-store segments).
const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
JitInterface::InvalidateICacheLine(address);
PowerPC::ppcState.iCache.Invalidate(address);
}

View File

@ -250,9 +250,32 @@ void Interpreter::mfspr(UGeckoInstruction inst)
rSPR(index) &= ~1;
}
break;
case SPR_XER:
rSPR(index) = PowerPC::GetXER().Hex;
break;
case SPR_UPMC1:
rSPR(index) = rSPR(SPR_PMC1);
break;
case SPR_UPMC2:
rSPR(index) = rSPR(SPR_PMC2);
break;
case SPR_UPMC3:
rSPR(index) = rSPR(SPR_PMC3);
break;
case SPR_UPMC4:
rSPR(index) = rSPR(SPR_PMC4);
break;
case SPR_IABR:
// A strange quirk: reading back this register on hardware will always have this bit set to 0
// (despite the bit appearing to function normally when set). This does not apply to the DABR.
rGPR[inst.RD] = rSPR(index) & ~1;
return;
}
rGPR[inst.RD] = rSPR(index);
}

View File

@ -229,6 +229,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
void Jit64::dcbx(UGeckoInstruction inst)
{
FALLBACK_IF(m_accurate_cpu_cache_enabled);
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);

View File

@ -412,6 +412,11 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_PMC2:
case SPR_PMC3:
case SPR_PMC4:
case SPR_UPMC1:
case SPR_UPMC2:
case SPR_UPMC3:
case SPR_UPMC4:
case SPR_IABR:
FALLBACK_IF(true);
default:
{

View File

@ -61,6 +61,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
if (m_accurate_cpu_cache_enabled)
mode = MemAccessMode::AlwaysSafe;
const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;

View File

@ -625,6 +625,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
void JitArm64::dcbx(UGeckoInstruction inst)
{
FALLBACK_IF(m_accurate_cpu_cache_enabled);
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);

View File

@ -395,6 +395,15 @@ void JitArm64::mfspr(UGeckoInstruction inst)
break;
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
case SPR_PMC2:
case SPR_PMC3:
case SPR_PMC4:
case SPR_UPMC1:
case SPR_UPMC2:
case SPR_UPMC3:
case SPR_UPMC4:
case SPR_IABR:
FALLBACK_IF(true);
default:
gpr.BindToRegister(d, false);

View File

@ -58,6 +58,13 @@ void JitBase::RefreshConfig()
m_fastmem_enabled = Config::Get(Config::MAIN_FASTMEM);
m_mmu_enabled = Core::System::GetInstance().IsMMUMode();
m_pause_on_panic_enabled = Core::System::GetInstance().IsPauseOnPanicMode();
m_accurate_cpu_cache_enabled = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE);
if (m_accurate_cpu_cache_enabled)
{
m_fastmem_enabled = false;
// This hack is unneeded if the data cache is being emulated.
m_low_dcbz_hack = false;
}
analyzer.SetDebuggingEnabled(m_enable_debugging);
analyzer.SetBranchFollowingEnabled(Config::Get(Config::MAIN_JIT_FOLLOW_BRANCH));

View File

@ -136,6 +136,7 @@ protected:
bool m_fastmem_enabled = false;
bool m_mmu_enabled = false;
bool m_pause_on_panic_enabled = false;
bool m_accurate_cpu_cache_enabled = false;
void RefreshConfig();

View File

@ -187,6 +187,8 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
return static_cast<T>(var);
}
bool wi = false;
if (!never_translate && MSR.DR)
{
auto translated_addr = TranslateAddress<flag>(em_address);
@ -197,6 +199,7 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
return 0;
}
em_address = translated_addr.address;
wi = translated_addr.wi;
}
if (flag == XCheckTLBFlag::Read && (em_address & 0xF8000000) == 0x08000000)
@ -221,7 +224,18 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
T value;
std::memcpy(&value, &memory.GetRAM()[em_address & memory.GetRamMask()], sizeof(T));
em_address &= memory.GetRamMask();
if (!ppcState.m_enable_dcache || wi)
{
std::memcpy(&value, &memory.GetRAM()[em_address], sizeof(T));
}
else
{
ppcState.dCache.Read(em_address, &value, sizeof(T),
HID0.DLOCK || flag != XCheckTLBFlag::Read);
}
return bswap(value);
}
@ -229,7 +243,18 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
(em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal())
{
T value;
std::memcpy(&value, &memory.GetEXRAM()[em_address & 0x0FFFFFFF], sizeof(T));
em_address &= 0x0FFFFFFF;
if (!ppcState.m_enable_dcache || wi)
{
std::memcpy(&value, &memory.GetEXRAM()[em_address], sizeof(T));
}
else
{
ppcState.dCache.Read(em_address + 0x10000000, &value, sizeof(T),
HID0.DLOCK || flag != XCheckTLBFlag::Read);
}
return bswap(value);
}
@ -391,14 +416,28 @@ static void WriteToHardware(Memory::MemoryManager& memory, u32 em_address, const
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
std::memcpy(&memory.GetRAM()[em_address & memory.GetRamMask()], &swapped_data, size);
em_address &= memory.GetRamMask();
if (ppcState.m_enable_dcache && !wi)
ppcState.dCache.Write(em_address, &swapped_data, size, HID0.DLOCK);
if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write)
std::memcpy(&memory.GetRAM()[em_address], &swapped_data, size);
return;
}
if (memory.GetEXRAM() && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal())
{
std::memcpy(&memory.GetEXRAM()[em_address & 0x0FFFFFFF], &swapped_data, size);
em_address &= 0x0FFFFFFF;
if (ppcState.m_enable_dcache && !wi)
ppcState.dCache.Write(em_address + 0x10000000, &swapped_data, size, HID0.DLOCK);
if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write)
std::memcpy(&memory.GetEXRAM()[em_address], &swapped_data, size);
return;
}
@ -1129,6 +1168,100 @@ void ClearCacheLine(u32 address)
WriteToHardware<XCheckTLBFlag::Write, true>(memory, address + i, 0, 4);
}
void StoreCacheLine(u32 address)
{
address &= ~0x1F;
if (MSR.DR)
{
auto translated_address = TranslateAddress<XCheckTLBFlag::Write>(address);
if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
{
return;
}
if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT)
{
// If translation fails, generate a DSI.
GenerateDSIException(address, true);
return;
}
address = translated_address.address;
}
if (ppcState.m_enable_dcache)
ppcState.dCache.Store(address);
}
void InvalidateCacheLine(u32 address)
{
address &= ~0x1F;
if (MSR.DR)
{
auto translated_address = TranslateAddress<XCheckTLBFlag::Write>(address);
if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
{
return;
}
if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT)
{
return;
}
address = translated_address.address;
}
if (ppcState.m_enable_dcache)
ppcState.dCache.Invalidate(address);
}
void FlushCacheLine(u32 address)
{
address &= ~0x1F;
if (MSR.DR)
{
auto translated_address = TranslateAddress<XCheckTLBFlag::Write>(address);
if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
{
return;
}
if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT)
{
// If translation fails, generate a DSI.
GenerateDSIException(address, true);
return;
}
address = translated_address.address;
}
if (ppcState.m_enable_dcache)
ppcState.dCache.Flush(address);
}
void TouchCacheLine(u32 address, bool store)
{
address &= ~0x1F;
if (MSR.DR)
{
auto translated_address = TranslateAddress<XCheckTLBFlag::Write>(address);
if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
{
return;
}
if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT)
{
// If translation fails, generate a DSI.
GenerateDSIException(address, true);
return;
}
address = translated_address.address;
}
if (ppcState.m_enable_dcache)
ppcState.dCache.Touch(address, store);
}
u32 IsOptimizableMMIOAccess(u32 address, u32 access_size)
{
if (PowerPC::memchecks.HasAny())

View File

@ -164,7 +164,12 @@ void Write_F64(double var, u32 address);
void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks);
void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks);
void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned
void StoreCacheLine(u32 address);
void InvalidateCacheLine(u32 address);
void FlushCacheLine(u32 address);
void TouchCacheLine(u32 address, bool store);
// TLB functions
void SDRUpdated();

View File

@ -94,134 +94,270 @@ InstructionCache::~InstructionCache()
Config::RemoveConfigChangedCallback(*m_config_callback_id);
}
void InstructionCache::Reset()
void Cache::Reset()
{
valid.fill(0);
plru.fill(0);
wrote.fill(0);
lookup_table.fill(0xFF);
lookup_table_ex.fill(0xFF);
lookup_table_vmem.fill(0xFF);
}
void InstructionCache::Reset()
{
Cache::Reset();
JitInterface::ClearSafe();
}
void Cache::Init()
{
  // Zero the backing storage that Reset() leaves alone...
  addrs.fill({});
  tags.fill({});
  data.fill({});

  // ...then clear the valid/written/PLRU state and the lookup tables.
  Reset();
}
void InstructionCache::Init()
{
if (!m_config_callback_id)
m_config_callback_id = Config::AddConfigChangedCallback([this] { RefreshConfig(); });
RefreshConfig();
data.fill({});
tags.fill({});
Reset();
Cache::Init();
}
void InstructionCache::Invalidate(u32 addr)
{
  const bool icache_active = HID0.ICE && !m_disable_icache;
  if (!icache_active)
    return;

  // Every way of the set that addr maps to is dropped, not only the way holding addr.
  const u32 set_index = (addr >> 5) & 0x7f;
  const u32 valid_ways = valid[set_index];

  for (size_t way = 0; way < 8; ++way)
  {
    if ((valid_ways & (1U << way)) == 0)
      continue;

    // Rebuild the lookup table index of the cached line from its tag and set, then mark the
    // entry in the matching table as not cached.
    const u32 tag = tags[set_index][way];
    const u32 line_index = (tag << 7) | set_index;

    if (tag & (ICACHE_VMEM_BIT >> 12))
      lookup_table_vmem[line_index & 0xfffff] = 0xff;
    else if (tag & (ICACHE_EXRAM_BIT >> 12))
      lookup_table_ex[line_index & 0x1fffff] = 0xff;
    else
      lookup_table[line_index & 0xfffff] = 0xff;
  }

  valid[set_index] = 0;
  JitInterface::InvalidateICacheLine(addr);
}
u32 InstructionCache::ReadInstruction(u32 addr)
void Cache::Store(u32 addr)
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
if (!HID0.ICE || m_disable_icache) // instruction cache is disabled
return memory.Read_U32(addr);
u32 set = (addr >> 5) & 0x7f;
u32 tag = addr >> 12;
auto [set, way] = GetCache(addr, true);
u32 t;
if (addr & ICACHE_VMEM_BIT)
if (way == 0xff)
return;
if (valid[set] & (1U << way) && wrote[set] & (1U << way))
memory.CopyToEmu((addr & ~0x1f), reinterpret_cast<u8*>(data[set][way].data()), 32);
wrote[set] &= ~(1U << way);
}
void Cache::FlushAll()
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
for (size_t set = 0; set < CACHE_SETS; set++)
{
t = lookup_table_vmem[(addr >> 5) & 0xfffff];
for (size_t way = 0; way < CACHE_WAYS; way++)
{
if (valid[set] & (1U << way) && wrote[set] & (1U << way))
memory.CopyToEmu(addrs[set][way], reinterpret_cast<u8*>(data[set][way].data()), 32);
}
}
else if (addr & ICACHE_EXRAM_BIT)
Reset();
}
void Cache::Invalidate(u32 addr)
{
auto [set, way] = GetCache(addr, true);
if (way == 0xff)
return;
if (valid[set] & (1U << way))
{
t = lookup_table_ex[(addr >> 5) & 0x1fffff];
if (tags[set][way] & (CACHE_VMEM_BIT >> 12))
lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff;
else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12))
lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff;
else
lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff;
valid[set] &= ~(1U << way);
wrote[set] &= ~(1U << way);
}
}
void Cache::Flush(u32 addr)
{
  auto& memory = Core::System::GetInstance().GetMemory();

  // Look the line up without allocating it; 0xff means it is not cached.
  const auto [set, way] = GetCache(addr, true);
  if (way == 0xff)
    return;

  const u32 way_bit = 1U << way;
  if ((valid[set] & way_bit) == 0)
    return;

  // Write the line back to emulated memory if it has been written to since it was loaded.
  if (wrote[set] & way_bit)
    memory.CopyToEmu((addr & ~0x1f), reinterpret_cast<u8*>(data[set][way].data()), 32);

  // Remove the line from whichever lookup table it is registered in.
  const u32 tag = tags[set][way];
  const u32 line_index = (tag << 7) | set;
  if (tag & (CACHE_VMEM_BIT >> 12))
    lookup_table_vmem[line_index & 0xfffff] = 0xff;
  else if (tag & (CACHE_EXRAM_BIT >> 12))
    lookup_table_ex[line_index & 0x1fffff] = 0xff;
  else
    lookup_table[line_index & 0xfffff] = 0xff;

  // Finally mark the way as neither valid nor written.
  valid[set] &= ~way_bit;
  wrote[set] &= ~way_bit;
}
void Cache::Touch(u32 addr, bool store)
{
  // Only the side effects of the unlocked lookup are wanted here; the returned {set, way} pair
  // is discarded. `store` is currently not consulted.
  static_cast<void>(GetCache(addr, false));
}
std::pair<u32, u32> Cache::GetCache(u32 addr, bool locked)
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
addr &= ~31;
u32 set = (addr >> 5) & 0x7f;
u32 way;
if (addr & CACHE_VMEM_BIT)
{
way = lookup_table_vmem[(addr >> 5) & 0xfffff];
}
else if (addr & CACHE_EXRAM_BIT)
{
way = lookup_table_ex[(addr >> 5) & 0x1fffff];
}
else
{
t = lookup_table[(addr >> 5) & 0xfffff];
way = lookup_table[(addr >> 5) & 0xfffff];
}
if (t == 0xff) // load to the cache
// load to the cache
if (!locked && way == 0xff)
{
if (HID0.ILOCK) // instruction cache is locked
return memory.Read_U32(addr);
u32 tag = addr >> 12;
// select a way
if (valid[set] != 0xff)
t = s_way_from_valid[valid[set]];
way = s_way_from_valid[valid[set]];
else
t = s_way_from_plru[plru[set]];
// load
memory.CopyFromEmu(reinterpret_cast<u8*>(data[set][t].data()), (addr & ~0x1f), 32);
if (valid[set] & (1 << t))
way = s_way_from_plru[plru[set]];
if (valid[set] & (1 << way))
{
if (tags[set][t] & (ICACHE_VMEM_BIT >> 12))
lookup_table_vmem[((tags[set][t] << 7) | set) & 0xfffff] = 0xff;
else if (tags[set][t] & (ICACHE_EXRAM_BIT >> 12))
lookup_table_ex[((tags[set][t] << 7) | set) & 0x1fffff] = 0xff;
// store the cache back to main memory
if (wrote[set] & (1 << way))
memory.CopyToEmu(addrs[set][way], reinterpret_cast<u8*>(data[set][way].data()), 32);
if (tags[set][way] & (CACHE_VMEM_BIT >> 12))
lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff;
else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12))
lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff;
else
lookup_table[((tags[set][t] << 7) | set) & 0xfffff] = 0xff;
lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff;
}
if (addr & ICACHE_VMEM_BIT)
lookup_table_vmem[(addr >> 5) & 0xfffff] = t;
else if (addr & ICACHE_EXRAM_BIT)
lookup_table_ex[(addr >> 5) & 0x1fffff] = t;
// load
memory.CopyFromEmu(reinterpret_cast<u8*>(data[set][way].data()), (addr & ~0x1f), 32);
if (addr & CACHE_VMEM_BIT)
lookup_table_vmem[(addr >> 5) & 0xfffff] = way;
else if (addr & CACHE_EXRAM_BIT)
lookup_table_ex[(addr >> 5) & 0x1fffff] = way;
else
lookup_table[(addr >> 5) & 0xfffff] = t;
tags[set][t] = tag;
valid[set] |= (1 << t);
lookup_table[(addr >> 5) & 0xfffff] = way;
tags[set][way] = tag;
addrs[set][way] = addr;
valid[set] |= (1 << way);
wrote[set] &= ~(1 << way);
}
// update plru
plru[set] = (plru[set] & ~s_plru_mask[t]) | s_plru_value[t];
const u32 res = Common::swap32(data[set][t][(addr >> 2) & 7]);
const u32 inmem = memory.Read_U32(addr);
if (res != inmem)
{
INFO_LOG_FMT(POWERPC,
"ICache read at {:08x} returned stale data: CACHED: {:08x} vs. RAM: {:08x}", addr,
res, inmem);
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::ICACHE_MATTERS);
}
return res;
if (way != 0xff)
plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way];
return {set, way};
}
void InstructionCache::DoState(PointerWrap& p)
void Cache::Read(u32 addr, void* buffer, u32 len, bool locked)
{
  auto& memory = Core::System::GetInstance().GetMemory();

  // Copies len bytes starting at addr into buffer, one 32-byte cache line at a time.
  // `locked` is handed to GetCache() unchanged for every line.
  u8* out = static_cast<u8*>(buffer);
  while (len != 0)
  {
    // Never cross a line boundary in a single step.
    const u32 line_offset = addr & 31;
    const u32 chunk = std::min<u32>(len, 32 - line_offset);

    const auto [set, way] = GetCache(addr, locked);
    if (way == 0xff)
    {
      // Line not available in the cache: read straight from emulated memory.
      memory.CopyFromEmu(out, addr, chunk);
    }
    else
    {
      const u8* line = reinterpret_cast<const u8*>(data[set][way].data());
      std::memcpy(out, line + line_offset, chunk);
    }

    out += chunk;
    addr += chunk;
    len -= chunk;
  }
}
void Cache::Write(u32 addr, const void* buffer, u32 len, bool locked)
{
  auto& memory = Core::System::GetInstance().GetMemory();

  // Copies len bytes from buffer to addr, one 32-byte cache line at a time.
  // `locked` is handed to GetCache() unchanged for every line.
  const u8* in = static_cast<const u8*>(buffer);
  while (len != 0)
  {
    // Never cross a line boundary in a single step.
    const u32 line_offset = addr & 31;
    const u32 chunk = std::min<u32>(len, 32 - line_offset);

    const auto [set, way] = GetCache(addr, locked);
    if (way == 0xff)
    {
      // Line not available in the cache: write straight through to emulated memory.
      memory.CopyToEmu(addr, in, chunk);
    }
    else
    {
      u8* line = reinterpret_cast<u8*>(data[set][way].data());
      std::memcpy(line + line_offset, in, chunk);
      // Remember that this way now differs from memory so it gets written back later.
      wrote[set] |= (1 << way);
    }

    in += chunk;
    addr += chunk;
    len -= chunk;
  }
}
void Cache::DoState(PointerWrap& p)
{
if (p.IsReadMode())
{
// Clear valid parts of the lookup tables (this is done instead of using fill(0xff) to avoid
// loading the entire 4MB of tables into cache)
for (u32 set = 0; set < ICACHE_SETS; set++)
for (u32 set = 0; set < CACHE_SETS; set++)
{
for (u32 way = 0; way < ICACHE_WAYS; way++)
for (u32 way = 0; way < CACHE_WAYS; way++)
{
if ((valid[set] & (1 << way)) != 0)
{
const u32 addr = (tags[set][way] << 12) | (set << 5);
if (addr & ICACHE_VMEM_BIT)
if (addr & CACHE_VMEM_BIT)
lookup_table_vmem[(addr >> 5) & 0xfffff] = 0xff;
else if (addr & ICACHE_EXRAM_BIT)
else if (addr & CACHE_EXRAM_BIT)
lookup_table_ex[(addr >> 5) & 0x1fffff] = 0xff;
else
lookup_table[(addr >> 5) & 0xfffff] = 0xff;
@ -234,20 +370,22 @@ void InstructionCache::DoState(PointerWrap& p)
p.DoArray(tags);
p.DoArray(plru);
p.DoArray(valid);
p.DoArray(addrs);
p.DoArray(wrote);
if (p.IsReadMode())
{
// Recompute lookup tables
for (u32 set = 0; set < ICACHE_SETS; set++)
for (u32 set = 0; set < CACHE_SETS; set++)
{
for (u32 way = 0; way < ICACHE_WAYS; way++)
for (u32 way = 0; way < CACHE_WAYS; way++)
{
if ((valid[set] & (1 << way)) != 0)
{
const u32 addr = (tags[set][way] << 12) | (set << 5);
if (addr & ICACHE_VMEM_BIT)
if (addr & CACHE_VMEM_BIT)
lookup_table_vmem[(addr >> 5) & 0xfffff] = way;
else if (addr & ICACHE_EXRAM_BIT)
else if (addr & CACHE_EXRAM_BIT)
lookup_table_ex[(addr >> 5) & 0x1fffff] = way;
else
lookup_table[(addr >> 5) & 0xfffff] = way;
@ -257,6 +395,29 @@ void InstructionCache::DoState(PointerWrap& p)
}
}
u32 InstructionCache::ReadInstruction(u32 addr)
{
  auto& memory = Core::System::GetInstance().GetMemory();

  const bool icache_active = HID0.ICE && !m_disable_icache;
  if (icache_active)
  {
    // Fetch the raw word through the cache (HID0.ILOCK is passed as the `locked` argument)
    // and byte-swap it before returning.
    u32 raw_word;
    Read(addr, &raw_word, sizeof(raw_word), HID0.ILOCK);
    return Common::swap32(raw_word);
  }

  // Instruction cache is disabled: read memory directly.
  return memory.Read_U32(addr);
}
void InstructionCache::Invalidate(u32 addr)
{
  // Nothing to do while the instruction cache is off (HID0.ICE clear or disabled by config).
  const bool icache_active = HID0.ICE && !m_disable_icache;
  if (icache_active)
  {
    // Drop the line from the emulated cache, then invalidate the same line in the JIT.
    Cache::Invalidate(addr);
    JitInterface::InvalidateICacheLine(addr);
  }
}
void InstructionCache::RefreshConfig()
{
m_disable_icache = Config::Get(Config::MAIN_DISABLE_ICACHE);

View File

@ -12,20 +12,22 @@ class PointerWrap;
namespace PowerPC
{
constexpr u32 ICACHE_SETS = 128;
constexpr u32 ICACHE_WAYS = 8;
constexpr u32 CACHE_SETS = 128;
constexpr u32 CACHE_WAYS = 8;
// size of an instruction cache block in words
constexpr u32 ICACHE_BLOCK_SIZE = 8;
constexpr u32 CACHE_BLOCK_SIZE = 8;
constexpr u32 ICACHE_EXRAM_BIT = 0x10000000;
constexpr u32 ICACHE_VMEM_BIT = 0x20000000;
constexpr u32 CACHE_EXRAM_BIT = 0x10000000;
constexpr u32 CACHE_VMEM_BIT = 0x20000000;
struct InstructionCache
struct Cache
{
std::array<std::array<std::array<u32, ICACHE_BLOCK_SIZE>, ICACHE_WAYS>, ICACHE_SETS> data{};
std::array<std::array<u32, ICACHE_WAYS>, ICACHE_SETS> tags{};
std::array<u32, ICACHE_SETS> plru{};
std::array<u32, ICACHE_SETS> valid{};
std::array<std::array<std::array<u32, CACHE_BLOCK_SIZE>, CACHE_WAYS>, CACHE_SETS> data{};
std::array<std::array<u32, CACHE_WAYS>, CACHE_SETS> tags{};
std::array<u32, CACHE_SETS> plru{};
std::array<u32, CACHE_SETS> valid{};
std::array<std::array<u32, CACHE_WAYS>, CACHE_SETS> addrs{};
std::array<u32, CACHE_SETS> wrote{};
// Note: This is only for performance purposes; this same data could be computed at runtime
// from the tags and valid fields (and that's how it's done on the actual cache)
@ -33,16 +35,36 @@ struct InstructionCache
std::array<u8, 1 << 21> lookup_table_ex{};
std::array<u8, 1 << 20> lookup_table_vmem{};
bool m_disable_icache = false;
void Store(u32 addr);
void Invalidate(u32 addr);
void Flush(u32 addr);
void Touch(u32 addr, bool store);
void FlushAll();
std::pair<u32, u32> GetCache(u32 addr, bool locked);
void Read(u32 addr, void* buffer, u32 len, bool locked);
void Write(u32 addr, const void* buffer, u32 len, bool locked);
void Init();
void Reset();
void DoState(PointerWrap& p);
};
struct InstructionCache : public Cache
{
std::optional<size_t> m_config_callback_id = std::nullopt;
bool m_disable_icache = false;
InstructionCache() = default;
~InstructionCache();
u32 ReadInstruction(u32 addr);
void Invalidate(u32 addr);
void Init();
void Reset();
void DoState(PointerWrap& p);
void RefreshConfig();
};
} // namespace PowerPC

View File

@ -132,9 +132,16 @@ void DoState(PointerWrap& p)
p.Do(ppcState.reserve_address);
ppcState.iCache.DoState(p);
ppcState.dCache.DoState(p);
if (p.IsReadMode())
{
if (!ppcState.m_enable_dcache)
{
INFO_LOG_FMT(POWERPC, "Flushing data cache");
ppcState.dCache.FlushAll();
}
RoundingModeUpdated();
IBATUpdated();
DBATUpdated();
@ -266,6 +273,16 @@ void Init(CPUCore cpu_core)
InitializeCPUCore(cpu_core);
ppcState.iCache.Init();
ppcState.dCache.Init();
if (Config::Get(Config::MAIN_ACCURATE_CPU_CACHE))
{
ppcState.m_enable_dcache = true;
}
else
{
ppcState.m_enable_dcache = false;
}
if (Config::Get(Config::MAIN_ENABLE_DEBUGGING))
breakpoints.ClearAllTemporary();
@ -279,6 +296,7 @@ void Reset()
ResetRegisters();
ppcState.iCache.Reset();
ppcState.dCache.Reset();
}
void ScheduleInvalidateCacheThreadSafe(u32 address)

View File

@ -172,6 +172,8 @@ struct PowerPCState
u32 pagetable_hashmask = 0;
InstructionCache iCache;
bool m_enable_dcache = false;
Cache dCache;
// Reservation monitor for lwarx and its friend stwcxd.
bool reserve;

View File

@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue;
static std::condition_variable s_state_write_queue_is_empty;
// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 156; // Last changed in PR 11184
constexpr u32 STATE_VERSION = 157; // Last changed in PR 11183
// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
@ -223,14 +223,18 @@ static void DoState(PointerWrap& p)
g_video_backend->DoState(p);
p.DoMarker("video_backend");
PowerPC::DoState(p);
p.DoMarker("PowerPC");
// CoreTiming needs to be restored before restoring Hardware because
// the controller code might need to schedule an event if the controller has changed.
system.GetCoreTiming().DoState(p);
p.DoMarker("CoreTiming");
// HW needs to be restored before PowerPC because the data cache might need to be flushed.
HW::DoState(p);
p.DoMarker("HW");
PowerPC::DoState(p);
p.DoMarker("PowerPC");
if (SConfig::GetInstance().bWii)
Wiimote::DoState(p);
p.DoMarker("Wiimote");

View File

@ -74,6 +74,12 @@ void AdvancedPane::CreateLayout()
"affect performance.\nThe performance impact is the same as having Enable MMU on."));
cpu_options_group_layout->addWidget(m_pause_on_panic_checkbox);
m_accurate_cpu_cache_checkbox = new QCheckBox(tr("Enable Write-Back Cache (slow)"));
m_accurate_cpu_cache_checkbox->setToolTip(
tr("Enables emulation of the CPU write-back cache.\nEnabling will have a significant impact "
"on performance.\nThis should be left disabled unless absolutely needed."));
cpu_options_group_layout->addWidget(m_accurate_cpu_cache_checkbox);
auto* clock_override = new QGroupBox(tr("Clock Override"));
auto* clock_override_layout = new QVBoxLayout();
clock_override->setLayout(clock_override_layout);
@ -189,6 +195,9 @@ void AdvancedPane::ConnectLayout()
connect(m_pause_on_panic_checkbox, &QCheckBox::toggled, this,
[](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_PAUSE_ON_PANIC, checked); });
connect(m_accurate_cpu_cache_checkbox, &QCheckBox::toggled, this,
[](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_ACCURATE_CPU_CACHE, checked); });
m_cpu_clock_override_checkbox->setChecked(Config::Get(Config::MAIN_OVERCLOCK_ENABLE));
connect(m_cpu_clock_override_checkbox, &QCheckBox::toggled, [this](bool enable_clock_override) {
Config::SetBaseOrCurrent(Config::MAIN_OVERCLOCK_ENABLE, enable_clock_override);
@ -258,6 +267,9 @@ void AdvancedPane::Update()
m_pause_on_panic_checkbox->setChecked(Config::Get(Config::MAIN_PAUSE_ON_PANIC));
m_pause_on_panic_checkbox->setEnabled(!running);
m_accurate_cpu_cache_checkbox->setChecked(Config::Get(Config::MAIN_ACCURATE_CPU_CACHE));
m_accurate_cpu_cache_checkbox->setEnabled(!running);
QFont bf = font();
bf.setBold(Config::GetActiveLayerForConfig(Config::MAIN_OVERCLOCK_ENABLE) !=
Config::LayerType::Base);

View File

@ -33,6 +33,7 @@ private:
QComboBox* m_cpu_emulation_engine_combobox;
QCheckBox* m_enable_mmu_checkbox;
QCheckBox* m_pause_on_panic_checkbox;
QCheckBox* m_accurate_cpu_cache_checkbox;
QCheckBox* m_cpu_clock_override_checkbox;
QSlider* m_cpu_clock_override_slider;
QLabel* m_cpu_clock_override_slider_label;