CPU: Implement instruction cache simulation
Implemented for all execution modes. Disabled by default in the cached interpreter and recompiler, always enabled in the pure interpreter.
This commit is contained in:
parent
efc00a2d0e
commit
19d6037b99
229
src/core/bus.cpp
229
src/core/bus.cpp
|
@ -742,10 +742,153 @@ ALWAYS_INLINE static TickCount DoDMAAccess(u32 offset, u32& value)
|
|||
|
||||
namespace CPU {
|
||||
|
||||
template<bool add_ticks, bool icache_read = false, u32 word_count = 1>
|
||||
ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void* data)
|
||||
{
|
||||
using namespace Bus;
|
||||
|
||||
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
|
||||
|
||||
if (address < RAM_MIRROR_END)
|
||||
{
|
||||
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
|
||||
if constexpr (add_ticks)
|
||||
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
{
|
||||
std::memcpy(data, &g_bios[(address - BIOS_BASE) & BIOS_MASK], sizeof(u32));
|
||||
if constexpr (add_ticks)
|
||||
g_state.pending_ticks += m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * word_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
|
||||
std::memset(data, 0, sizeof(u32) * word_count);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the tick cost of fetching a single instruction word from `address` without the icache.
/// RAM costs 4 ticks, BIOS costs the configured word access time, anything else costs 0.
TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
{
  using namespace Bus;

  // Strip the segment bits so the comparisons below work on the physical address.
  const VirtualMemoryAddress masked = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  if (masked < RAM_MIRROR_END)
    return 4;

  const bool in_bios = (masked >= BIOS_BASE) && (masked < (BIOS_BASE + BIOS_SIZE));
  if (in_bios)
    return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)];

  return 0;
}
|
||||
|
||||
/// Returns the tick cost of filling one whole icache line from `address`.
/// RAM is charged 1 tick per word, BIOS the configured word access time per word, anything else 0.
TickCount GetICacheFillTicks(VirtualMemoryAddress address)
{
  using namespace Bus;

  // Number of 32-bit words in one cache line.
  constexpr auto words_per_line = ICACHE_LINE_SIZE / sizeof(u32);

  const VirtualMemoryAddress masked = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  if (masked < RAM_MIRROR_END)
    return 1 * words_per_line;

  const bool in_bios = (masked >= BIOS_BASE) && (masked < (BIOS_BASE + BIOS_SIZE));
  if (in_bios)
    return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * words_per_line;

  return 0;
}
|
||||
|
||||
/// Accounts for the instruction fetch cost of a block starting at the current PC.
///
/// For a cached PC, walks `line_count` consecutive cache lines, claims every line whose tag does
/// not match and charges one line fill for each. For an uncached PC, charges `uncached_ticks`.
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
{
  VirtualMemoryAddress line_address = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK;

  if (!IsCachedAddress(line_address))
  {
    g_state.pending_ticks += uncached_ticks;
    return;
  }

  // The fill cost is looked up once, for the first line, and reused for the rest.
  const TickCount fill_cost = GetICacheFillTicks(line_address);
  TickCount miss_ticks = 0;

  for (u32 remaining = line_count; remaining > 0; remaining--)
  {
    u32& tag = g_state.icache_tags[GetICacheLine(line_address)];
    if (tag != line_address)
    {
      // Miss: take ownership of the line and pay for the fill.
      tag = line_address;
      miss_ticks += fill_cost;
    }

    line_address += ICACHE_LINE_SIZE;
  }

  g_state.pending_ticks += miss_ticks;
}
|
||||
|
||||
/// Loads the cache line containing `address`, updates its tag, and returns the word at `address`.
/// The line fill is charged to pending_ticks by DoInstructionRead.
u32 FillICache(VirtualMemoryAddress address)
{
  const u32 line_index = GetICacheLine(address);
  u8* const line_bytes = g_state.icache_data.data() + (line_index * ICACHE_LINE_SIZE);

  g_state.icache_tags[line_index] = GetICacheTagForAddress(address);

  // Fetch all four words of the line, starting from the line-aligned address.
  const VirtualMemoryAddress line_start = address & ~(ICACHE_LINE_SIZE - 1u);
  DoInstructionRead<true, true, 4>(line_start, line_bytes);

  u32 word;
  std::memcpy(&word, line_bytes + GetICacheLineOffset(address), sizeof(word));
  return word;
}
|
||||
|
||||
void ClearICache()
|
||||
{
|
||||
std::memset(g_state.icache_data.data(), 0, ICACHE_SIZE);
|
||||
g_state.icache_tags.fill(ICACHE_INVALD_BIT | ICACHE_DISABLED_BIT);
|
||||
}
|
||||
|
||||
/// Returns the word stored in the icache data array for `address`. The tag is not checked here.
ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address)
{
  const u32 byte_index = (GetICacheLine(address) * ICACHE_LINE_SIZE) + GetICacheLineOffset(address);

  u32 word;
  std::memcpy(&word, &g_state.icache_data[byte_index], sizeof(word));
  return word;
}
|
||||
|
||||
/// Stores `value` into the icache data array at `address` and marks the containing line invalid.
ALWAYS_INLINE_RELEASE static void WriteICache(VirtualMemoryAddress address, u32 value)
{
  const u32 line_index = GetICacheLine(address);
  u8* const dest = &g_state.icache_data[(line_index * ICACHE_LINE_SIZE) + GetICacheLineOffset(address)];
  std::memcpy(dest, &value, sizeof(value));

  // The invalid bit guarantees the tag cannot match a later lookup of this address.
  g_state.icache_tags[line_index] = GetICacheTagForAddress(address) | ICACHE_INVALD_BIT;
}
|
||||
|
||||
/// Handles a write to the cache control register.
///
/// Stores the new value and, when the icache enable bit toggles, sets or clears
/// ICACHE_DISABLED_BIT on every tag so that lookups miss while the cache is disabled.
static void WriteCacheControl(u32 value)
{
  Log_WarningPrintf("Cache control <- 0x%08X", value);

  // Work out which bits changed BEFORE storing the new value; storing first would make the
  // XOR always zero and the enable/disable handling below would never run.
  CacheControl changed_bits{g_state.cache_control.bits ^ value};
  g_state.cache_control.bits = value;

  if (!changed_bits.icache_enable)
    return;

  if (g_state.cache_control.icache_enable)
  {
    for (u32& tag : g_state.icache_tags)
      tag &= ~ICACHE_DISABLED_BIT;
  }
  else
  {
    for (u32& tag : g_state.icache_tags)
      tag |= ICACHE_DISABLED_BIT;
  }
}
|
||||
|
||||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
|
@ -797,7 +940,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
|
|||
if constexpr (type == MemoryAccessType::Write)
|
||||
{
|
||||
if (g_state.cop0_regs.sr.Isc)
|
||||
{
|
||||
WriteICache(address, value);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
|
||||
|
@ -829,7 +975,7 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
|
|||
if (address == 0xFFFE0130)
|
||||
{
|
||||
if constexpr (type == MemoryAccessType::Read)
|
||||
value = g_state.cache_control;
|
||||
value = g_state.cache_control.bits;
|
||||
else
|
||||
WriteCacheControl(value);
|
||||
|
||||
|
@ -849,6 +995,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
|
|||
{
|
||||
return DoRAMAccess<type, size>(address, value);
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
{
|
||||
return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value);
|
||||
}
|
||||
else if (address < EXP1_BASE)
|
||||
{
|
||||
return DoInvalidAccess(type, size, address, value);
|
||||
|
@ -921,14 +1071,6 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
|
|||
{
|
||||
return DoEXP2Access<type, size>(address & EXP2_MASK, value);
|
||||
}
|
||||
else if (address < BIOS_BASE)
|
||||
{
|
||||
return DoInvalidAccess(type, size, address, value);
|
||||
}
|
||||
else if (address < (BIOS_BASE + BIOS_SIZE))
|
||||
{
|
||||
return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value);
|
||||
}
|
||||
else
|
||||
{
|
||||
return DoInvalidAccess(type, size, address, value);
|
||||
|
@ -961,12 +1103,45 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address)
|
|||
bool FetchInstruction()
|
||||
{
|
||||
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
|
||||
if (DoMemoryAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(g_state.regs.npc, g_state.next_instruction.bits) <
|
||||
0)
|
||||
|
||||
using namespace Bus;
|
||||
|
||||
PhysicalMemoryAddress address = g_state.regs.npc;
|
||||
switch (address >> 29)
|
||||
{
|
||||
// Bus errors don't set BadVaddr.
|
||||
RaiseException(g_state.regs.npc, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
|
||||
return false;
|
||||
case 0x00: // KUSEG 0M-512M
|
||||
case 0x04: // KSEG0 - physical memory cached
|
||||
{
|
||||
#if 0
|
||||
// TODO: icache
|
||||
TickCount cycles;
|
||||
DoInstructionRead(address, cycles, g_state.next_instruction.bits);
|
||||
#else
|
||||
if (CompareICacheTag(address))
|
||||
g_state.next_instruction.bits = ReadICache(address);
|
||||
else
|
||||
g_state.next_instruction.bits = FillICache(address);
|
||||
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x05: // KSEG1 - physical memory uncached
|
||||
{
|
||||
DoInstructionRead<true, false, 1>(address, &g_state.next_instruction.bits);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x01: // KUSEG 512M-1024M
|
||||
case 0x02: // KUSEG 1024M-1536M
|
||||
case 0x03: // KUSEG 1536M-2048M
|
||||
case 0x06: // KSEG2
|
||||
case 0x07: // KSEG2
|
||||
default:
|
||||
{
|
||||
CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
g_state.regs.pc = g_state.regs.npc;
|
||||
|
@ -974,6 +1149,30 @@ bool FetchInstruction()
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Reads the instruction word at `addr` without charging ticks and without raising exceptions.
///
/// @param addr  Virtual address of the instruction.
/// @param value Receives the instruction word; set to zero when a KUSEG/KSEG0/KSEG1 address
///              does not map to RAM or BIOS.
/// @return true if the word was read, false if the address cannot be fetched from.
bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value)
{
  using namespace Bus;

  switch (addr >> 29)
  {
    case 0x00: // KUSEG 0M-512M
    case 0x04: // KSEG0 - physical memory cached
    case 0x05: // KSEG1 - physical memory uncached
    {
      // DoInstructionRead raises a bus error for anything outside RAM/BIOS, which a "safe"
      // read must not do. Reject those addresses here instead.
      const PhysicalMemoryAddress phys_addr = addr & PHYSICAL_MEMORY_ADDRESS_MASK;
      const bool is_ram = (phys_addr < RAM_MIRROR_END);
      const bool is_bios = (phys_addr >= BIOS_BASE && phys_addr < (BIOS_BASE + BIOS_SIZE));
      if (!is_ram && !is_bios)
      {
        // Same zero result the failing read used to produce, minus the exception.
        *value = 0;
        return false;
      }

      DoInstructionRead<false, false, 1>(addr, value);
      return true;
    }

    case 0x01: // KUSEG 512M-1024M
    case 0x02: // KUSEG 1024M-1536M
    case 0x03: // KUSEG 1536M-2048M
    case 0x06: // KSEG2
    case 0x07: // KSEG2
    default:
    {
      return false;
    }
  }
}
|
||||
|
||||
bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value)
|
||||
{
|
||||
u32 temp = 0;
|
||||
|
|
|
@ -78,41 +78,6 @@ extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
|
|||
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
|
||||
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
|
||||
|
||||
/// Returns the address which should be used for code caching (i.e. removes mirrors).
|
||||
ALWAYS_INLINE PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
// RAM
|
||||
if (address < 0x800000)
|
||||
return address & UINT32_C(0x1FFFFF);
|
||||
else
|
||||
return address;
|
||||
}
|
||||
|
||||
/// Returns true if the address specified is cacheable (RAM or BIOS).
|
||||
ALWAYS_INLINE bool IsCacheableAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE));
|
||||
}
|
||||
|
||||
/// Reads a cachable address (RAM or BIOS).
|
||||
ALWAYS_INLINE u32 ReadCacheableAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
u32 value;
|
||||
if (address < RAM_MIRROR_END)
|
||||
{
|
||||
std::memcpy(&value, &g_ram[address & RAM_MASK], sizeof(value));
|
||||
return value;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::memcpy(&value, &g_bios[address & BIOS_MASK], sizeof(value));
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the address specified is writable (RAM).
|
||||
ALWAYS_INLINE bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; }
|
||||
|
||||
/// Flags a RAM region as code, so we know when to invalidate blocks.
|
||||
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
|
||||
|
||||
|
|
|
@ -139,8 +139,7 @@ static void ExecuteImpl()
|
|||
{
|
||||
if (HasPendingInterrupt())
|
||||
{
|
||||
// TODO: Fill in m_next_instruction...
|
||||
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
DispatchInterrupt();
|
||||
next_block_key = GetNextBlockKey();
|
||||
}
|
||||
|
@ -165,6 +164,9 @@ static void ExecuteImpl()
|
|||
LogCurrentState();
|
||||
#endif
|
||||
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
|
||||
|
||||
InterpretCachedBlock<pgxp_mode>(*block);
|
||||
|
||||
if (g_state.pending_ticks >= g_state.downcount)
|
||||
|
@ -247,7 +249,7 @@ void ExecuteRecompiler()
|
|||
{
|
||||
if (HasPendingInterrupt())
|
||||
{
|
||||
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
DispatchInterrupt();
|
||||
}
|
||||
|
||||
|
@ -351,7 +353,8 @@ bool RevalidateBlock(CodeBlock* block)
|
|||
{
|
||||
for (const CodeBlockInstruction& cbi : block->instructions)
|
||||
{
|
||||
u32 new_code = Bus::ReadCacheableAddress(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK);
|
||||
u32 new_code = 0;
|
||||
SafeReadInstruction(cbi.pc, &new_code);
|
||||
if (cbi.instruction.bits != new_code)
|
||||
{
|
||||
Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc,
|
||||
|
@ -395,16 +398,12 @@ bool CompileBlock(CodeBlock* block)
|
|||
__debugbreak();
|
||||
#endif
|
||||
|
||||
u32 last_cache_line = ICACHE_LINES;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
CodeBlockInstruction cbi = {};
|
||||
|
||||
const PhysicalMemoryAddress phys_addr = pc & PHYSICAL_MEMORY_ADDRESS_MASK;
|
||||
if (!Bus::IsCacheableAddress(phys_addr))
|
||||
break;
|
||||
|
||||
cbi.instruction.bits = Bus::ReadCacheableAddress(phys_addr);
|
||||
if (!IsInvalidInstruction(cbi.instruction))
|
||||
if (!SafeReadInstruction(pc, &cbi.instruction.bits) || !IsInvalidInstruction(cbi.instruction))
|
||||
break;
|
||||
|
||||
cbi.pc = pc;
|
||||
|
@ -416,6 +415,18 @@ bool CompileBlock(CodeBlock* block)
|
|||
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
|
||||
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
|
||||
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
{
|
||||
const u32 icache_line = GetICacheLine(pc);
|
||||
if (icache_line != last_cache_line)
|
||||
{
|
||||
block->icache_line_count++;
|
||||
block->icache_line_count = GetICacheFillTicks(pc);
|
||||
last_cache_line = icache_line;
|
||||
}
|
||||
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
|
||||
}
|
||||
|
||||
// instruction is decoded now
|
||||
block->instructions.push_back(cbi);
|
||||
pc += sizeof(cbi.instruction.bits);
|
||||
|
|
|
@ -61,6 +61,8 @@ struct CodeBlock
|
|||
std::vector<CodeBlock*> link_predecessors;
|
||||
std::vector<CodeBlock*> link_successors;
|
||||
|
||||
TickCount uncached_fetch_ticks = 0;
|
||||
u32 icache_line_count = 0;
|
||||
bool invalidated = false;
|
||||
|
||||
const u32 GetPC() const { return key.GetPC(); }
|
||||
|
|
|
@ -80,6 +80,8 @@ void Reset()
|
|||
g_state.cop0_regs.sr.bits = 0;
|
||||
g_state.cop0_regs.cause.bits = 0;
|
||||
|
||||
ClearICache();
|
||||
|
||||
GTE::Reset();
|
||||
|
||||
SetPC(RESET_VECTOR);
|
||||
|
@ -117,14 +119,17 @@ bool DoState(StateWrapper& sw)
|
|||
sw.Do(&g_state.load_delay_value);
|
||||
sw.Do(&g_state.next_load_delay_reg);
|
||||
sw.Do(&g_state.next_load_delay_value);
|
||||
sw.Do(&g_state.cache_control);
|
||||
sw.Do(&g_state.cache_control.bits);
|
||||
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
|
||||
|
||||
if (!GTE::DoState(sw))
|
||||
return false;
|
||||
|
||||
if (sw.IsReading())
|
||||
{
|
||||
ClearICache();
|
||||
PGXP::Initialize();
|
||||
}
|
||||
|
||||
return !sw.HasError();
|
||||
}
|
||||
|
@ -1416,7 +1421,6 @@ void InterpretCachedBlock(const CodeBlock& block)
|
|||
{
|
||||
// set up the state so we've already fetched the instruction
|
||||
DebugAssert(g_state.regs.pc == block.GetPC());
|
||||
|
||||
g_state.regs.npc = block.GetPC() + 4;
|
||||
|
||||
for (const CodeBlockInstruction& cbi : block.instructions)
|
||||
|
|
|
@ -19,7 +19,32 @@ enum : PhysicalMemoryAddress
|
|||
DCACHE_LOCATION = UINT32_C(0x1F800000),
|
||||
DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00),
|
||||
DCACHE_OFFSET_MASK = UINT32_C(0x000003FF),
|
||||
DCACHE_SIZE = UINT32_C(0x00000400)
|
||||
DCACHE_SIZE = UINT32_C(0x00000400),
|
||||
ICACHE_SIZE = UINT32_C(0x00001000),
|
||||
ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32),
|
||||
ICACHE_LINE_SIZE = 16,
|
||||
ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE,
|
||||
ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES,
|
||||
ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u
|
||||
};
|
||||
|
||||
// Flag bits OR'd into icache tag entries. Tags are 16-byte aligned addresses, so these low bits
// are otherwise zero and a tag carrying either flag never equals a lookup tag.
enum : u32
{
  ICACHE_DISABLED_BIT = (1u << 0), // 0x01
  ICACHE_INVALD_BIT = (1u << 1),   // 0x02
};
|
||||
|
||||
union CacheControl
|
||||
{
|
||||
u32 bits;
|
||||
|
||||
BitField<u32, bool, 0, 1> lock_mode;
|
||||
BitField<u32, bool, 1, 1> invalidate_mode;
|
||||
BitField<u32, bool, 2, 1> tag_test_mode;
|
||||
BitField<u32, bool, 3, 1> dcache_scratchpad;
|
||||
BitField<u32, bool, 7, 1> dcache_enable;
|
||||
BitField<u32, u8, 8, 2> icache_fill_size; // actually dcache? icache always fills to 16 bytes
|
||||
BitField<u32, bool, 11, 1> icache_enable;
|
||||
};
|
||||
|
||||
struct State
|
||||
|
@ -49,13 +74,15 @@ struct State
|
|||
Reg next_load_delay_reg = Reg::count;
|
||||
u32 next_load_delay_value = 0;
|
||||
|
||||
u32 cache_control = 0;
|
||||
CacheControl cache_control{ 0 };
|
||||
|
||||
// GTE registers are stored here so we can access them on ARM with a single instruction
|
||||
GTE::Regs gte_regs = {};
|
||||
|
||||
// data cache (used as scratchpad)
|
||||
std::array<u8, DCACHE_SIZE> dcache = {};
|
||||
std::array<u32, ICACHE_LINES> icache_tags = {};
|
||||
std::array<u8, ICACHE_SIZE> icache_data = {};
|
||||
};
|
||||
|
||||
extern State g_state;
|
||||
|
@ -64,6 +91,7 @@ void Initialize();
|
|||
void Shutdown();
|
||||
void Reset();
|
||||
bool DoState(StateWrapper& sw);
|
||||
void ClearICache();
|
||||
|
||||
/// Executes interpreter loop.
|
||||
void Execute();
|
||||
|
|
|
@ -34,8 +34,38 @@ ALWAYS_INLINE static void DispatchInterrupt()
|
|||
g_state.regs.pc);
|
||||
}
|
||||
|
||||
// icache stuff
|
||||
/// Returns true when `address` lies in a segment that goes through the icache (KUSEG, KSEG0).
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
{
  // The top three address bits select the segment; 0-4 are treated as cached.
  const u32 segment = (address >> 29);
  return (segment <= 4);
}
|
||||
/// Returns the index of the icache line that `address` maps to (address bits 4-11).
ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address)
{
  const u32 line_number = (address >> 4);
  return (line_number & 0xFFu);
}
|
||||
/// Returns the byte offset of `address` within its icache line.
ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address)
{
  const u32 offset_mask = (ICACHE_LINE_SIZE - 1);
  return (address & offset_mask);
}
|
||||
/// Returns the tag value for `address`: the address with its in-line offset bits cleared.
ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{
  const u32 tag = (address & ICACHE_TAG_ADDRESS_MASK);
  return tag;
}
|
||||
/// Returns true when the line that `address` maps to currently holds the tag for `address`.
ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address)
{
  const u32 expected_tag = GetICacheTagForAddress(address);
  const u32 stored_tag = g_state.icache_tags[GetICacheLine(address)];
  return (stored_tag == expected_tag);
}
|
||||
|
||||
TickCount GetInstructionReadTicks(VirtualMemoryAddress address);
|
||||
TickCount GetICacheFillTicks(VirtualMemoryAddress address);
|
||||
u32 FillICache(VirtualMemoryAddress address);
|
||||
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
|
||||
|
||||
// defined in cpu_memory.cpp - memory access functions which return false if an exception was thrown.
|
||||
bool FetchInstruction();
|
||||
bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value);
|
||||
bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value);
|
||||
bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value);
|
||||
bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
||||
|
|
|
@ -34,7 +34,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
|||
const CodeBlockInstruction* cbi = m_block_start;
|
||||
while (cbi != m_block_end)
|
||||
{
|
||||
#ifndef Y_BUILD_CONFIG_RELEASE
|
||||
#ifdef _DEBUG
|
||||
SmallString disasm;
|
||||
DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr);
|
||||
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
|
||||
|
@ -840,6 +840,9 @@ void CodeGenerator::BlockPrologue()
|
|||
{
|
||||
EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
|
||||
|
||||
if (m_block->uncached_fetch_ticks > 0)
|
||||
EmitICacheCheckAndUpdate();
|
||||
|
||||
// we don't know the state of the last block, so assume load delays might be in progress
|
||||
// TODO: Pull load delay into register cache
|
||||
m_current_instruction_in_branch_delay_slot_dirty = true;
|
||||
|
|
|
@ -61,6 +61,7 @@ public:
|
|||
void EmitFlushInterpreterLoadDelay();
|
||||
void EmitMoveNextInterpreterLoadDelay();
|
||||
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
||||
void EmitICacheCheckAndUpdate();
|
||||
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
||||
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
||||
void EmitAddCPUStructField(u32 offset, const Value& value);
|
||||
|
|
|
@ -22,4 +22,48 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
|
|||
m_load_delay_dirty = true;
|
||||
}
|
||||
|
||||
#ifndef CPU_X64
|
||||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
Value pc = CalculatePC();
|
||||
Value temp = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.InhibitAllocation();
|
||||
|
||||
EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29));
|
||||
LabelType is_cached;
|
||||
LabelType ready_to_execute;
|
||||
EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
EmitBranch(&ready_to_execute);
|
||||
EmitBindLabel(&is_cached);
|
||||
|
||||
// cached path
|
||||
EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK));
|
||||
VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
|
||||
{
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_address);
|
||||
if (fill_ticks <= 0)
|
||||
continue;
|
||||
|
||||
const u32 line = GetICacheLine(current_address);
|
||||
const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
|
||||
LabelType cache_hit;
|
||||
|
||||
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
|
||||
EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(fill_ticks)));
|
||||
EmitStoreCPUStructField(offset, pc);
|
||||
EmitBindLabel(&cache_hit);
|
||||
EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false);
|
||||
}
|
||||
|
||||
EmitBindLabel(&ready_to_execute);
|
||||
m_register_cache.UnunhibitAllocation();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -2187,6 +2187,52 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
|||
m_emit->L(skip_cancel);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
Value pc = CalculatePC();
|
||||
Value seg = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.InhibitAllocation();
|
||||
|
||||
m_emit->mov(GetHostReg32(seg), GetHostReg32(pc));
|
||||
m_emit->shr(GetHostReg32(seg), 29);
|
||||
|
||||
Xbyak::Label is_cached;
|
||||
m_emit->cmp(GetHostReg32(seg), 4);
|
||||
m_emit->jle(is_cached);
|
||||
|
||||
// uncached
|
||||
Xbyak::Label done;
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],
|
||||
static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR);
|
||||
|
||||
// cached
|
||||
m_emit->L(is_cached);
|
||||
m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK);
|
||||
|
||||
VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
|
||||
{
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_address);
|
||||
if (fill_ticks <= 0)
|
||||
continue;
|
||||
|
||||
const u32 line = GetICacheLine(current_address);
|
||||
const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
|
||||
Xbyak::Label cache_hit;
|
||||
|
||||
m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]);
|
||||
m_emit->je(cache_hit);
|
||||
m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc));
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast<u32>(fill_ticks));
|
||||
m_emit->L(cache_hit);
|
||||
m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE);
|
||||
}
|
||||
|
||||
m_emit->L(done);
|
||||
m_register_cache.UnunhibitAllocation();
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
||||
{
|
||||
const s64 jump_distance =
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace Recompiler::Thunks {
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
bool InterpretInstruction();
|
||||
bool InterpretInstructionPGXP();
|
||||
void CheckAndUpdateICache(u32 pc, u32 line_count);
|
||||
|
||||
// Memory access functions for the JIT - MSB is set on exception.
|
||||
u64 ReadMemoryByte(u32 address);
|
||||
|
|
|
@ -362,6 +362,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
|
|||
|
||||
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
si.SetBoolValue("CPU", "ICache", false);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
|
||||
si.SetIntValue("GPU", "ResolutionScale", 1);
|
||||
|
@ -452,7 +453,8 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages)
|
|||
{
|
||||
if (display_osd_messages)
|
||||
{
|
||||
AddOSDMessage(TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f);
|
||||
AddOSDMessage(
|
||||
TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f);
|
||||
}
|
||||
g_settings.gpu_pgxp_enable = false;
|
||||
}
|
||||
|
@ -510,6 +512,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
|||
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
|
||||
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
|
||||
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
|
||||
|
@ -520,6 +524,15 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
|||
CPU::CodeCache::Flush();
|
||||
}
|
||||
|
||||
if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter &&
|
||||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
|
||||
{
|
||||
AddFormattedOSDMessage(5.0f, "CPU ICache %s, flushing all blocks.",
|
||||
g_settings.cpu_recompiler_icache ? "enabled" : "disabled");
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
m_audio_stream->SetOutputVolume(g_settings.audio_output_muted ? 0 : g_settings.audio_output_volume);
|
||||
|
||||
if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale ||
|
||||
|
|
|
@ -92,6 +92,7 @@ void Settings::Load(SettingsInterface& si)
|
|||
si.GetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(DEFAULT_CPU_EXECUTION_MODE)).c_str())
|
||||
.value_or(DEFAULT_CPU_EXECUTION_MODE);
|
||||
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
|
||||
|
||||
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
|
||||
.value_or(DEFAULT_GPU_RENDERER);
|
||||
|
@ -206,6 +207,7 @@ void Settings::Save(SettingsInterface& si) const
|
|||
|
||||
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
|
||||
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
|
||||
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());
|
||||
|
|
|
@ -69,6 +69,7 @@ struct Settings
|
|||
|
||||
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
|
||||
bool cpu_recompiler_memory_exceptions = false;
|
||||
bool cpu_recompiler_icache = false;
|
||||
|
||||
float emulation_speed = 1.0f;
|
||||
bool speed_limiter_enabled = true;
|
||||
|
|
|
@ -370,7 +370,7 @@ void LibretroHostInterface::OnSystemDestroyed()
|
|||
m_using_hardware_renderer = false;
|
||||
}
|
||||
|
||||
static std::array<retro_core_option_definition, 31> s_option_definitions = {{
|
||||
static std::array<retro_core_option_definition, 32> s_option_definitions = {{
|
||||
{"duckstation_Console.Region",
|
||||
"Console Region",
|
||||
"Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.",
|
||||
|
@ -406,6 +406,12 @@ static std::array<retro_core_option_definition, 31> s_option_definitions = {{
|
|||
"Which mode to use for CPU emulation. Recompiler provides the best performance.",
|
||||
{{"Interpreter", "Interpreter"}, {"CachedIntepreter", "Cached Interpreter"}, {"Recompiler", "Recompiler"}},
|
||||
"Recompiler"},
|
||||
{"duckstation_CPU.RecompilerICache",
|
||||
"CPU Recompiler ICache",
|
||||
"Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost "
|
||||
"to performance. If games are running too fast, try enabling this option.",
|
||||
{{"true", "Enabled"}, {"false", "Disabled"}},
|
||||
"false"},
|
||||
{"duckstation_GPU.Renderer",
|
||||
"GPU Renderer",
|
||||
"Which renderer to use to emulate the GPU",
|
||||
|
|
|
@ -27,6 +27,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
|
|||
SettingWidgetBinder::BindWidgetToIntSetting(m_host_interface, m_ui.gpuMaxRunAhead, "Hacks", "GPUMaxRunAhead");
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerMemoryExceptions, "CPU",
|
||||
"RecompilerMemoryExceptions", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerICache, "CPU", "RecompilerICache",
|
||||
false);
|
||||
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showDebugMenu, "Main", "ShowDebugMenu");
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuUseDebugDevice, "GPU", "UseDebugDevice");
|
||||
|
@ -38,6 +40,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
|
|||
dialog->registerWidgetHelp(m_ui.gpuUseDebugDevice, tr("Use Debug Host GPU Device"), tr("Unchecked"),
|
||||
tr("Enables the usage of debug devices and shaders for rendering APIs which support them. "
|
||||
"Should only be used when debugging the emulator."));
|
||||
dialog->registerWidgetHelp(
|
||||
m_ui.cpuRecompilerICache, tr("Enable Recompiler ICache"), tr("Unchecked"),
|
||||
tr("Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small "
|
||||
"cost to performance. If games are running too fast, try enabling this option."));
|
||||
}
|
||||
|
||||
AdvancedSettingsWidget::~AdvancedSettingsWidget() = default;
|
||||
|
|
|
@ -184,6 +184,20 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="cpuRecompilerMemoryExceptions">
|
||||
<property name="text">
|
||||
<string>Enable Recompiler Memory Exceptions</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="1">
|
||||
<widget class="QCheckBox" name="cpuRecompilerICache">
|
||||
<property name="text">
|
||||
<string>Enable Recompiler ICache</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0" colspan="2">
|
||||
<widget class="QPushButton" name="resetToDefaultButton">
|
||||
<property name="text">
|
||||
|
@ -191,13 +205,6 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="cpuRecompilerMemoryExceptions">
|
||||
<property name="text">
|
||||
<string>Enable Recompiler Memory Exceptions</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -950,6 +950,11 @@ void SDLHostInterface::DrawDebugMenu()
|
|||
settings_changed |= ImGui::MenuItem("Show Timers State", nullptr, &debug_settings.show_timers_state);
|
||||
settings_changed |= ImGui::MenuItem("Show MDEC State", nullptr, &debug_settings.show_mdec_state);
|
||||
|
||||
ImGui::Separator();
|
||||
|
||||
settings_changed |= ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions);
|
||||
settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache);
|
||||
|
||||
if (settings_changed)
|
||||
{
|
||||
// have to apply it to the copy too, otherwise it won't save
|
||||
|
|
Loading…
Reference in New Issue