CPU/Recompiler: Optimize constant reads (and some writes)
This commit is contained in:
parent
69b2c3fc2d
commit
0afdc04d88
|
@ -22,6 +22,11 @@ Log_SetChannel(Bus);
|
||||||
|
|
||||||
namespace Bus {
|
namespace Bus {
|
||||||
|
|
||||||
|
// Named tick cost for a read from main RAM. It replaces the literal 4 that
// the RAM read paths in this file used before this change.
enum : TickCount
|
||||||
|
{
|
||||||
|
RAM_READ_TICKS = 4
|
||||||
|
};
|
||||||
|
|
||||||
union MEMDELAY
|
union MEMDELAY
|
||||||
{
|
{
|
||||||
u32 bits;
|
u32 bits;
|
||||||
|
@ -288,7 +293,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (type == MemoryAccessType::Read) ? 4 : 0;
|
return (type == MemoryAccessType::Read) ? RAM_READ_TICKS : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MemoryAccessType type, MemoryAccessSize size>
|
template<MemoryAccessType type, MemoryAccessSize size>
|
||||||
|
@ -753,7 +758,7 @@ ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void
|
||||||
{
|
{
|
||||||
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
|
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
|
||||||
if constexpr (add_ticks)
|
if constexpr (add_ticks)
|
||||||
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
|
g_state.pending_ticks += (icache_read ? 1 : RAM_READ_TICKS) * word_count;
|
||||||
}
|
}
|
||||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||||
{
|
{
|
||||||
|
@ -776,7 +781,7 @@ TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
|
||||||
|
|
||||||
if (address < RAM_MIRROR_END)
|
if (address < RAM_MIRROR_END)
|
||||||
{
|
{
|
||||||
return 4;
|
return RAM_READ_TICKS;
|
||||||
}
|
}
|
||||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||||
{
|
{
|
||||||
|
@ -1307,6 +1312,64 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value)
|
||||||
return DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(addr, value) >= 0;
|
return DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(addr, value) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolves a guest virtual address to a host pointer that can be read directly.
//
// address    - guest virtual address to resolve.
// size       - access width; only used to index m_bios_access_time for BIOS reads.
// read_ticks - optional out-parameter (may be null). It is written only when a
//              non-null pointer is returned: RAM_READ_TICKS for RAM, 0 for the
//              dcache region, and the per-size BIOS access time for BIOS.
//
// Returns a pointer into g_ram, g_state.dcache or g_bios, or nullptr when the
// address is in none of those regions or in a segment this function rejects.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks)
|
||||||
|
{
|
||||||
|
using namespace Bus;
|
||||||
|
|
||||||
|
// The top three address bits select the segment; only segments 0, 4 and 5 are
// accepted (presumably KUSEG/KSEG0/KSEG1 -- TODO confirm against the CPU docs).
const u32 seg = (address >> 29);
|
||||||
|
if (seg != 0 && seg != 4 && seg != 5)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
|
||||||
|
// Main RAM, including its mirrors: the offset is folded back with RAM_MASK.
if (paddr < RAM_MIRROR_END)
|
||||||
|
{
|
||||||
|
if (read_ticks)
|
||||||
|
*read_ticks = RAM_READ_TICKS;
|
||||||
|
|
||||||
|
return &g_ram[paddr & RAM_MASK];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Region backed by g_state.dcache; reads from it are charged zero ticks.
if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
|
||||||
|
{
|
||||||
|
if (read_ticks)
|
||||||
|
*read_ticks = 0;
|
||||||
|
|
||||||
|
return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
|
||||||
|
}
|
||||||
|
|
||||||
|
// BIOS: the tick cost depends on the access size.
if (paddr >= BIOS_BASE && paddr < (BIOS_BASE + BIOS_SIZE))
|
||||||
|
{
|
||||||
|
if (read_ticks)
|
||||||
|
*read_ticks = m_bios_access_time[static_cast<u32>(size)];
|
||||||
|
|
||||||
|
return &g_bios[paddr & BIOS_MASK];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Anything else has no direct pointer; the caller must fall back to the
// regular memory access path.
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolves a guest virtual address to a host pointer that can be written
// directly. With the RAM branch compiled out below, only the region backed by
// g_state.dcache yields a pointer; every other address returns nullptr.
//
// address - guest virtual address to resolve.
// size    - access width; not used by the body as written here.
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size)
|
||||||
|
{
|
||||||
|
using namespace Bus;
|
||||||
|
|
||||||
|
// Same segment filter as the read variant: only segments 0, 4 and 5 are accepted.
const u32 seg = (address >> 29);
|
||||||
|
if (seg != 0 && seg != 4 && seg != 5)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
|
||||||
|
|
||||||
|
// Direct RAM writes are compiled out for now (see the note inside the #if 0).
#if 0
|
||||||
|
// Not enabled until we can protect code regions.
|
||||||
|
if (paddr < RAM_MIRROR_END)
|
||||||
|
return &g_ram[paddr & RAM_MASK];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
|
||||||
|
return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Recompiler::Thunks {
|
namespace Recompiler::Thunks {
|
||||||
|
|
||||||
u64 ReadMemoryByte(u32 address)
|
u64 ReadMemoryByte(u32 address)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "cpu_core.h"
|
#include "cpu_core.h"
|
||||||
|
#include "bus.h"
|
||||||
|
|
||||||
namespace CPU {
|
namespace CPU {
|
||||||
|
|
||||||
|
@ -72,5 +73,7 @@ bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
||||||
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
||||||
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
||||||
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||||
|
// Returns a host pointer usable for a direct read of `address`, or nullptr if
// none is available. `read_ticks` may be null; when it is non-null and a
// pointer is returned, it receives the tick cost of the read.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
|
||||||
|
// Returns a host pointer usable for a direct write to `address`, or nullptr if
// none is available.
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
|
||||||
|
|
||||||
} // namespace CPU
|
} // namespace CPU
|
|
@ -1283,6 +1283,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
||||||
|
|
||||||
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
|
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
|
||||||
{
|
{
|
||||||
|
if (address.IsConstant())
|
||||||
|
{
|
||||||
|
TickCount read_ticks;
|
||||||
|
void* ptr = GetDirectReadMemoryPointer(
|
||||||
|
static_cast<u32>(address.constant_value),
|
||||||
|
(size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||||
|
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
|
||||||
|
&read_ticks);
|
||||||
|
if (ptr)
|
||||||
|
{
|
||||||
|
Value result = m_register_cache.AllocateScratch(size);
|
||||||
|
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
|
||||||
|
m_delayed_cycles_add += read_ticks;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
AddPendingCycles(true);
|
||||||
|
|
||||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||||
|
@ -1405,6 +1422,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
|
|
||||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
||||||
{
|
{
|
||||||
|
if (address.IsConstant())
|
||||||
|
{
|
||||||
|
void* ptr = GetDirectWriteMemoryPointer(
|
||||||
|
static_cast<u32>(address.constant_value),
|
||||||
|
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||||
|
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
|
||||||
|
if (ptr)
|
||||||
|
{
|
||||||
|
EmitStoreGlobal(ptr, value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
AddPendingCycles(true);
|
||||||
|
|
||||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||||
|
@ -1480,12 +1510,50 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
|
|
||||||
// Emits code that loads a value of `size` from the fixed host address `ptr`
// into `host_reg`. The address is first moved into RSCRATCH, then a load of the
// matching width (Ldrb / Ldrh / Ldr) is emitted through it.
// NOTE(review): this span is a rendered side-by-side diff, so the signature and
// braces appear twice (old side, then new side).
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||||
{
|
{
|
||||||
// The Panic line appears to be the pre-change body; the lines after it replace it.
Panic("Not implemented");
|
// RSCRATCH is overwritten with the absolute host address of the load.
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
|
||||||
|
switch (size)
|
||||||
|
{
|
||||||
|
case RegSize_8:
|
||||||
|
m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case RegSize_16:
|
||||||
|
m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case RegSize_32:
|
||||||
|
m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Only 8-, 16- and 32-bit loads are handled; any other size is treated as unreachable.
default:
|
||||||
|
UnreachableCode();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits code that stores `value` to the fixed host address `ptr`. The address
// is moved into RSCRATCH and a store of width `value.size` (Strb / Strh / Str)
// is emitted through it.
// NOTE(review): this span is a rendered side-by-side diff, so the signature and
// braces appear twice (old side, then new side).
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
|
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
|
||||||
{
|
{
|
||||||
// The Panic line appears to be the pre-change body; the lines after it replace it.
Panic("Not implemented");
|
// Get the value into a host register before storing it (presumably this
// materialises constants -- confirm against GetValueInHostRegister).
Value value_in_hr = GetValueInHostRegister(value);
|
||||||
|
|
||||||
|
// RSCRATCH is overwritten with the absolute host address of the store.
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
|
||||||
|
switch (value.size)
|
||||||
|
{
|
||||||
|
case RegSize_8:
|
||||||
|
m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case RegSize_16:
|
||||||
|
m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case RegSize_32:
|
||||||
|
m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Only 8-, 16- and 32-bit stores are handled; any other size is treated as unreachable.
default:
|
||||||
|
UnreachableCode();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitFlushInterpreterLoadDelay()
|
void CodeGenerator::EmitFlushInterpreterLoadDelay()
|
||||||
|
|
|
@ -1738,6 +1738,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
||||||
|
|
||||||
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
|
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
|
||||||
{
|
{
|
||||||
|
if (address.IsConstant())
|
||||||
|
{
|
||||||
|
TickCount read_ticks;
|
||||||
|
void* ptr = GetDirectReadMemoryPointer(
|
||||||
|
static_cast<u32>(address.constant_value),
|
||||||
|
(size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||||
|
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
|
||||||
|
&read_ticks);
|
||||||
|
if (ptr)
|
||||||
|
{
|
||||||
|
Value result = m_register_cache.AllocateScratch(size);
|
||||||
|
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
|
||||||
|
m_delayed_cycles_add += read_ticks;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
AddPendingCycles(true);
|
||||||
|
|
||||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||||
|
@ -1858,6 +1875,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
|
|
||||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
||||||
{
|
{
|
||||||
|
if (address.IsConstant())
|
||||||
|
{
|
||||||
|
void* ptr = GetDirectWriteMemoryPointer(
|
||||||
|
static_cast<u32>(address.constant_value),
|
||||||
|
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||||
|
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
|
||||||
|
if (ptr)
|
||||||
|
{
|
||||||
|
EmitStoreGlobal(ptr, value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
AddPendingCycles(true);
|
||||||
|
|
||||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||||
|
|
Loading…
Reference in New Issue