CPU/Recompiler: Offset LUT fastmem pointers

This commit is contained in:
Stenzek 2023-10-07 17:31:19 +10:00
parent a062b00c1a
commit 9d40164f84
No known key found for this signature in database
6 changed files with 248 additions and 464 deletions

View File

@ -141,6 +141,8 @@ static void SetRAMSize(bool enable_8mb_ram);
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay); static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
static void RecalculateMemoryTimings(); static void RecalculateMemoryTimings();
static u8* GetLUTFastmemPointer(u32 address, u8* ram_ptr);
static void SetRAMPageWritable(u32 page_index, bool writable); static void SetRAMPageWritable(u32 page_index, bool writable);
static void SetHandlers(); static void SetHandlers();
@ -460,18 +462,23 @@ CPUFastmemMode Bus::GetFastmemMode()
return s_fastmem_mode; return s_fastmem_mode;
} }
void* Bus::GetFastmemBase() void* Bus::GetFastmemBase(bool isc)
{ {
#ifdef ENABLE_MMAP_FASTMEM #ifdef ENABLE_MMAP_FASTMEM
if (s_fastmem_mode == CPUFastmemMode::MMap) if (s_fastmem_mode == CPUFastmemMode::MMap)
return s_fastmem_arena.BasePointer(); return isc ? nullptr : s_fastmem_arena.BasePointer();
#endif #endif
if (s_fastmem_mode == CPUFastmemMode::LUT) if (s_fastmem_mode == CPUFastmemMode::LUT)
return reinterpret_cast<u8*>(s_fastmem_lut); return reinterpret_cast<u8*>(s_fastmem_lut + (isc ? (FASTMEM_LUT_SIZE * sizeof(void*)) : 0));
return nullptr; return nullptr;
} }
// Builds the value stored in a fastmem LUT slot: the host pointer biased
// downwards by the guest address. Adding the full guest address to the result
// gives back ram_ptr, so generated code can use the unmasked address as the
// offset instead of masking out the in-page bits first.
u8* Bus::GetLUTFastmemPointer(u32 address, u8* ram_ptr)
{
  u8* const biased_ptr = ram_ptr - address;
  return biased_ptr;
}
void Bus::UpdateFastmemViews(CPUFastmemMode mode) void Bus::UpdateFastmemViews(CPUFastmemMode mode)
{ {
#ifndef ENABLE_MMAP_FASTMEM #ifndef ENABLE_MMAP_FASTMEM
@ -530,20 +537,20 @@ void Bus::UpdateFastmemViews(CPUFastmemMode mode)
if (!s_fastmem_lut) if (!s_fastmem_lut)
{ {
s_fastmem_lut = static_cast<u8**>(std::malloc(sizeof(u8*) * FASTMEM_LUT_SIZE)); s_fastmem_lut = static_cast<u8**>(std::malloc(sizeof(u8*) * FASTMEM_LUT_SLOTS));
Assert(s_fastmem_lut); Assert(s_fastmem_lut);
Log_InfoPrintf("Fastmem base (software): %p", s_fastmem_lut); Log_InfoPrintf("Fastmem base (software): %p", s_fastmem_lut);
} }
std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SIZE); std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SLOTS);
auto MapRAM = [](u32 base_address) { auto MapRAM = [](u32 base_address) {
u8* ram_ptr = g_ram + (base_address & g_ram_mask); u8* ram_ptr = g_ram + (base_address & g_ram_mask);
for (u32 address = 0; address < g_ram_size; address += FASTMEM_LUT_PAGE_SIZE) for (u32 address = 0; address < g_ram_size; address += FASTMEM_LUT_PAGE_SIZE)
{ {
const u32 lut_index = (base_address + address) >> FASTMEM_LUT_PAGE_SHIFT; const u32 lut_index = (base_address + address) >> FASTMEM_LUT_PAGE_SHIFT;
s_fastmem_lut[lut_index] = ram_ptr; s_fastmem_lut[lut_index] = GetLUTFastmemPointer(base_address + address, ram_ptr);
ram_ptr += FASTMEM_LUT_PAGE_SIZE; ram_ptr += FASTMEM_LUT_PAGE_SIZE;
} }
}; };

View File

@ -96,7 +96,8 @@ enum : u32
FASTMEM_LUT_PAGE_SIZE = 4096, FASTMEM_LUT_PAGE_SIZE = 4096,
FASTMEM_LUT_PAGE_MASK = FASTMEM_LUT_PAGE_SIZE - 1, FASTMEM_LUT_PAGE_MASK = FASTMEM_LUT_PAGE_SIZE - 1,
FASTMEM_LUT_PAGE_SHIFT = 12, FASTMEM_LUT_PAGE_SHIFT = 12,
FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12 FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12
FASTMEM_LUT_SLOTS = FASTMEM_LUT_SIZE * 2, // [isc]
}; };
#ifdef ENABLE_MMAP_FASTMEM #ifdef ENABLE_MMAP_FASTMEM
@ -125,7 +126,7 @@ ALWAYS_INLINE_RELEASE static FP* OffsetHandlerArray(void** handlers, MemoryAcces
} }
CPUFastmemMode GetFastmemMode(); CPUFastmemMode GetFastmemMode();
void* GetFastmemBase(); void* GetFastmemBase(bool isc);
void UpdateFastmemViews(CPUFastmemMode mode); void UpdateFastmemViews(CPUFastmemMode mode);
bool CanUseFastmemForAddress(VirtualMemoryAddress address); bool CanUseFastmemForAddress(VirtualMemoryAddress address);

View File

@ -2396,7 +2396,7 @@ ALWAYS_INLINE_RELEASE Bus::MemoryWriteHandler CPU::GetMemoryWriteHandler(Virtual
void CPU::UpdateMemoryPointers() void CPU::UpdateMemoryPointers()
{ {
g_state.memory_handlers = Bus::GetMemoryHandlers(g_state.cop0_regs.sr.Isc, g_state.cop0_regs.sr.Swc); g_state.memory_handlers = Bus::GetMemoryHandlers(g_state.cop0_regs.sr.Isc, g_state.cop0_regs.sr.Swc);
g_state.fastmem_base = g_state.cop0_regs.sr.Isc ? nullptr : Bus::GetFastmemBase(); g_state.fastmem_base = Bus::GetFastmemBase(g_state.cop0_regs.sr.Isc);
} }
void CPU::ExecutionModeChanged() void CPU::ExecutionModeChanged()

View File

@ -1367,22 +1367,21 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
} }
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->ldr(GetHostReg32(RARG1), m_emit->ldr(GetHostReg32(RARG1),
a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
switch (size) switch (size)
{ {
case RegSize_8: case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break; break;
case RegSize_16: case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break; break;
case RegSize_32: case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break; break;
default: default:

View File

@ -1734,60 +1734,38 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
address_reg = address.host_reg; address_reg = address.host_reg;
} }
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{ {
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); }
switch (size) const a64::XRegister membase =
{ (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16: switch (size)
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); {
break; case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
case RegSize_32: case RegSize_16:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break; break;
default: case RegSize_32:
UnreachableCode(); m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break; break;
}
default:
UnreachableCode();
break;
} }
} }
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result) const Value& address, RegSize size, Value& result)
{ {
// fastmem
void* host_pc = GetCurrentNearCodePointer();
HostReg address_reg; HostReg address_reg;
if (address.IsConstant()) if (address.IsConstant())
{ {
@ -1799,57 +1777,36 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const Co
address_reg = address.host_reg; address_reg = address.host_reg;
} }
m_register_cache.InhibitAllocation(); if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{ {
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); }
host_pc = GetCurrentNearCodePointer(); const a64::XRegister membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
switch (size) m_register_cache.InhibitAllocation();
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16: void* host_pc = GetCurrentNearCodePointer();
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32: switch (size)
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); {
break; case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default: case RegSize_16:
UnreachableCode(); m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break; break;
}
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
} }
const u32 host_code_size = const u32 host_code_size =
@ -1957,9 +1914,6 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C
{ {
Value value_in_hr = GetValueInHostRegister(value); Value value_in_hr = GetValueInHostRegister(value);
// fastmem
void* host_pc = GetCurrentNearCodePointer();
HostReg address_reg; HostReg address_reg;
if (address.IsConstant()) if (address.IsConstant())
{ {
@ -1971,56 +1925,37 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C
address_reg = address.host_reg; address_reg = address.host_reg;
} }
m_register_cache.InhibitAllocation(); if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{ {
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3)); }
host_pc = GetCurrentNearCodePointer(); const a64::XRegister membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
switch (size) // fastmem
{ void* host_pc = GetCurrentNearCodePointer();
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16: m_register_cache.InhibitAllocation();
m_emit->strh(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32: switch (size)
m_emit->str(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); {
break; case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default: case RegSize_16:
UnreachableCode(); m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break; break;
}
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
} }
const u32 host_code_size = const u32 host_code_size =

View File

@ -2094,200 +2094,117 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{ {
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{ {
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
default:
UnreachableCode();
break;
}
}
else
{
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address); EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
switch (size) const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
{ {
case RegSize_8: if (actual_address->IsConstant())
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
break; else
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
default:
UnreachableCode();
break;
} }
break;
case RegSize_16:
{
if (actual_address->IsConstant())
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
default:
UnreachableCode();
break;
} }
} }
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result) const Value& address, RegSize size, Value& result)
{ {
// fastmem if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
EmitCopyValue(RARG1, address);
m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
void* host_pc = GetCurrentNearCodePointer(); void* host_pc = GetCurrentNearCodePointer();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) m_register_cache.InhibitAllocation();
switch (size)
{ {
// can't store displacements > 0x80000000 in-line case RegSize_8:
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{ {
actual_address = &result; if (actual_address->IsConstant())
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
host_pc = GetCurrentNearCodePointer(); else
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
} }
break;
m_register_cache.InhibitAllocation(); case RegSize_16:
switch (size)
{ {
case RegSize_8: if (actual_address->IsConstant())
{ m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
if (actual_address->IsConstant()) else
{ m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
default:
UnreachableCode();
break;
} }
} break;
else
{
m_register_cache.InhibitAllocation();
// TODO: We could mask the LSBs here for unaligned protection. case RegSize_32:
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
host_pc = GetCurrentNearCodePointer();
switch (size)
{ {
case RegSize_8: if (actual_address->IsConstant())
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
break; else
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
default:
UnreachableCode();
break;
} }
break;
default:
UnreachableCode();
break;
} }
// insert nops, we need at least 5 bytes for a relative jump // insert nops, we need at least 5 bytes for a relative jump
@ -2398,168 +2315,93 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value) const Value& address, RegSize size, const Value& value)
{ {
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
EmitCopyValue(RARG1, address);
m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
}
const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// fastmem // fastmem
void* host_pc = GetCurrentNearCodePointer(); void* host_pc = GetCurrentNearCodePointer();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) m_register_cache.InhibitAllocation();
switch (size)
{ {
// can't store displacements > 0x80000000 in-line case RegSize_8:
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{ {
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); if (actual_address->IsConstant())
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
host_pc = GetCurrentNearCodePointer();
}
m_register_cache.InhibitAllocation();
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
value.constant_value & 0xFFu);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg8(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value & 0xFFu);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg8(value.host_reg));
}
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
value.constant_value & 0xFFFFu);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg16(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value & 0xFFFFu);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg16(value.host_reg));
}
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg32(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg32(value.host_reg));
}
}
}
break;
default:
UnreachableCode();
break;
}
}
else
{
m_register_cache.InhibitAllocation();
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
{ {
if (value.IsConstant()) if (value.IsConstant())
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFu); m_emit->mov(m_emit->byte[membase + actual_address->constant_value], value.constant_value & 0xFFu);
else else
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg8(value.host_reg)); m_emit->mov(m_emit->byte[membase + actual_address->constant_value], GetHostReg8(value.host_reg));
} }
break; else
case RegSize_16:
{ {
if (value.IsConstant()) if (value.IsConstant())
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFFFu); m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFu);
else else
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg16(value.host_reg)); m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], GetHostReg8(value.host_reg));
} }
break;
case RegSize_32:
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value);
else
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg32(value.host_reg));
}
break;
default:
UnreachableCode();
break;
} }
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
m_emit->mov(m_emit->word[membase + actual_address->constant_value], value.constant_value & 0xFFFFu);
else
m_emit->mov(m_emit->word[membase + actual_address->constant_value], GetHostReg16(value.host_reg));
}
else
{
if (value.IsConstant())
m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFFFu);
else
m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], GetHostReg16(value.host_reg));
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[membase + actual_address->constant_value], value.constant_value);
else
m_emit->mov(m_emit->dword[membase + actual_address->constant_value], GetHostReg32(value.host_reg));
}
else
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], value.constant_value);
else
m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], GetHostReg32(value.host_reg));
}
}
break;
default:
UnreachableCode();
break;
} }
// insert nops, we need at least 5 bytes for a relative jump // insert nops, we need at least 5 bytes for a relative jump