CPU/Recompiler: Offset LUT fastmem pointers

This commit is contained in:
Stenzek 2023-10-07 17:31:19 +10:00
parent a062b00c1a
commit 9d40164f84
No known key found for this signature in database
6 changed files with 248 additions and 464 deletions

View File

@ -141,6 +141,8 @@ static void SetRAMSize(bool enable_8mb_ram);
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
static void RecalculateMemoryTimings();
static u8* GetLUTFastmemPointer(u32 address, u8* ram_ptr);
static void SetRAMPageWritable(u32 page_index, bool writable);
static void SetHandlers();
@ -460,18 +462,23 @@ CPUFastmemMode Bus::GetFastmemMode()
return s_fastmem_mode;
}
void* Bus::GetFastmemBase()
void* Bus::GetFastmemBase(bool isc)
{
#ifdef ENABLE_MMAP_FASTMEM
if (s_fastmem_mode == CPUFastmemMode::MMap)
return s_fastmem_arena.BasePointer();
return isc ? nullptr : s_fastmem_arena.BasePointer();
#endif
if (s_fastmem_mode == CPUFastmemMode::LUT)
return reinterpret_cast<u8*>(s_fastmem_lut);
return reinterpret_cast<u8*>(s_fastmem_lut + (isc ? (FASTMEM_LUT_SIZE * sizeof(void*)) : 0));
return nullptr;
}
/// Computes the value stored in a fastmem LUT slot for one page.
///
/// The host pointer is biased downwards by the guest address, so that adding `address` back to
/// the returned value yields `ram_ptr` again.
///
/// @param address  Guest address the slot corresponds to.
/// @param ram_ptr  Host pointer backing that guest address.
/// @return `ram_ptr` offset by `-address`.
/// NOTE(review): the returned pointer is generally not inside the RAM allocation by itself; it is
/// only meaningful once a guest address has been added to it.
u8* Bus::GetLUTFastmemPointer(u32 address, u8* ram_ptr)
{
  u8* const biased_ptr = ram_ptr - address;
  return biased_ptr;
}
void Bus::UpdateFastmemViews(CPUFastmemMode mode)
{
#ifndef ENABLE_MMAP_FASTMEM
@ -530,20 +537,20 @@ void Bus::UpdateFastmemViews(CPUFastmemMode mode)
if (!s_fastmem_lut)
{
s_fastmem_lut = static_cast<u8**>(std::malloc(sizeof(u8*) * FASTMEM_LUT_SIZE));
s_fastmem_lut = static_cast<u8**>(std::malloc(sizeof(u8*) * FASTMEM_LUT_SLOTS));
Assert(s_fastmem_lut);
Log_InfoPrintf("Fastmem base (software): %p", s_fastmem_lut);
}
std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SIZE);
std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SLOTS);
auto MapRAM = [](u32 base_address) {
u8* ram_ptr = g_ram + (base_address & g_ram_mask);
for (u32 address = 0; address < g_ram_size; address += FASTMEM_LUT_PAGE_SIZE)
{
const u32 lut_index = (base_address + address) >> FASTMEM_LUT_PAGE_SHIFT;
s_fastmem_lut[lut_index] = ram_ptr;
s_fastmem_lut[lut_index] = GetLUTFastmemPointer(base_address + address, ram_ptr);
ram_ptr += FASTMEM_LUT_PAGE_SIZE;
}
};

View File

@ -96,7 +96,8 @@ enum : u32
FASTMEM_LUT_PAGE_SIZE = 4096,
FASTMEM_LUT_PAGE_MASK = FASTMEM_LUT_PAGE_SIZE - 1,
FASTMEM_LUT_PAGE_SHIFT = 12,
FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12
FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12
FASTMEM_LUT_SLOTS = FASTMEM_LUT_SIZE * 2, // [isc]
};
#ifdef ENABLE_MMAP_FASTMEM
@ -125,7 +126,7 @@ ALWAYS_INLINE_RELEASE static FP* OffsetHandlerArray(void** handlers, MemoryAcces
}
CPUFastmemMode GetFastmemMode();
void* GetFastmemBase();
void* GetFastmemBase(bool isc);
void UpdateFastmemViews(CPUFastmemMode mode);
bool CanUseFastmemForAddress(VirtualMemoryAddress address);

View File

@ -2396,7 +2396,7 @@ ALWAYS_INLINE_RELEASE Bus::MemoryWriteHandler CPU::GetMemoryWriteHandler(Virtual
/// Re-derives the cached memory handler table and fastmem base in g_state from the current
/// COP0 status register bits (Isc/Swc).
void CPU::UpdateMemoryPointers()
{
  auto& status = g_state.cop0_regs.sr;

  g_state.memory_handlers = Bus::GetMemoryHandlers(status.Isc, status.Swc);

  // Bus selects the appropriate base (or nullptr) for the given Isc state itself.
  g_state.fastmem_base = Bus::GetFastmemBase(status.Isc);
}
void CPU::ExecutionModeChanged()

View File

@ -1367,22 +1367,21 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
}
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->ldr(GetHostReg32(RARG1),
a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
break;
default:

View File

@ -1734,60 +1734,38 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
address_reg = address.host_reg;
}
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
}
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
const a64::XRegister membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result)
{
// fastmem
void* host_pc = GetCurrentNearCodePointer();
HostReg address_reg;
if (address.IsConstant())
{
@ -1799,57 +1777,36 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const Co
address_reg = address.host_reg;
}
m_register_cache.InhibitAllocation();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
}
host_pc = GetCurrentNearCodePointer();
const a64::XRegister membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
m_register_cache.InhibitAllocation();
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
void* host_pc = GetCurrentNearCodePointer();
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
const u32 host_code_size =
@ -1957,9 +1914,6 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C
{
Value value_in_hr = GetValueInHostRegister(value);
// fastmem
void* host_pc = GetCurrentNearCodePointer();
HostReg address_reg;
if (address.IsConstant())
{
@ -1971,56 +1925,37 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C
address_reg = address.host_reg;
}
m_register_cache.InhibitAllocation();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
}
host_pc = GetCurrentNearCodePointer();
const a64::XRegister membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
switch (size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
// fastmem
void* host_pc = GetCurrentNearCodePointer();
case RegSize_16:
m_emit->strh(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
m_register_cache.InhibitAllocation();
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
switch (size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
case RegSize_16:
m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
const u32 host_code_size =

View File

@ -2094,200 +2094,117 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
default:
UnreachableCode();
break;
}
}
else
{
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
switch (size)
const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
{
case RegSize_8:
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
default:
UnreachableCode();
break;
if (actual_address->IsConstant())
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
default:
UnreachableCode();
break;
}
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result)
{
// fastmem
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
EmitCopyValue(RARG1, address);
m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
}
void* host_pc = GetCurrentNearCodePointer();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
m_register_cache.InhibitAllocation();
switch (size)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
case RegSize_8:
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
host_pc = GetCurrentNearCodePointer();
if (actual_address->IsConstant())
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
m_register_cache.InhibitAllocation();
switch (size)
case RegSize_16:
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
default:
UnreachableCode();
break;
if (actual_address->IsConstant())
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
}
}
else
{
m_register_cache.InhibitAllocation();
break;
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
host_pc = GetCurrentNearCodePointer();
switch (size)
case RegSize_32:
{
case RegSize_8:
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
default:
UnreachableCode();
break;
if (actual_address->IsConstant())
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
else
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
}
break;
default:
UnreachableCode();
break;
}
// insert nops, we need at least 5 bytes for a relative jump
@ -2398,168 +2315,93 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value)
{
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
EmitCopyValue(RARG1, address);
m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
}
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
}
const Xbyak::Reg64 membase =
(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
// fastmem
void* host_pc = GetCurrentNearCodePointer();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
m_register_cache.InhibitAllocation();
switch (size)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
case RegSize_8:
{
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
host_pc = GetCurrentNearCodePointer();
}
m_register_cache.InhibitAllocation();
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
value.constant_value & 0xFFu);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg8(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value & 0xFFu);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg8(value.host_reg));
}
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
value.constant_value & 0xFFFFu);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg16(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value & 0xFFFFu);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg16(value.host_reg));
}
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg32(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg32(value.host_reg));
}
}
}
break;
default:
UnreachableCode();
break;
}
}
else
{
m_register_cache.InhibitAllocation();
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
if (actual_address->IsConstant())
{
if (value.IsConstant())
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFu);
m_emit->mov(m_emit->byte[membase + actual_address->constant_value], value.constant_value & 0xFFu);
else
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg8(value.host_reg));
m_emit->mov(m_emit->byte[membase + actual_address->constant_value], GetHostReg8(value.host_reg));
}
break;
case RegSize_16:
else
{
if (value.IsConstant())
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFFFu);
m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFu);
else
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg16(value.host_reg));
m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], GetHostReg8(value.host_reg));
}
break;
case RegSize_32:
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value);
else
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg32(value.host_reg));
}
break;
default:
UnreachableCode();
break;
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
m_emit->mov(m_emit->word[membase + actual_address->constant_value], value.constant_value & 0xFFFFu);
else
m_emit->mov(m_emit->word[membase + actual_address->constant_value], GetHostReg16(value.host_reg));
}
else
{
if (value.IsConstant())
m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFFFu);
else
m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], GetHostReg16(value.host_reg));
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[membase + actual_address->constant_value], value.constant_value);
else
m_emit->mov(m_emit->dword[membase + actual_address->constant_value], GetHostReg32(value.host_reg));
}
else
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], value.constant_value);
else
m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], GetHostReg32(value.host_reg));
}
}
break;
default:
UnreachableCode();
break;
}
// insert nops, we need at least 5 bytes for a relative jump