Merge pull request #10745 from JosJuice/softmmu

JitArm64: Implement "soft MMU"
This commit is contained in:
JMC47 2022-07-08 15:55:49 -04:00 committed by GitHub
commit 59e8aacf42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 248 additions and 110 deletions

View File

@ -47,6 +47,8 @@ namespace Memory
// Store the MemArena here // Store the MemArena here
u8* physical_base = nullptr; u8* physical_base = nullptr;
u8* logical_base = nullptr; u8* logical_base = nullptr;
u8* physical_page_mappings_base = nullptr;
u8* logical_page_mappings_base = nullptr;
static bool is_fastmem_arena_initialized = false; static bool is_fastmem_arena_initialized = false;
// The MemArena class // The MemArena class
@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
static std::vector<LogicalMemoryView> logical_mapped_entries; static std::vector<LogicalMemoryView> logical_mapped_entries;
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
void Init() void Init()
{ {
const auto get_mem1_size = [] { const auto get_mem1_size = [] {
@ -280,6 +285,8 @@ void Init()
} }
g_arena.GrabSHMSegment(mem_size); g_arena.GrabSHMSegment(mem_size);
s_physical_page_mappings.fill(nullptr);
// Create an anonymous view of the physical memory // Create an anonymous view of the physical memory
for (const PhysicalMemoryRegion& region : s_physical_regions) for (const PhysicalMemoryRegion& region : s_physical_regions)
{ {
@ -295,7 +302,16 @@ void Init()
region.physical_address, region.size); region.physical_address, region.size);
exit(0); exit(0);
} }
for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
{
const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
s_physical_page_mappings[index] = *region.out_pointer + i;
} }
}
physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
InitMMIO(wii); InitMMIO(wii);
@ -347,14 +363,14 @@ bool InitFastmemArena()
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
{ {
if (!is_fastmem_arena_initialized)
return;
for (auto& entry : logical_mapped_entries) for (auto& entry : logical_mapped_entries)
{ {
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
} }
logical_mapped_entries.clear(); logical_mapped_entries.clear();
s_logical_page_mappings.fill(nullptr);
for (u32 i = 0; i < dbat_table.size(); ++i) for (u32 i = 0; i < dbat_table.size(); ++i)
{ {
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT) if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
@ -375,6 +391,9 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
if (intersection_start < intersection_end) if (intersection_start < intersection_end)
{ {
// Found an overlapping region; map it. // Found an overlapping region; map it.
if (is_fastmem_arena_initialized)
{
u32 position = physical_region.shm_position + intersection_start - mapping_address; u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address; u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start; u32 mapped_size = intersection_end - intersection_start;
@ -382,13 +401,18 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base); void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
if (!mapped_pointer) if (!mapped_pointer)
{ {
PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} " PanicAlertFmt(
"Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
intersection_start, mapped_size, logical_address); intersection_start, mapped_size, logical_address);
exit(0); exit(0);
} }
logical_mapped_entries.push_back({mapped_pointer, mapped_size}); logical_mapped_entries.push_back({mapped_pointer, mapped_size});
} }
s_logical_page_mappings[i] =
*physical_region.out_pointer + intersection_start - mapping_address;
}
} }
} }
} }

View File

@ -21,16 +21,18 @@ class Mapping;
namespace Memory namespace Memory
{ {
// Base is a pointer to the base of the memory map. Yes, some MMU tricks // Base is a pointer to the base of the memory map. Yes, some MMU tricks
// are used to set up a full GC or Wii memory map in process memory. on // are used to set up a full GC or Wii memory map in process memory.
// 32-bit, you have to mask your offsets with 0x3FFFFFFF. This means that
// some things are mirrored too many times, but eh... it works.
// In 64-bit, this might point to "high memory" (above the 32-bit limit), // In 64-bit, this might point to "high memory" (above the 32-bit limit),
// so be sure to load it into a 64-bit register. // so be sure to load it into a 64-bit register.
extern u8* physical_base; extern u8* physical_base;
extern u8* logical_base; extern u8* logical_base;
// These are guaranteed to point to "low memory" addresses (sub-32-bit). // This page table is used for a "soft MMU" implementation when
// setting up the full memory map in process memory isn't possible.
extern u8* physical_page_mappings_base;
extern u8* logical_page_mappings_base;
// The actual memory used for backing the memory map.
extern u8* m_pRAM; extern u8* m_pRAM;
extern u8* m_pEXRAM; extern u8* m_pEXRAM;
extern u8* m_pL1Cache; extern u8* m_pL1Cache;

View File

@ -215,10 +215,27 @@ protected:
// Dump a memory range of code // Dump a memory range of code
void DumpCode(const u8* start, const u8* end); void DumpCode(const u8* start, const u8* end);
// This enum is used for selecting an implementation of EmitBackpatchRoutine.
//
// EmitBackpatchRoutine emits the fast access code for every mode except AlwaysSafe, and emits
// the slow C++ path for every mode except AlwaysUnsafe, so Auto is the only mode that emits both.
enum class MemAccessMode
{
// Always calls the slow C++ code. For performance reasons, should generally only be used if
// the guest address is known in advance and IsOptimizableRAMAddress returns false for it.
AlwaysSafe,
// Only emits fast access code, with no slow path to fall back on. Must only be used if the
// guest address is known in advance and IsOptimizableRAMAddress returns true for it,
// otherwise Dolphin will likely crash!
AlwaysUnsafe,
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
// then branches to either the fast access code or the slow C++ code.
Auto,
};
// This is the core routine for accessing emulated memory, with support for // This is the core routine for accessing emulated memory, with support for
// many different kinds of loads and stores as well as fastmem backpatching. // many different kinds of loads and stores as well as fastmem/backpatching.
// //
// Registers used: // Registers used:
//
// addr scratch // addr scratch
// Store: X1 X0 // Store: X1 X0
// Load: X0 // Load: X0
@ -226,15 +243,21 @@ protected:
// Store float: X1 Q0 // Store float: X1 Q0
// Load float: X0 // Load float: X0
// //
// If fastmem && !do_farcode, the addr argument can be any register. // If mode == AlwaysUnsafe, the addr argument can be any register.
// Otherwise it must be the register listed in the table above. // Otherwise it must be the register listed in the table above.
// //
// Additional scratch registers are used in the following situations: // Additional scratch registers are used in the following situations:
// fastmem && do_farcode && emitting_routine: X2 //
// fastmem && do_farcode && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0 // emitting_routine && mode == Auto: X2
// fastmem && do_farcode && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3 // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// !fastmem || do_farcode: X30 (plus lots more unless you set gprs_to_push and fprs_to_push) // emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS, // mode != AlwaysSafe && !jo.fastmem_arena: X2
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
// !emitting_routine && mode == Auto && jo.fastmem_arena: X30
//
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
// may be clobbered if mode != AlwaysUnsafe.
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);

View File

@ -54,19 +54,44 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory); ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory);
} }
void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS, void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
bool emitting_routine) bool emitting_routine)
{ {
const u32 access_size = BackPatchInfo::GetFlagSize(flags); const u32 access_size = BackPatchInfo::GetFlagSize(flags);
const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
bool in_far_code = false; bool in_far_code = false;
const u8* fastmem_start = GetCodePtr(); const u8* fastmem_start = GetCodePtr();
std::optional<FixupBranch> slowmem_fixup; std::optional<FixupBranch> slowmem_fixup;
if (fastmem) if (emit_fastmem)
{ {
if (do_farcode && emitting_routine) ARM64Reg memory_base = MEM_REG;
ARM64Reg memory_offset = addr;
if (!jo.fastmem_arena)
{
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W2;
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));
if (emit_slowmem)
{
FixupBranch pass = CBNZ(memory_base);
slowmem_fixup = B();
SetJumpTarget(pass);
}
AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
}
else if (emit_slowmem && emitting_routine)
{ {
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3; const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2; const ARM64Reg temp2 = ARM64Reg::W2;
@ -79,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ARM64Reg temp = ARM64Reg::D0; ARM64Reg temp = ARM64Reg::D0;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true); temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
m_float_emit.STR(access_size, temp, MEM_REG, addr); m_float_emit.STR(access_size, temp, memory_base, memory_offset);
} }
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT)) else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{ {
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr); m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags, ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
true, false); true, false);
@ -94,44 +119,44 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true); temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(temp, MEM_REG, addr); STR(temp, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16) else if (flags & BackPatchInfo::FLAG_SIZE_16)
STRH(temp, MEM_REG, addr); STRH(temp, memory_base, memory_offset);
else else
STRB(temp, MEM_REG, addr); STRB(temp, memory_base, memory_offset);
} }
else if (flags & BackPatchInfo::FLAG_ZERO_256) else if (flags & BackPatchInfo::FLAG_ZERO_256)
{ {
// This literally only stores 32bytes of zeros to the target address // This literally only stores 32bytes of zeros to the target address
ARM64Reg temp = ARM64Reg::X30; ARM64Reg temp = ARM64Reg::X30;
ADD(temp, addr, MEM_REG); ADD(temp, memory_base, memory_offset);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
} }
else else
{ {
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
LDR(RS, MEM_REG, addr); LDR(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16) else if (flags & BackPatchInfo::FLAG_SIZE_16)
LDRH(RS, MEM_REG, addr); LDRH(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_8) else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(RS, MEM_REG, addr); LDRB(RS, memory_base, memory_offset);
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false); ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
} }
} }
const u8* fastmem_end = GetCodePtr(); const u8* fastmem_end = GetCodePtr();
if (!fastmem || do_farcode) if (emit_slowmem)
{ {
const bool memcheck = jo.memcheck && !emitting_routine; const bool memcheck = jo.memcheck && !emitting_routine;
if (fastmem && do_farcode) if (emit_fastmem)
{ {
in_far_code = true; in_far_code = true;
SwitchToFarCode(); SwitchToFarCode();
if (!emitting_routine) if (jo.fastmem_arena && !emitting_routine)
{ {
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end]; FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
fastmem_area->fastmem_code = fastmem_start; fastmem_area->fastmem_code = fastmem_start;
@ -261,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
if (in_far_code) if (in_far_code)
{ {
if (emitting_routine) if (slowmem_fixup)
{ {
FixupBranch done = B(); FixupBranch done = B();
SwitchToNearCode(); SwitchToNearCode();

View File

@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{ {
// We want to make sure to not get LR as a temp register // We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest); ARM64Reg dest_reg = gpr.R(dest);
@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; regs_in_use[DecodeReg(dest_reg)] = 0;
@ -129,10 +133,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate) if (is_immediate)
mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size); mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
fprs_in_use);
} }
else if (mmio_address) else if (mmio_address)
{ {
@ -142,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
} }
gpr.BindToRegister(dest, false, true); gpr.BindToRegister(dest, false, true);
@ -156,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
@ -163,6 +170,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
{ {
// We want to make sure to not get LR as a temp register // We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg RS = gpr.R(value); ARM64Reg RS = gpr.R(value);
@ -258,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0; u32 mmio_address = 0;
@ -290,10 +301,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
js.fifoBytesSinceCheck += accessSize >> 3; js.fifoBytesSinceCheck += accessSize >> 3;
} }
else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
} }
else if (mmio_address) else if (mmio_address)
{ {
@ -303,7 +314,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
} }
if (update && !early_update) if (update && !early_update)
@ -314,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp, FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@ -496,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
// MMU games make use of a >= d despite this being invalid according to the PEM. // MMU games make use of a >= d despite this being invalid according to the PEM.
// Because of this, make sure to not re-read rA after starting doing the loads. // Because of this, make sure to not re-read rA after starting doing the loads.
@ -521,13 +536,15 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31) if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0; regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; regs_in_use[DecodeReg(dest_reg)] = 0;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg), EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
regs_in_use, fprs_in_use); fprs_in_use);
gpr.BindToRegister(i, false, true); gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i)); ASSERT(dest_reg == gpr.R(i));
@ -537,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }
void JitArm64::stmw(UGeckoInstruction inst) void JitArm64::stmw(UGeckoInstruction inst)
@ -548,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg addr_reg = ARM64Reg::W1;
if (a) if (a)
@ -571,17 +592,21 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31) if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0; regs_in_use[DecodeReg(addr_reg)] = 0;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg), EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
regs_in_use, fprs_in_use); fprs_in_use);
if (i != 31) if (i != 31)
ADD(addr_reg, addr_reg, 4); ADD(addr_reg, addr_reg, 4);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }
void JitArm64::dcbx(UGeckoInstruction inst) void JitArm64::dcbx(UGeckoInstruction inst)
@ -770,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB; int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); }); Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
});
constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg temp_reg = ARM64Reg::W30; constexpr ARM64Reg temp_reg = ARM64Reg::W30;
@ -838,8 +869,10 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0, EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
if (using_dcbz_hack) if (using_dcbz_hack)

View File

@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0); fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
const ARM64Reg VD = fpr.RW(inst.FD, type, false); const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0; ARM64Reg addr_reg = ARM64Reg::W0;
@ -166,17 +168,19 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0; fprs_in_use[DecodeReg(VD)] = 0;
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{ {
EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
} }
else else
{ {
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
} }
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }
void JitArm64::stfXX(UGeckoInstruction inst) void JitArm64::stfXX(UGeckoInstruction inst)
@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
} }
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg addr_reg = ARM64Reg::W1;
@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate) if (is_immediate)
@ -389,21 +399,21 @@ void JitArm64::stfXX(UGeckoInstruction inst)
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3; js.fifoBytesSinceCheck += accessSize >> 3;
} }
else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
} }
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
} }
if (update && !early_update) if (update && !early_update)
@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
} }

View File

@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1); fpr.Lock(ARM64Reg::Q1);
} }
else if (!jo.fastmem_arena)
{
gpr.Lock(ARM64Reg::W2);
}
constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1; constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries // Wipe the registers we are using as temporaries
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0; fprs_in_use[DecodeReg(VS)] = 0;
@ -90,7 +96,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (!w) if (!w)
flags |= BackPatchInfo::FLAG_PAIR; flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use); fprs_in_use);
} }
else else
@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q1);
} }
else if (!jo.fastmem_arena)
{
gpr.Unlock(ARM64Reg::W2);
}
} }
void JitArm64::psq_stXX(UGeckoInstruction inst) void JitArm64::psq_stXX(UGeckoInstruction inst)
@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
} }
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W3);
constexpr ARM64Reg scale_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@ -229,12 +241,14 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w) if (!w)
flags |= BackPatchInfo::FLAG_PAIR; flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use); fprs_in_use);
} }
else else
@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q1);
}
} }

View File

@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags // set the mem_base based on MSR flags
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base); MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
FixupBranch membaseend = B(); FixupBranch membaseend = B();
SetJumpTarget(physmem); SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base); MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
SetJumpTarget(membaseend); SetJumpTarget(membaseend);
// iCache[(address >> 2) & iCache_Mask]; // iCache[(address >> 2) & iCache_Mask];
@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags and jump to next block. // set the mem_base based on MSR flags and jump to next block.
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base); MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
BR(ARM64Reg::X0); BR(ARM64Reg::X0);
SetJumpTarget(physmem); SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base); MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
BR(ARM64Reg::X0); BR(ARM64Reg::X0);
// Call JIT // Call JIT
@ -510,7 +510,7 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true); gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -520,8 +520,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -538,8 +538,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -556,8 +556,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -573,8 +573,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -591,7 +591,7 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true); gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -601,8 +601,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -619,8 +619,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -637,8 +637,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -654,8 +654,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
// X0 is the scale // X0 is the scale
// X1 is the address // X1 is the address
// X2 is a temporary // X2 is a temporary
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
// X30 is LR // X30 is LR
// Q0 is the register // Q0 is the register
// Q1 is a temporary // Q1 is a temporary
@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2}; BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
if (!jo.memcheck) if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1}; gprs_to_push &= ~BitSet32{1};
if (!jo.fastmem_arena)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this); ARM64FloatEmitter float_emit(this);
@ -718,8 +721,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -737,8 +740,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -756,8 +759,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -774,8 +777,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -792,8 +795,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -803,8 +806,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -822,8 +825,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -841,8 +844,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -859,8 +862,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -877,8 +880,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
gprs_to_push, fprs_to_push, true); fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }

View File

@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
constexpr int BAT_INDEX_SHIFT = 17; constexpr int BAT_INDEX_SHIFT = 17;
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT; constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
constexpr u32 BAT_MAPPED_BIT = 0x1; constexpr u32 BAT_MAPPED_BIT = 0x1;
constexpr u32 BAT_PHYSICAL_BIT = 0x2; constexpr u32 BAT_PHYSICAL_BIT = 0x2;
constexpr u32 BAT_WI_BIT = 0x4; constexpr u32 BAT_WI_BIT = 0x4;
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7); constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB using BatTable = std::array<u32, BAT_PAGE_COUNT>; // 128 KB
extern BatTable ibat_table; extern BatTable ibat_table;
extern BatTable dbat_table; extern BatTable dbat_table;
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi) inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)