Merge pull request #10745 from JosJuice/softmmu
JitArm64: Implement "soft MMU"
This commit is contained in:
commit
59e8aacf42
|
@ -47,6 +47,8 @@ namespace Memory
|
||||||
// Store the MemArena here
|
// Store the MemArena here
|
||||||
u8* physical_base = nullptr;
|
u8* physical_base = nullptr;
|
||||||
u8* logical_base = nullptr;
|
u8* logical_base = nullptr;
|
||||||
|
u8* physical_page_mappings_base = nullptr;
|
||||||
|
u8* logical_page_mappings_base = nullptr;
|
||||||
static bool is_fastmem_arena_initialized = false;
|
static bool is_fastmem_arena_initialized = false;
|
||||||
|
|
||||||
// The MemArena class
|
// The MemArena class
|
||||||
|
@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
|
||||||
|
|
||||||
static std::vector<LogicalMemoryView> logical_mapped_entries;
|
static std::vector<LogicalMemoryView> logical_mapped_entries;
|
||||||
|
|
||||||
|
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
|
||||||
|
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
|
||||||
|
|
||||||
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
const auto get_mem1_size = [] {
|
const auto get_mem1_size = [] {
|
||||||
|
@ -280,6 +285,8 @@ void Init()
|
||||||
}
|
}
|
||||||
g_arena.GrabSHMSegment(mem_size);
|
g_arena.GrabSHMSegment(mem_size);
|
||||||
|
|
||||||
|
s_physical_page_mappings.fill(nullptr);
|
||||||
|
|
||||||
// Create an anonymous view of the physical memory
|
// Create an anonymous view of the physical memory
|
||||||
for (const PhysicalMemoryRegion& region : s_physical_regions)
|
for (const PhysicalMemoryRegion& region : s_physical_regions)
|
||||||
{
|
{
|
||||||
|
@ -295,7 +302,16 @@ void Init()
|
||||||
region.physical_address, region.size);
|
region.physical_address, region.size);
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
|
||||||
|
{
|
||||||
|
const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
|
||||||
|
s_physical_page_mappings[index] = *region.out_pointer + i;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
|
||||||
|
logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
|
||||||
|
|
||||||
InitMMIO(wii);
|
InitMMIO(wii);
|
||||||
|
|
||||||
|
@ -347,14 +363,14 @@ bool InitFastmemArena()
|
||||||
|
|
||||||
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
||||||
{
|
{
|
||||||
if (!is_fastmem_arena_initialized)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (auto& entry : logical_mapped_entries)
|
for (auto& entry : logical_mapped_entries)
|
||||||
{
|
{
|
||||||
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
|
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
|
||||||
}
|
}
|
||||||
logical_mapped_entries.clear();
|
logical_mapped_entries.clear();
|
||||||
|
|
||||||
|
s_logical_page_mappings.fill(nullptr);
|
||||||
|
|
||||||
for (u32 i = 0; i < dbat_table.size(); ++i)
|
for (u32 i = 0; i < dbat_table.size(); ++i)
|
||||||
{
|
{
|
||||||
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
|
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
|
||||||
|
@ -375,6 +391,9 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
||||||
if (intersection_start < intersection_end)
|
if (intersection_start < intersection_end)
|
||||||
{
|
{
|
||||||
// Found an overlapping region; map it.
|
// Found an overlapping region; map it.
|
||||||
|
|
||||||
|
if (is_fastmem_arena_initialized)
|
||||||
|
{
|
||||||
u32 position = physical_region.shm_position + intersection_start - mapping_address;
|
u32 position = physical_region.shm_position + intersection_start - mapping_address;
|
||||||
u8* base = logical_base + logical_address + intersection_start - translated_address;
|
u8* base = logical_base + logical_address + intersection_start - translated_address;
|
||||||
u32 mapped_size = intersection_end - intersection_start;
|
u32 mapped_size = intersection_end - intersection_start;
|
||||||
|
@ -382,13 +401,18 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
||||||
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
|
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
|
||||||
if (!mapped_pointer)
|
if (!mapped_pointer)
|
||||||
{
|
{
|
||||||
PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
|
PanicAlertFmt(
|
||||||
|
"Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
|
||||||
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
|
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
|
||||||
intersection_start, mapped_size, logical_address);
|
intersection_start, mapped_size, logical_address);
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
|
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s_logical_page_mappings[i] =
|
||||||
|
*physical_region.out_pointer + intersection_start - mapping_address;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,16 +21,18 @@ class Mapping;
|
||||||
namespace Memory
|
namespace Memory
|
||||||
{
|
{
|
||||||
// Base is a pointer to the base of the memory map. Yes, some MMU tricks
|
// Base is a pointer to the base of the memory map. Yes, some MMU tricks
|
||||||
// are used to set up a full GC or Wii memory map in process memory. on
|
// are used to set up a full GC or Wii memory map in process memory.
|
||||||
// 32-bit, you have to mask your offsets with 0x3FFFFFFF. This means that
|
|
||||||
// some things are mirrored too many times, but eh... it works.
|
|
||||||
|
|
||||||
// In 64-bit, this might point to "high memory" (above the 32-bit limit),
|
// In 64-bit, this might point to "high memory" (above the 32-bit limit),
|
||||||
// so be sure to load it into a 64-bit register.
|
// so be sure to load it into a 64-bit register.
|
||||||
extern u8* physical_base;
|
extern u8* physical_base;
|
||||||
extern u8* logical_base;
|
extern u8* logical_base;
|
||||||
|
|
||||||
// These are guaranteed to point to "low memory" addresses (sub-32-bit).
|
// This page table is used for a "soft MMU" implementation when
|
||||||
|
// setting up the full memory map in process memory isn't possible.
|
||||||
|
extern u8* physical_page_mappings_base;
|
||||||
|
extern u8* logical_page_mappings_base;
|
||||||
|
|
||||||
|
// The actual memory used for backing the memory map.
|
||||||
extern u8* m_pRAM;
|
extern u8* m_pRAM;
|
||||||
extern u8* m_pEXRAM;
|
extern u8* m_pEXRAM;
|
||||||
extern u8* m_pL1Cache;
|
extern u8* m_pL1Cache;
|
||||||
|
|
|
@ -215,10 +215,27 @@ protected:
|
||||||
// Dump a memory range of code
|
// Dump a memory range of code
|
||||||
void DumpCode(const u8* start, const u8* end);
|
void DumpCode(const u8* start, const u8* end);
|
||||||
|
|
||||||
|
// This enum is used for selecting an implementation of EmitBackpatchRoutine.
|
||||||
|
enum class MemAccessMode
|
||||||
|
{
|
||||||
|
// Always calls the slow C++ code. For performance reasons, should generally only be used if
|
||||||
|
// the guest address is known in advance and IsOptimizableRAMAddress returns false for it.
|
||||||
|
AlwaysSafe,
|
||||||
|
// Only emits fast access code. Must only be used if the guest address is known in advance
|
||||||
|
// and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
|
||||||
|
AlwaysUnsafe,
|
||||||
|
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
|
||||||
|
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
|
||||||
|
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
|
||||||
|
// then branches to either the fast access code or the slow C++ code.
|
||||||
|
Auto,
|
||||||
|
};
|
||||||
|
|
||||||
// This is the core routine for accessing emulated memory, with support for
|
// This is the core routine for accessing emulated memory, with support for
|
||||||
// many different kinds of loads and stores as well as fastmem backpatching.
|
// many different kinds of loads and stores as well as fastmem/backpatching.
|
||||||
//
|
//
|
||||||
// Registers used:
|
// Registers used:
|
||||||
|
//
|
||||||
// addr scratch
|
// addr scratch
|
||||||
// Store: X1 X0
|
// Store: X1 X0
|
||||||
// Load: X0
|
// Load: X0
|
||||||
|
@ -226,15 +243,21 @@ protected:
|
||||||
// Store float: X1 Q0
|
// Store float: X1 Q0
|
||||||
// Load float: X0
|
// Load float: X0
|
||||||
//
|
//
|
||||||
// If fastmem && !do_farcode, the addr argument can be any register.
|
// If mode == AlwaysUnsafe, the addr argument can be any register.
|
||||||
// Otherwise it must be the register listed in the table above.
|
// Otherwise it must be the register listed in the table above.
|
||||||
//
|
//
|
||||||
// Additional scratch registers are used in the following situations:
|
// Additional scratch registers are used in the following situations:
|
||||||
// fastmem && do_farcode && emitting_routine: X2
|
//
|
||||||
// fastmem && do_farcode && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
|
// emitting_routine && mode == Auto: X2
|
||||||
// fastmem && do_farcode && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
|
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||||
// !fastmem || do_farcode: X30 (plus lots more unless you set gprs_to_push and fprs_to_push)
|
// emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
|
||||||
void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS,
|
// mode != AlwaysSafe && !jo.fastmem_arena: X2
|
||||||
|
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
|
||||||
|
// !emitting_routine && mode == Auto && jo.fastmem_arena: X30
|
||||||
|
//
|
||||||
|
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
|
||||||
|
// may be clobbered if mode != AlwaysUnsafe.
|
||||||
|
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
|
||||||
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
|
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
|
||||||
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
|
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
|
||||||
|
|
||||||
|
|
|
@ -54,19 +54,44 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
|
||||||
ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory);
|
ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS,
|
void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
|
||||||
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
|
BitSet32 gprs_to_push, BitSet32 fprs_to_push,
|
||||||
bool emitting_routine)
|
bool emitting_routine)
|
||||||
{
|
{
|
||||||
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
||||||
|
|
||||||
|
const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
|
||||||
|
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
|
||||||
|
|
||||||
bool in_far_code = false;
|
bool in_far_code = false;
|
||||||
const u8* fastmem_start = GetCodePtr();
|
const u8* fastmem_start = GetCodePtr();
|
||||||
std::optional<FixupBranch> slowmem_fixup;
|
std::optional<FixupBranch> slowmem_fixup;
|
||||||
|
|
||||||
if (fastmem)
|
if (emit_fastmem)
|
||||||
{
|
{
|
||||||
if (do_farcode && emitting_routine)
|
ARM64Reg memory_base = MEM_REG;
|
||||||
|
ARM64Reg memory_offset = addr;
|
||||||
|
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
{
|
||||||
|
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
|
||||||
|
|
||||||
|
memory_base = EncodeRegTo64(temp);
|
||||||
|
memory_offset = ARM64Reg::W2;
|
||||||
|
|
||||||
|
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
|
||||||
|
LDR(memory_base, MEM_REG, ArithOption(temp, true));
|
||||||
|
|
||||||
|
if (emit_slowmem)
|
||||||
|
{
|
||||||
|
FixupBranch pass = CBNZ(memory_base);
|
||||||
|
slowmem_fixup = B();
|
||||||
|
SetJumpTarget(pass);
|
||||||
|
}
|
||||||
|
|
||||||
|
AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
|
||||||
|
}
|
||||||
|
else if (emit_slowmem && emitting_routine)
|
||||||
{
|
{
|
||||||
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
|
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
|
||||||
const ARM64Reg temp2 = ARM64Reg::W2;
|
const ARM64Reg temp2 = ARM64Reg::W2;
|
||||||
|
@ -79,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
ARM64Reg temp = ARM64Reg::D0;
|
ARM64Reg temp = ARM64Reg::D0;
|
||||||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
|
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
|
||||||
|
|
||||||
m_float_emit.STR(access_size, temp, MEM_REG, addr);
|
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
|
||||||
}
|
}
|
||||||
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
|
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
|
||||||
{
|
{
|
||||||
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
|
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);
|
||||||
|
|
||||||
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
|
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
|
||||||
true, false);
|
true, false);
|
||||||
|
@ -94,44 +119,44 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
|
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
STR(temp, MEM_REG, addr);
|
STR(temp, memory_base, memory_offset);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
STRH(temp, MEM_REG, addr);
|
STRH(temp, memory_base, memory_offset);
|
||||||
else
|
else
|
||||||
STRB(temp, MEM_REG, addr);
|
STRB(temp, memory_base, memory_offset);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||||
{
|
{
|
||||||
// This literally only stores 32 bytes of zeros to the target address
|
// This literally only stores 32 bytes of zeros to the target address
|
||||||
ARM64Reg temp = ARM64Reg::X30;
|
ARM64Reg temp = ARM64Reg::X30;
|
||||||
ADD(temp, addr, MEM_REG);
|
ADD(temp, memory_base, memory_offset);
|
||||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
|
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
|
||||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
|
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
LDR(RS, MEM_REG, addr);
|
LDR(RS, memory_base, memory_offset);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
LDRH(RS, MEM_REG, addr);
|
LDRH(RS, memory_base, memory_offset);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||||
LDRB(RS, MEM_REG, addr);
|
LDRB(RS, memory_base, memory_offset);
|
||||||
|
|
||||||
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
|
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const u8* fastmem_end = GetCodePtr();
|
const u8* fastmem_end = GetCodePtr();
|
||||||
|
|
||||||
if (!fastmem || do_farcode)
|
if (emit_slowmem)
|
||||||
{
|
{
|
||||||
const bool memcheck = jo.memcheck && !emitting_routine;
|
const bool memcheck = jo.memcheck && !emitting_routine;
|
||||||
|
|
||||||
if (fastmem && do_farcode)
|
if (emit_fastmem)
|
||||||
{
|
{
|
||||||
in_far_code = true;
|
in_far_code = true;
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
if (!emitting_routine)
|
if (jo.fastmem_arena && !emitting_routine)
|
||||||
{
|
{
|
||||||
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
|
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
|
||||||
fastmem_area->fastmem_code = fastmem_start;
|
fastmem_area->fastmem_code = fastmem_start;
|
||||||
|
@ -261,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
|
|
||||||
if (in_far_code)
|
if (in_far_code)
|
||||||
{
|
{
|
||||||
if (emitting_routine)
|
if (slowmem_fixup)
|
||||||
{
|
{
|
||||||
FixupBranch done = B();
|
FixupBranch done = B();
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
|
|
|
@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
||||||
{
|
{
|
||||||
// We want to make sure to not get LR as a temp register
|
// We want to make sure to not get LR as a temp register
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
|
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
|
||||||
ARM64Reg dest_reg = gpr.R(dest);
|
ARM64Reg dest_reg = gpr.R(dest);
|
||||||
|
@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
||||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
if (!jo.memcheck)
|
if (!jo.memcheck)
|
||||||
regs_in_use[DecodeReg(dest_reg)] = 0;
|
regs_in_use[DecodeReg(dest_reg)] = 0;
|
||||||
|
|
||||||
|
@ -129,10 +133,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
||||||
if (is_immediate)
|
if (is_immediate)
|
||||||
mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
|
mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
|
||||||
|
|
||||||
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
|
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
|
||||||
|
fprs_in_use);
|
||||||
}
|
}
|
||||||
else if (mmio_address)
|
else if (mmio_address)
|
||||||
{
|
{
|
||||||
|
@ -142,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use);
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.BindToRegister(dest, false, true);
|
gpr.BindToRegister(dest, false, true);
|
||||||
|
@ -156,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
|
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
|
||||||
|
@ -163,6 +170,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
{
|
{
|
||||||
// We want to make sure to not get LR as a temp register
|
// We want to make sure to not get LR as a temp register
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
ARM64Reg RS = gpr.R(value);
|
ARM64Reg RS = gpr.R(value);
|
||||||
|
|
||||||
|
@ -258,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
|
|
||||||
u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
||||||
u32 mmio_address = 0;
|
u32 mmio_address = 0;
|
||||||
|
@ -290,10 +301,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
|
|
||||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||||
}
|
}
|
||||||
else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
|
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
else if (mmio_address)
|
else if (mmio_address)
|
||||||
{
|
{
|
||||||
|
@ -303,7 +314,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use);
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update && !early_update)
|
if (update && !early_update)
|
||||||
|
@ -314,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
||||||
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
|
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
|
||||||
|
@ -496,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
||||||
s32 offset = inst.SIMM_16;
|
s32 offset = inst.SIMM_16;
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
// MMU games make use of a >= d despite this being invalid according to the PEM.
|
// MMU games make use of a >= d despite this being invalid according to the PEM.
|
||||||
// Because of this, make sure not to re-read rA once the loads have started.
|
// Because of this, make sure not to re-read rA once the loads have started.
|
||||||
|
@ -521,13 +536,15 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
||||||
|
|
||||||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
if (i == 31)
|
if (i == 31)
|
||||||
regs_in_use[DecodeReg(addr_reg)] = 0;
|
regs_in_use[DecodeReg(addr_reg)] = 0;
|
||||||
if (!jo.memcheck)
|
if (!jo.memcheck)
|
||||||
regs_in_use[DecodeReg(dest_reg)] = 0;
|
regs_in_use[DecodeReg(dest_reg)] = 0;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg),
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
|
||||||
regs_in_use, fprs_in_use);
|
fprs_in_use);
|
||||||
|
|
||||||
gpr.BindToRegister(i, false, true);
|
gpr.BindToRegister(i, false, true);
|
||||||
ASSERT(dest_reg == gpr.R(i));
|
ASSERT(dest_reg == gpr.R(i));
|
||||||
|
@ -537,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::stmw(UGeckoInstruction inst)
|
void JitArm64::stmw(UGeckoInstruction inst)
|
||||||
|
@ -548,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
||||||
s32 offset = inst.SIMM_16;
|
s32 offset = inst.SIMM_16;
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
ARM64Reg addr_reg = ARM64Reg::W1;
|
ARM64Reg addr_reg = ARM64Reg::W1;
|
||||||
if (a)
|
if (a)
|
||||||
|
@ -571,17 +592,21 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
||||||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
if (i == 31)
|
if (i == 31)
|
||||||
regs_in_use[DecodeReg(addr_reg)] = 0;
|
regs_in_use[DecodeReg(addr_reg)] = 0;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg),
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
|
||||||
regs_in_use, fprs_in_use);
|
fprs_in_use);
|
||||||
|
|
||||||
if (i != 31)
|
if (i != 31)
|
||||||
ADD(addr_reg, addr_reg, 4);
|
ADD(addr_reg, addr_reg, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::dcbx(UGeckoInstruction inst)
|
void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
|
@ -770,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
|
||||||
int a = inst.RA, b = inst.RB;
|
int a = inst.RA, b = inst.RB;
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); });
|
Common::ScopeGuard register_guard([&] {
|
||||||
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
|
});
|
||||||
|
|
||||||
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
||||||
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
|
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
|
||||||
|
@ -838,8 +869,10 @@ void JitArm64::dcbz(UGeckoInstruction inst)
|
||||||
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||||
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
|
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
|
|
||||||
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0,
|
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
|
||||||
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
|
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
|
||||||
|
|
||||||
if (using_dcbz_hack)
|
if (using_dcbz_hack)
|
||||||
|
|
|
@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
fpr.Lock(ARM64Reg::Q0);
|
fpr.Lock(ARM64Reg::Q0);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
|
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
|
||||||
ARM64Reg addr_reg = ARM64Reg::W0;
|
ARM64Reg addr_reg = ARM64Reg::W0;
|
||||||
|
@ -166,17 +168,19 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
||||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
||||||
if (!jo.memcheck)
|
if (!jo.memcheck)
|
||||||
fprs_in_use[DecodeReg(VD)] = 0;
|
fprs_in_use[DecodeReg(VD)] = 0;
|
||||||
|
|
||||||
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||||
{
|
{
|
||||||
EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0));
|
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use);
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
|
|
||||||
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
|
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
|
||||||
|
@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
fpr.Unlock(ARM64Reg::Q0);
|
fpr.Unlock(ARM64Reg::Q0);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::stfXX(UGeckoInstruction inst)
|
void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
|
@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
|
||||||
ARM64Reg addr_reg = ARM64Reg::W1;
|
ARM64Reg addr_reg = ARM64Reg::W1;
|
||||||
|
|
||||||
|
@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
||||||
|
|
||||||
if (is_immediate)
|
if (is_immediate)
|
||||||
|
@ -389,21 +399,21 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||||
}
|
}
|
||||||
else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
|
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
|
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_addr_reg_if_needed();
|
set_addr_reg_if_needed();
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update && !early_update)
|
if (update && !early_update)
|
||||||
|
@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
fpr.Unlock(ARM64Reg::Q0);
|
fpr.Unlock(ARM64Reg::Q0);
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
||||||
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
||||||
fpr.Lock(ARM64Reg::Q1);
|
fpr.Lock(ARM64Reg::Q1);
|
||||||
}
|
}
|
||||||
|
else if (!jo.fastmem_arena)
|
||||||
|
{
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
}
|
||||||
|
|
||||||
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
||||||
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
|
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
|
||||||
|
@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
||||||
// Wipe the registers we are using as temporaries
|
// Wipe the registers we are using as temporaries
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
|
||||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
|
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
|
||||||
if (!jo.memcheck)
|
if (!jo.memcheck)
|
||||||
fprs_in_use[DecodeReg(VS)] = 0;
|
fprs_in_use[DecodeReg(VS)] = 0;
|
||||||
|
@ -90,7 +96,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
||||||
if (!w)
|
if (!w)
|
||||||
flags |= BackPatchInfo::FLAG_PAIR;
|
flags |= BackPatchInfo::FLAG_PAIR;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
|
||||||
fprs_in_use);
|
fprs_in_use);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
||||||
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
||||||
fpr.Unlock(ARM64Reg::Q1);
|
fpr.Unlock(ARM64Reg::Q1);
|
||||||
}
|
}
|
||||||
|
else if (!jo.fastmem_arena)
|
||||||
|
{
|
||||||
|
gpr.Unlock(ARM64Reg::W2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::psq_stXX(UGeckoInstruction inst)
|
void JitArm64::psq_stXX(UGeckoInstruction inst)
|
||||||
|
@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
if (!js.assumeNoPairedQuantize)
|
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
|
||||||
gpr.Lock(ARM64Reg::W2);
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W3);
|
||||||
|
|
||||||
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
|
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
|
||||||
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
|
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
|
||||||
|
@ -229,12 +241,14 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
||||||
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
||||||
if (!update || early_update)
|
if (!update || early_update)
|
||||||
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
|
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
|
||||||
|
|
||||||
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
||||||
if (!w)
|
if (!w)
|
||||||
flags |= BackPatchInfo::FLAG_PAIR;
|
flags |= BackPatchInfo::FLAG_PAIR;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
|
||||||
fprs_in_use);
|
fprs_in_use);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
fpr.Unlock(ARM64Reg::Q0);
|
fpr.Unlock(ARM64Reg::Q0);
|
||||||
|
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W2);
|
||||||
|
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
|
||||||
|
gpr.Lock(ARM64Reg::W3);
|
||||||
if (!js.assumeNoPairedQuantize)
|
if (!js.assumeNoPairedQuantize)
|
||||||
{
|
|
||||||
gpr.Unlock(ARM64Reg::W2);
|
|
||||||
fpr.Unlock(ARM64Reg::Q1);
|
fpr.Unlock(ARM64Reg::Q1);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
|
||||||
// set the mem_base based on MSR flags
|
// set the mem_base based on MSR flags
|
||||||
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||||
MOVP2R(MEM_REG, Memory::physical_base);
|
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
|
||||||
FixupBranch membaseend = B();
|
FixupBranch membaseend = B();
|
||||||
SetJumpTarget(physmem);
|
SetJumpTarget(physmem);
|
||||||
MOVP2R(MEM_REG, Memory::logical_base);
|
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
|
||||||
SetJumpTarget(membaseend);
|
SetJumpTarget(membaseend);
|
||||||
|
|
||||||
// iCache[(address >> 2) & iCache_Mask];
|
// iCache[(address >> 2) & iCache_Mask];
|
||||||
|
@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
|
||||||
// set the mem_base based on MSR flags and jump to next block.
|
// set the mem_base based on MSR flags and jump to next block.
|
||||||
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||||
MOVP2R(MEM_REG, Memory::physical_base);
|
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
|
||||||
BR(ARM64Reg::X0);
|
BR(ARM64Reg::X0);
|
||||||
SetJumpTarget(physmem);
|
SetJumpTarget(physmem);
|
||||||
MOVP2R(MEM_REG, Memory::logical_base);
|
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
|
||||||
BR(ARM64Reg::X0);
|
BR(ARM64Reg::X0);
|
||||||
|
|
||||||
// Call JIT
|
// Call JIT
|
||||||
|
@ -510,7 +510,7 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
|
||||||
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
|
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
|
@ -520,8 +520,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -538,8 +538,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -556,8 +556,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -573,8 +573,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -591,7 +591,7 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
|
||||||
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
|
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
|
@ -601,8 +601,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -619,8 +619,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -637,8 +637,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -654,8 +654,8 @@ void JitArm64::GenerateQuantizedLoads()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
// X0 is the scale
|
// X0 is the scale
|
||||||
// X1 is the address
|
// X1 is the address
|
||||||
// X2 is a temporary
|
// X2 is a temporary
|
||||||
|
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
|
||||||
// X30 is LR
|
// X30 is LR
|
||||||
// Q0 is the register
|
// Q0 is the register
|
||||||
// Q1 is a temporary
|
// Q1 is a temporary
|
||||||
|
@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
|
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
|
||||||
if (!jo.memcheck)
|
if (!jo.memcheck)
|
||||||
gprs_to_push &= ~BitSet32{1};
|
gprs_to_push &= ~BitSet32{1};
|
||||||
|
if (!jo.fastmem_arena)
|
||||||
|
gprs_to_push &= ~BitSet32{3};
|
||||||
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
|
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
|
||||||
ARM64FloatEmitter float_emit(this);
|
ARM64FloatEmitter float_emit(this);
|
||||||
|
|
||||||
|
@ -718,8 +721,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -737,8 +740,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -756,8 +759,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -774,8 +777,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -792,8 +795,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
|
||||||
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -803,8 +806,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -822,8 +825,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -841,8 +844,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -859,8 +862,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
@ -877,8 +880,8 @@ void JitArm64::GenerateQuantizedStores()
|
||||||
constexpr u32 flags =
|
constexpr u32 flags =
|
||||||
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
|
||||||
|
|
||||||
EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
|
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
|
||||||
gprs_to_push, fprs_to_push, true);
|
fprs_to_push, true);
|
||||||
|
|
||||||
RET(ARM64Reg::X30);
|
RET(ARM64Reg::X30);
|
||||||
}
|
}
|
||||||
|
|
|
@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
|
||||||
|
|
||||||
constexpr int BAT_INDEX_SHIFT = 17;
|
constexpr int BAT_INDEX_SHIFT = 17;
|
||||||
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
|
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
|
||||||
|
constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
|
||||||
constexpr u32 BAT_MAPPED_BIT = 0x1;
|
constexpr u32 BAT_MAPPED_BIT = 0x1;
|
||||||
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
|
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
|
||||||
constexpr u32 BAT_WI_BIT = 0x4;
|
constexpr u32 BAT_WI_BIT = 0x4;
|
||||||
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
|
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
|
||||||
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
|
using BatTable = std::array<u32, BAT_PAGE_COUNT>; // 128 KB
|
||||||
extern BatTable ibat_table;
|
extern BatTable ibat_table;
|
||||||
extern BatTable dbat_table;
|
extern BatTable dbat_table;
|
||||||
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)
|
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)
|
||||||
|
|
Loading…
Reference in New Issue