Merge pull request #12320 from JosJuice/jitarm64-mmu-order

PowerPC: Unify "FromJit" MMU functions
This commit is contained in:
Mai 2023-11-30 18:34:32 -05:00 committed by GitHub
commit 5f7e9d3bf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 213 additions and 277 deletions

View File

@@ -445,7 +445,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse(); BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit64, &m_mmu, RSCRATCH); ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (emit_fast_path) if (emit_fast_path)

View File

@@ -400,16 +400,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
switch (accessSize) switch (accessSize)
{ {
case 64: case 64:
ABI_CallFunctionPR(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, reg_addr); ABI_CallFunctionPR(PowerPC::ReadU64FromJit, &m_jit.m_mmu, reg_addr);
break; break;
case 32: case 32:
ABI_CallFunctionPR(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, reg_addr); ABI_CallFunctionPR(PowerPC::ReadU32FromJit, &m_jit.m_mmu, reg_addr);
break; break;
case 16: case 16:
ABI_CallFunctionPR(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, reg_addr); ABI_CallFunctionPR(PowerPC::ReadU16FromJit, &m_jit.m_mmu, reg_addr);
break; break;
case 8: case 8:
ABI_CallFunctionPR(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, reg_addr); ABI_CallFunctionPR(PowerPC::ReadU8FromJit, &m_jit.m_mmu, reg_addr);
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -464,16 +464,16 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
switch (accessSize) switch (accessSize)
{ {
case 64: case 64:
ABI_CallFunctionPC(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, address); ABI_CallFunctionPC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, address);
break; break;
case 32: case 32:
ABI_CallFunctionPC(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, address); ABI_CallFunctionPC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, address);
break; break;
case 16: case 16:
ABI_CallFunctionPC(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, address); ABI_CallFunctionPC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, address);
break; break;
case 8: case 8:
ABI_CallFunctionPC(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, address); ABI_CallFunctionPC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, address);
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
@@ -586,19 +586,19 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
switch (accessSize) switch (accessSize)
{ {
case 64: case 64:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit64 : PowerPC::WriteU64SwapFromJit64, ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit : PowerPC::WriteU64SwapFromJit,
&m_jit.m_mmu, reg, reg_addr); &m_jit.m_mmu, reg, reg_addr);
break; break;
case 32: case 32:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit64 : PowerPC::WriteU32SwapFromJit64, ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit : PowerPC::WriteU32SwapFromJit,
&m_jit.m_mmu, reg, reg_addr); &m_jit.m_mmu, reg, reg_addr);
break; break;
case 16: case 16:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit64 : PowerPC::WriteU16SwapFromJit64, ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit : PowerPC::WriteU16SwapFromJit,
&m_jit.m_mmu, reg, reg_addr); &m_jit.m_mmu, reg, reg_addr);
break; break;
case 8: case 8:
ABI_CallFunctionPRR(PowerPC::WriteU8FromJit64, &m_jit.m_mmu, reg, reg_addr); ABI_CallFunctionPRR(PowerPC::WriteU8FromJit, &m_jit.m_mmu, reg, reg_addr);
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -668,16 +668,16 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
switch (accessSize) switch (accessSize)
{ {
case 64: case 64:
ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit64, &m_jit.m_mmu, arg, address); ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit, &m_jit.m_mmu, arg, address);
break; break;
case 32: case 32:
ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit64, &m_jit.m_mmu, arg, address); ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit, &m_jit.m_mmu, arg, address);
break; break;
case 16: case 16:
ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit64, &m_jit.m_mmu, arg, address); ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit, &m_jit.m_mmu, arg, address);
break; break;
case 8: case 8:
ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit64, &m_jit.m_mmu, arg, address); ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit, &m_jit.m_mmu, arg, address);
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);

View File

@@ -242,21 +242,23 @@ protected:
// Registers used: // Registers used:
// //
// addr scratch // addr scratch
// Store: X1 X0 // Store: X2 X1
// Load: X0 // Load: X1
// Zero 256: X0 X30 // Zero 256: X1 X30
// Store float: X1 Q0 // Store float: X2 Q0
// Load float: X0 // Load float: X1
// //
// If mode == AlwaysFastAccess, the addr argument can be any register. // If mode == AlwaysFastAccess, the addr argument can be any register.
// Otherwise it must be the register listed in the table above. // Otherwise it must be the register listed in the table above.
// //
// Additional scratch registers are used in the following situations: // Additional scratch registers are used in the following situations:
// //
// emitting_routine && mode == Auto: X2 // emitting_routine && mode == Auto: X0
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3 // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
// mode != AlwaysSlowAccess && !jo.fastmem: X2 // mode != AlwaysSlowAccess && !jo.fastmem: X0
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
// (flags & BackPatchInfo::FLAG_LOAD): X0
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30 // !emitting_routine && mode == Auto && jo.fastmem: X30
// //

View File

@@ -79,7 +79,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30; const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp); memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W2; memory_offset = ARM64Reg::W0;
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT); LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true)); LDR(memory_base, MEM_REG, ArithOption(temp, true));
@@ -95,8 +95,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
} }
else if (emit_slow_access && emitting_routine) else if (emit_slow_access && emitting_routine)
{ {
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3; const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2; const ARM64Reg temp2 = ARM64Reg::W0;
slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2); slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
} }
@@ -117,7 +117,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
} }
else if (flags & BackPatchInfo::FLAG_STORE) else if (flags & BackPatchInfo::FLAG_STORE)
{ {
ARM64Reg temp = ARM64Reg::W0; ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true); temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
@@ -169,7 +169,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (slow_access_fixup) if (slow_access_fixup)
SetJumpTarget(*slow_access_fixup); SetJumpTarget(*slow_access_fixup);
const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0; const ARM64Reg temp_gpr = ARM64Reg::W1;
const int temp_gpr_index = DecodeReg(temp_gpr); const int temp_gpr_index = DecodeReg(temp_gpr);
BitSet32 gprs_to_push_early = {}; BitSet32 gprs_to_push_early = {};
@@ -181,8 +181,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
// If we're already pushing one register in the first PushRegisters call, we can push a // If we're already pushing one register in the first PushRegisters call, we can push a
// second one for free. Let's do so, since it might save one instruction in the second // second one for free. Let's do so, since it might save one instruction in the second
// PushRegisters call. (Do not do this for caller-saved registers which may be in the register // PushRegisters call. (Do not do this for caller-saved registers which may be in the register
// cache, or else EmitMemcheck will not be able to flush the register cache correctly!) // cache, or WriteConditionalExceptionExit won't be able to flush the register cache correctly!)
if (gprs_to_push & gprs_to_push_early) if ((gprs_to_push & gprs_to_push_early).Count() & 1)
gprs_to_push_early[30] = true; gprs_to_push_early[30] = true;
ABI_PushRegisters(gprs_to_push & gprs_to_push_early); ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
@@ -203,7 +203,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (flags & BackPatchInfo::FLAG_STORE) if (flags & BackPatchInfo::FLAG_STORE)
{ {
ARM64Reg src_reg = RS; ARM64Reg src_reg = RS;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0; const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
if (flags & BackPatchInfo::FLAG_FLOAT) if (flags & BackPatchInfo::FLAG_FLOAT)
{ {
@@ -226,41 +226,38 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (access_size == 64) if (access_size == 64)
{ {
ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJitArm64 : ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
&PowerPC::WriteU64FromJitArm64, &m_mmu, src_reg, ARM64Reg::W2);
src_reg, ARM64Reg::W1, &m_mmu);
} }
else if (access_size == 32) else if (access_size == 32)
{ {
ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJitArm64 : ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
&PowerPC::WriteU32FromJitArm64, &m_mmu, src_reg, ARM64Reg::W2);
src_reg, ARM64Reg::W1, &m_mmu);
} }
else if (access_size == 16) else if (access_size == 16)
{ {
ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJitArm64 : ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
&PowerPC::WriteU16FromJitArm64, &m_mmu, src_reg, ARM64Reg::W2);
src_reg, ARM64Reg::W1, &m_mmu);
} }
else else
{ {
ABI_CallFunction(&PowerPC::WriteU8FromJitArm64, src_reg, ARM64Reg::W1, &m_mmu); ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
} }
} }
else if (flags & BackPatchInfo::FLAG_ZERO_256) else if (flags & BackPatchInfo::FLAG_ZERO_256)
{ {
ABI_CallFunction(&PowerPC::ClearDCacheLineFromJitArm64, ARM64Reg::W0, &m_mmu); ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
} }
else else
{ {
if (access_size == 64) if (access_size == 64)
ABI_CallFunction(&PowerPC::ReadU64FromJitArm64, ARM64Reg::W0, &m_mmu); ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
else if (access_size == 32) else if (access_size == 32)
ABI_CallFunction(&PowerPC::ReadU32FromJitArm64, ARM64Reg::W0, &m_mmu); ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
else if (access_size == 16) else if (access_size == 16)
ABI_CallFunction(&PowerPC::ReadU16FromJitArm64, ARM64Reg::W0, &m_mmu); ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
else else
ABI_CallFunction(&PowerPC::ReadU8FromJitArm64, ARM64Reg::W0, &m_mmu); ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
} }
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);

View File

@@ -27,9 +27,9 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update) void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{ {
// We want to make sure to not get LR as a temp register // We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest); ARM64Reg dest_reg = gpr.R(dest);
@@ -42,7 +42,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg)) if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg); off_reg = gpr.R(offsetReg);
ARM64Reg addr_reg = ARM64Reg::W0; ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0; u32 imm_addr = 0;
bool is_immediate = false; bool is_immediate = false;
@@ -123,9 +123,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; regs_in_use[DecodeReg(dest_reg)] = 0;
@@ -142,7 +142,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
} }
else if (mmio_address) else if (mmio_address)
{ {
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0; regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0; regs_in_use[DecodeReg(dest_reg)] = 0;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use, MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
@@ -165,18 +165,18 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg); MOV(gpr.R(addr), addr_reg);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
} }
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
bool update) bool update)
{ {
// We want to make sure to not get LR as a temp register // We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
ARM64Reg RS = gpr.R(value); ARM64Reg RS = gpr.R(value);
@@ -188,7 +188,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest)) if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest); reg_dest = gpr.R(dest);
ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg addr_reg = ARM64Reg::W2;
u32 imm_addr = 0; u32 imm_addr = 0;
bool is_immediate = false; bool is_immediate = false;
@@ -269,11 +269,11 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0; u32 mmio_address = 0;
@@ -290,19 +290,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else else
accessSize = 8; accessSize = 8;
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
ARM64Reg temp = ARM64Reg::W1; ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true); temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (accessSize == 32) if (accessSize == 32)
STR(IndexType::Post, temp, ARM64Reg::X0, 4); STR(IndexType::Post, temp, ARM64Reg::X2, 4);
else if (accessSize == 16) else if (accessSize == 16)
STRH(IndexType::Post, temp, ARM64Reg::X0, 2); STRH(IndexType::Post, temp, ARM64Reg::X2, 2);
else else
STRB(IndexType::Post, temp, ARM64Reg::X0, 1); STRB(IndexType::Post, temp, ARM64Reg::X2, 1);
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3; js.fifoBytesSinceCheck += accessSize >> 3;
} }
@@ -313,8 +313,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
} }
else if (mmio_address) else if (mmio_address)
{ {
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0; regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(RS)] = 0; regs_in_use[DecodeReg(RS)] = 0;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
@@ -334,9 +334,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg); MOV(gpr.R(dest), addr_reg);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
} }
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp, FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -518,13 +518,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
u32 a = inst.RA, d = inst.RD; u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
// MMU games make use of a >= d despite this being invalid according to the PEM. // MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure to not re-read rA after starting doing the loads. // If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
ARM64Reg addr_reg = ARM64Reg::W0; ARM64Reg addr_reg = ARM64Reg::W1;
bool a_is_addr_base_reg = false; bool a_is_addr_base_reg = false;
if (!a) if (!a)
MOVI2R(addr_reg, offset); MOVI2R(addr_reg, offset);
@@ -554,8 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0; regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; regs_in_use[DecodeReg(dest_reg)] = 0;
@@ -566,9 +566,9 @@ void JitArm64::lmw(UGeckoInstruction inst)
ASSERT(dest_reg == gpr.R(i)); ASSERT(dest_reg == gpr.R(i));
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg) if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg); gpr.Unlock(addr_base_reg);
} }
@@ -581,11 +581,11 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS; u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg addr_reg = ARM64Reg::W2;
bool a_is_addr_base_reg = false; bool a_is_addr_base_reg = false;
if (!a) if (!a)
MOVI2R(addr_reg, offset); MOVI2R(addr_reg, offset);
@@ -613,18 +613,18 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0; regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem) if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use); fprs_in_use);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg) if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg); gpr.Unlock(addr_base_reg);
} }
@@ -821,17 +821,17 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB; int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
Common::ScopeGuard register_guard([&] { Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
}); });
constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg temp_reg = ARM64Reg::W30; constexpr ARM64Reg temp_reg = ARM64Reg::W30;
// HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region. // HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@@ -895,11 +895,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem) if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0; gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0, EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
if (using_dcbz_hack) if (using_dcbz_hack)

View File

@@ -77,13 +77,13 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type = const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle; (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0); fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
const ARM64Reg VD = fpr.RW(inst.FD, type, false); const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0; ARM64Reg addr_reg = ARM64Reg::W1;
if (update) if (update)
{ {
@@ -167,9 +167,9 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0; fprs_in_use[DecodeReg(VD)] = 0;
@@ -192,10 +192,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg); MOV(gpr.R(a), addr_reg);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
} }
void JitArm64::stfXX(UGeckoInstruction inst) void JitArm64::stfXX(UGeckoInstruction inst)
@@ -278,11 +278,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = single_reg; V0 = single_reg;
} }
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg addr_reg = ARM64Reg::W2;
if (update) if (update)
{ {
@@ -369,11 +369,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate) if (is_immediate)
@@ -386,7 +386,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else else
accessSize = 32; accessSize = 32;
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
if (flags & BackPatchInfo::FLAG_SIZE_64) if (flags & BackPatchInfo::FLAG_SIZE_64)
m_float_emit.REV64(8, ARM64Reg::Q0, V0); m_float_emit.REV64(8, ARM64Reg::Q0, V0);
@@ -394,9 +394,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
m_float_emit.REV32(8, ARM64Reg::D0, V0); m_float_emit.REV32(8, ARM64Reg::D0, V0);
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0, m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
ARM64Reg::X0, accessSize >> 3); ARM64Reg::X2, accessSize >> 3);
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3; js.fifoBytesSinceCheck += accessSize >> 3;
} }
else if (m_mmu.IsOptimizableRAMAddress(imm_addr)) else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
@@ -428,8 +428,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
if (want_single && !have_single) if (want_single && !have_single)
fpr.Unlock(V0); fpr.Unlock(V0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem) if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
} }

View File

@@ -37,21 +37,21 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I; const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W; const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0); fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)
{ {
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1); fpr.Lock(ARM64Reg::Q1);
} }
else if (!jo.fastmem) else if (jo.memcheck || !jo.fastmem)
{ {
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
} }
constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2; constexpr ARM64Reg scale_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false); ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);
if (inst.RA || update) // Always uses the register on update if (inst.RA || update) // Always uses the register on update
@@ -85,9 +85,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries // Wipe the registers we are using as temporaries
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0; fprs_in_use[DecodeReg(VS)] = 0;
@@ -134,16 +134,16 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg); MOV(gpr.R(inst.RA), addr_reg);
} }
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)
{ {
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q1);
} }
else if (!jo.fastmem) else if (jo.memcheck || !jo.fastmem)
{ {
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
} }
} }
@ -203,15 +203,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
} }
} }
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || !jo.fastmem) if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W2); gpr.Lock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem) if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3); gpr.Lock(ARM64Reg::W3);
constexpr ARM64Reg scale_reg = ARM64Reg::W0; constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2; constexpr ARM64Reg addr_reg = ARM64Reg::W2;
if (inst.RA || update) // Always uses the register on update if (inst.RA || update) // Always uses the register on update
{ {
@ -243,11 +243,11 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries // Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false; gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w) if (!w)
@ -283,10 +283,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize && !have_single) if (js.assumeNoPairedQuantize && !have_single)
fpr.Unlock(VS); fpr.Unlock(VS);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0); fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem) if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2); gpr.Unlock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem) if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3); gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)

View File

@ -502,18 +502,19 @@ void JitArm64::GenerateFPRF(bool single)
void JitArm64::GenerateQuantizedLoads() void JitArm64::GenerateQuantizedLoads()
{ {
// X0 is the address // X0 is a temporary
// X1 is the scale // X1 is the address
// X2 is a temporary // X2 is the scale
// X3 is a temporary (used in EmitBackpatchRoutine) // X3 is a temporary (used in EmitBackpatchRoutine)
// X30 is LR // X30 is LR
// Q0 is the return // Q0 is the return
// Q1 is a temporary // Q1 is a temporary
ARM64Reg addr_reg = ARM64Reg::X0; ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1; ARM64Reg addr_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3}; ARM64Reg scale_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
if (!jo.memcheck) if (!jo.memcheck)
gprs_to_push &= ~BitSet32{0}; gprs_to_push &= ~BitSet32{1};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this); ARM64FloatEmitter float_emit(this);
@ -526,7 +527,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true); gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -542,8 +543,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X0, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X0, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -560,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -577,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -594,8 +595,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -607,7 +608,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true); gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -623,8 +624,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -641,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -658,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -675,8 +676,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
@ -711,18 +712,19 @@ void JitArm64::GenerateQuantizedLoads()
void JitArm64::GenerateQuantizedStores() void JitArm64::GenerateQuantizedStores()
{ {
// X0 is the scale // X0 is a temporary
// X1 is the address // X1 is the scale
// X2 is a temporary // X2 is the address
// X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine) // X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
// X30 is LR // X30 is LR
// Q0 is the register // Q0 is the register
// Q1 is a temporary // Q1 is a temporary
ARM64Reg scale_reg = ARM64Reg::X0; ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1; ARM64Reg scale_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2}; ARM64Reg addr_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
if (!jo.memcheck) if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1}; gprs_to_push &= ~BitSet32{2};
if (!jo.fastmem) if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3}; gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
@ -743,8 +745,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedU8 = GetCodePtr(); const u8* storePairedU8 = GetCodePtr();
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -762,8 +764,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedS8 = GetCodePtr(); const u8* storePairedS8 = GetCodePtr();
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -781,8 +783,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedU16 = GetCodePtr(); const u8* storePairedU16 = GetCodePtr();
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -799,8 +801,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -828,8 +830,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -847,8 +849,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleS8 = GetCodePtr(); const u8* storeSingleS8 = GetCodePtr();
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -866,8 +868,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -884,8 +886,8 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleS16 = GetCodePtr(); const u8* storeSingleS16 = GetCodePtr();
{ {
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

View File

@ -1681,100 +1681,51 @@ std::optional<u32> MMU::GetTranslatedAddress(u32 address)
return std::optional<u32>(result.address); return std::optional<u32>(result.address);
} }
void ClearDCacheLineFromJit64(MMU& mmu, u32 address) void ClearDCacheLineFromJit(MMU& mmu, u32 address)
{ {
mmu.ClearDCacheLine(address); mmu.ClearDCacheLine(address);
} }
u32 ReadU8ZXFromJit64(MMU& mmu, u32 address) u32 ReadU8FromJit(MMU& mmu, u32 address)
{ {
return mmu.Read_U8(address); return mmu.Read_U8(address);
} }
u32 ReadU16ZXFromJit64(MMU& mmu, u32 address) u32 ReadU16FromJit(MMU& mmu, u32 address)
{ {
return mmu.Read_U16(address); return mmu.Read_U16(address);
} }
u32 ReadU32FromJit64(MMU& mmu, u32 address) u32 ReadU32FromJit(MMU& mmu, u32 address)
{ {
return mmu.Read_U32(address); return mmu.Read_U32(address);
} }
u64 ReadU64FromJit64(MMU& mmu, u32 address) u64 ReadU64FromJit(MMU& mmu, u32 address)
{ {
return mmu.Read_U64(address); return mmu.Read_U64(address);
} }
void WriteU8FromJit64(MMU& mmu, u32 var, u32 address) void WriteU8FromJit(MMU& mmu, u32 var, u32 address)
{ {
mmu.Write_U8(var, address); mmu.Write_U8(var, address);
} }
void WriteU16FromJit64(MMU& mmu, u32 var, u32 address) void WriteU16FromJit(MMU& mmu, u32 var, u32 address)
{ {
mmu.Write_U16(var, address); mmu.Write_U16(var, address);
} }
void WriteU32FromJit64(MMU& mmu, u32 var, u32 address) void WriteU32FromJit(MMU& mmu, u32 var, u32 address)
{ {
mmu.Write_U32(var, address); mmu.Write_U32(var, address);
} }
void WriteU64FromJit64(MMU& mmu, u64 var, u32 address) void WriteU64FromJit(MMU& mmu, u64 var, u32 address)
{ {
mmu.Write_U64(var, address); mmu.Write_U64(var, address);
} }
void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address) void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address)
{ {
mmu.Write_U16_Swap(var, address); mmu.Write_U16_Swap(var, address);
} }
void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address) void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address)
{ {
mmu.Write_U32_Swap(var, address); mmu.Write_U32_Swap(var, address);
} }
void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address) void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64_Swap(var, address);
}
void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu)
{
mmu.ClearDCacheLine(address);
}
u8 ReadU8FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U8(address);
}
u16 ReadU16FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U16(address);
}
u32 ReadU32FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U32(address);
}
u64 ReadU64FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U64(address);
}
void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U8(var, address);
}
void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U16(var, address);
}
void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U32(var, address);
}
void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu)
{
mmu.Write_U64(var, address);
}
void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U16_Swap(var, address);
}
void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U32_Swap(var, address);
}
void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu)
{ {
mmu.Write_U64_Swap(var, address); mmu.Write_U64_Swap(var, address);
} }

View File

@ -328,32 +328,16 @@ private:
BatTable m_dbat_table; BatTable m_dbat_table;
}; };
void ClearDCacheLineFromJit64(MMU& mmu, u32 address); void ClearDCacheLineFromJit(MMU& mmu, u32 address);
u32 ReadU8ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value u32 ReadU8FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU16ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value u32 ReadU16FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU32FromJit64(MMU& mmu, u32 address); u32 ReadU32FromJit(MMU& mmu, u32 address);
u64 ReadU64FromJit64(MMU& mmu, u32 address); u64 ReadU64FromJit(MMU& mmu, u32 address);
void WriteU8FromJit64(MMU& mmu, u32 var, u32 address); void WriteU8FromJit(MMU& mmu, u32 var, u32 address);
void WriteU16FromJit64(MMU& mmu, u32 var, u32 address); void WriteU16FromJit(MMU& mmu, u32 var, u32 address);
void WriteU32FromJit64(MMU& mmu, u32 var, u32 address); void WriteU32FromJit(MMU& mmu, u32 var, u32 address);
void WriteU64FromJit64(MMU& mmu, u64 var, u32 address); void WriteU64FromJit(MMU& mmu, u64 var, u32 address);
void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address); void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address);
void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address); void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address);
void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address); void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address);
// The JitArm64 function that calls these has very specific register allocation that's difficult to
// change, so we have a separate set of functions here for it. This can probably be refactored in
// the future.
void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu);
u8 ReadU8FromJitArm64(u32 address, MMU& mmu);
u16 ReadU16FromJitArm64(u32 address, MMU& mmu);
u32 ReadU32FromJitArm64(u32 address, MMU& mmu);
u64 ReadU64FromJitArm64(u32 address, MMU& mmu);
void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu);
void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu);
} // namespace PowerPC } // namespace PowerPC