Merge pull request #12320 from JosJuice/jitarm64-mmu-order

PowerPC: Unify "FromJit" MMU functions
This commit is contained in:
Mai 2023-11-30 18:34:32 -05:00 committed by GitHub
commit 5f7e9d3bf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 213 additions and 277 deletions

View File

@ -445,7 +445,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit64, &m_mmu, RSCRATCH);
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (emit_fast_path)

View File

@ -400,16 +400,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
switch (accessSize)
{
case 64:
ABI_CallFunctionPR(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, reg_addr);
ABI_CallFunctionPR(PowerPC::ReadU64FromJit, &m_jit.m_mmu, reg_addr);
break;
case 32:
ABI_CallFunctionPR(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, reg_addr);
ABI_CallFunctionPR(PowerPC::ReadU32FromJit, &m_jit.m_mmu, reg_addr);
break;
case 16:
ABI_CallFunctionPR(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, reg_addr);
ABI_CallFunctionPR(PowerPC::ReadU16FromJit, &m_jit.m_mmu, reg_addr);
break;
case 8:
ABI_CallFunctionPR(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, reg_addr);
ABI_CallFunctionPR(PowerPC::ReadU8FromJit, &m_jit.m_mmu, reg_addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@ -464,16 +464,16 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
switch (accessSize)
{
case 64:
ABI_CallFunctionPC(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, address);
ABI_CallFunctionPC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, address);
break;
case 32:
ABI_CallFunctionPC(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, address);
ABI_CallFunctionPC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, address);
break;
case 16:
ABI_CallFunctionPC(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, address);
ABI_CallFunctionPC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, address);
break;
case 8:
ABI_CallFunctionPC(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, address);
ABI_CallFunctionPC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
@ -586,19 +586,19 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
switch (accessSize)
{
case 64:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit64 : PowerPC::WriteU64SwapFromJit64,
ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit : PowerPC::WriteU64SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
break;
case 32:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit64 : PowerPC::WriteU32SwapFromJit64,
ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit : PowerPC::WriteU32SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
break;
case 16:
ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit64 : PowerPC::WriteU16SwapFromJit64,
ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit : PowerPC::WriteU16SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
break;
case 8:
ABI_CallFunctionPRR(PowerPC::WriteU8FromJit64, &m_jit.m_mmu, reg, reg_addr);
ABI_CallFunctionPRR(PowerPC::WriteU8FromJit, &m_jit.m_mmu, reg, reg_addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@ -668,16 +668,16 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
switch (accessSize)
{
case 64:
ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit64, &m_jit.m_mmu, arg, address);
ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit, &m_jit.m_mmu, arg, address);
break;
case 32:
ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit64, &m_jit.m_mmu, arg, address);
ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit, &m_jit.m_mmu, arg, address);
break;
case 16:
ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit64, &m_jit.m_mmu, arg, address);
ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit, &m_jit.m_mmu, arg, address);
break;
case 8:
ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit64, &m_jit.m_mmu, arg, address);
ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit, &m_jit.m_mmu, arg, address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

View File

@ -242,21 +242,23 @@ protected:
// Registers used:
//
// addr scratch
// Store: X1 X0
// Load: X0
// Zero 256: X0 X30
// Store float: X1 Q0
// Load float: X0
// Store: X2 X1
// Load: X1
// Zero 256: X1 X30
// Store float: X2 Q0
// Load float: X1
//
// If mode == AlwaysFastAccess, the addr argument can be any register.
// Otherwise it must be the register listed in the table above.
//
// Additional scratch registers are used in the following situations:
//
// emitting_routine && mode == Auto: X2
// emitting_routine && mode == Auto: X0
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
// mode != AlwaysSlowAccess && !jo.fastmem: X2
// mode != AlwaysSlowAccess && !jo.fastmem: X0
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
// (flags & BackPatchInfo::FLAG_LOAD): X0
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30
//

View File

@ -79,7 +79,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W2;
memory_offset = ARM64Reg::W0;
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));
@ -95,8 +95,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
}
else if (emit_slow_access && emitting_routine)
{
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2;
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W0;
slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
}
@ -117,7 +117,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg temp = ARM64Reg::W0;
ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (flags & BackPatchInfo::FLAG_SIZE_32)
@ -169,7 +169,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (slow_access_fixup)
SetJumpTarget(*slow_access_fixup);
const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
const ARM64Reg temp_gpr = ARM64Reg::W1;
const int temp_gpr_index = DecodeReg(temp_gpr);
BitSet32 gprs_to_push_early = {};
@ -181,8 +181,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
// If we're already pushing one register in the first PushRegisters call, we can push a
// second one for free. Let's do so, since it might save one instruction in the second
// PushRegisters call. (Do not do this for caller-saved registers which may be in the register
// cache, or else EmitMemcheck will not be able to flush the register cache correctly!)
if (gprs_to_push & gprs_to_push_early)
// cache, or WriteConditionalExceptionExit won't be able to flush the register cache correctly!)
if ((gprs_to_push & gprs_to_push_early).Count() & 1)
gprs_to_push_early[30] = true;
ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
@ -203,7 +203,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg src_reg = RS;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
if (flags & BackPatchInfo::FLAG_FLOAT)
{
@ -226,41 +226,38 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (access_size == 64)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJitArm64 :
&PowerPC::WriteU64FromJitArm64,
src_reg, ARM64Reg::W1, &m_mmu);
ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
}
else if (access_size == 32)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJitArm64 :
&PowerPC::WriteU32FromJitArm64,
src_reg, ARM64Reg::W1, &m_mmu);
ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
}
else if (access_size == 16)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJitArm64 :
&PowerPC::WriteU16FromJitArm64,
src_reg, ARM64Reg::W1, &m_mmu);
ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
}
else
{
ABI_CallFunction(&PowerPC::WriteU8FromJitArm64, src_reg, ARM64Reg::W1, &m_mmu);
ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
}
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
ABI_CallFunction(&PowerPC::ClearDCacheLineFromJitArm64, ARM64Reg::W0, &m_mmu);
ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
}
else
{
if (access_size == 64)
ABI_CallFunction(&PowerPC::ReadU64FromJitArm64, ARM64Reg::W0, &m_mmu);
ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
else if (access_size == 32)
ABI_CallFunction(&PowerPC::ReadU32FromJitArm64, ARM64Reg::W0, &m_mmu);
ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
else if (access_size == 16)
ABI_CallFunction(&PowerPC::ReadU16FromJitArm64, ARM64Reg::W0, &m_mmu);
ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
else
ABI_CallFunction(&PowerPC::ReadU8FromJitArm64, ARM64Reg::W0, &m_mmu);
ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
}
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);

View File

@ -27,9 +27,9 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
@ -42,7 +42,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);
ARM64Reg addr_reg = ARM64Reg::W0;
ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
bool is_immediate = false;
@ -123,9 +123,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -142,7 +142,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
@ -165,18 +165,18 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
}
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W0);
ARM64Reg RS = gpr.R(value);
@ -188,7 +188,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);
ARM64Reg addr_reg = ARM64Reg::W1;
ARM64Reg addr_reg = ARM64Reg::W2;
u32 imm_addr = 0;
bool is_immediate = false;
@ -269,11 +269,11 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -290,19 +290,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else
accessSize = 8;
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (accessSize == 32)
STR(IndexType::Post, temp, ARM64Reg::X0, 4);
STR(IndexType::Post, temp, ARM64Reg::X2, 4);
else if (accessSize == 16)
STRH(IndexType::Post, temp, ARM64Reg::X0, 2);
STRH(IndexType::Post, temp, ARM64Reg::X2, 2);
else
STRB(IndexType::Post, temp, ARM64Reg::X0, 1);
STRB(IndexType::Post, temp, ARM64Reg::X2, 1);
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
@ -313,8 +313,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(RS)] = 0;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
@ -334,9 +334,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W0);
}
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@ -518,13 +518,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
// MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure not to re-read rA once the loads have started.
ARM64Reg addr_reg = ARM64Reg::W0;
ARM64Reg addr_reg = ARM64Reg::W1;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
@ -554,8 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -566,9 +566,9 @@ void JitArm64::lmw(UGeckoInstruction inst)
ASSERT(dest_reg == gpr.R(i));
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
@ -581,11 +581,11 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W0);
ARM64Reg addr_reg = ARM64Reg::W1;
ARM64Reg addr_reg = ARM64Reg::W2;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
@ -613,18 +613,18 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
@ -821,17 +821,17 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W0);
Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W0);
});
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
// HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@ -895,11 +895,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
if (using_dcbz_hack)

View File

@ -77,13 +77,13 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0;
ARM64Reg addr_reg = ARM64Reg::W1;
if (update)
{
@ -167,9 +167,9 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
@ -192,10 +192,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
}
void JitArm64::stfXX(UGeckoInstruction inst)
@ -278,11 +278,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = single_reg;
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W0);
ARM64Reg addr_reg = ARM64Reg::W1;
ARM64Reg addr_reg = ARM64Reg::W2;
if (update)
{
@ -369,11 +369,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate)
@ -386,7 +386,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else
accessSize = 32;
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
if (flags & BackPatchInfo::FLAG_SIZE_64)
m_float_emit.REV64(8, ARM64Reg::Q0, V0);
@ -394,9 +394,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
m_float_emit.REV32(8, ARM64Reg::D0, V0);
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
ARM64Reg::X0, accessSize >> 3);
ARM64Reg::X2, accessSize >> 3);
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
@ -428,8 +428,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
if (want_single && !have_single)
fpr.Unlock(V0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W0);
}

View File

@ -37,21 +37,21 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
}
else if (!jo.fastmem)
else if (jo.memcheck || !jo.fastmem)
{
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W0);
}
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2;
constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg scale_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);
if (inst.RA || update) // Always uses the register on update
@ -85,9 +85,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
@ -134,16 +134,16 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
}
else if (!jo.fastmem)
else if (jo.memcheck || !jo.fastmem)
{
gpr.Unlock(ARM64Reg::W2);
gpr.Unlock(ARM64Reg::W0);
}
}
@ -203,15 +203,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3);
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2;
constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg addr_reg = ARM64Reg::W2;
if (inst.RA || update) // Always uses the register on update
{
@ -243,11 +243,11 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@ -283,10 +283,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize && !have_single)
fpr.Unlock(VS);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize)

View File

@ -502,18 +502,19 @@ void JitArm64::GenerateFPRF(bool single)
void JitArm64::GenerateQuantizedLoads()
{
// X0 is the address
// X1 is the scale
// X2 is a temporary
// X0 is a temporary
// X1 is the address
// X2 is the scale
// X3 is a temporary (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the return
// Q1 is a temporary
ARM64Reg addr_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3};
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
ARM64Reg scale_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{0};
gprs_to_push &= ~BitSet32{1};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);
@ -526,7 +527,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
RET(ARM64Reg::X30);
}
@ -542,8 +543,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(ARM64Reg::X0, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X0, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -560,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -577,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -594,8 +595,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -607,7 +608,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
RET(ARM64Reg::X30);
}
@ -623,8 +624,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -641,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -658,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -675,8 +676,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -711,18 +712,19 @@ void JitArm64::GenerateQuantizedLoads()
void JitArm64::GenerateQuantizedStores()
{
// X0 is the scale
// X1 is the address
// X2 is a temporary
// X0 is a temporary
// X1 is the scale
// X2 is the address
// X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
ARM64Reg scale_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
ARM64Reg addr_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
gprs_to_push &= ~BitSet32{2};
if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
@ -743,8 +745,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU8 = GetCodePtr();
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -762,8 +764,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS8 = GetCodePtr();
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -781,8 +783,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU16 = GetCodePtr();
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -799,8 +801,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@ -828,8 +830,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -847,8 +849,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS8 = GetCodePtr();
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -866,8 +868,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@ -884,8 +886,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS16 = GetCodePtr();
{
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

View File

@ -1681,100 +1681,51 @@ std::optional<u32> MMU::GetTranslatedAddress(u32 address)
return std::optional<u32>(result.address);
}
// Free-function wrapper used as a call target by JIT-emitted code: forwards to
// MMU::ClearDCacheLine(address) on the MMU passed as the first argument.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void ClearDCacheLineFromJit64(MMU& mmu, u32 address)
void ClearDCacheLineFromJit(MMU& mmu, u32 address)
{
mmu.ClearDCacheLine(address);
}
// Free-function wrapper used as a call target by JIT-emitted code: returns the result of
// MMU::Read_U8(address) as a u32 (the header declaration documents it as zero-extended).
// NOTE(review): both the ...ZXFromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
u32 ReadU8ZXFromJit64(MMU& mmu, u32 address)
u32 ReadU8FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U8(address);
}
// Free-function wrapper used as a call target by JIT-emitted code: returns the result of
// MMU::Read_U16(address) as a u32 (the header declaration documents it as zero-extended).
// NOTE(review): both the ...ZXFromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
u32 ReadU16ZXFromJit64(MMU& mmu, u32 address)
u32 ReadU16FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U16(address);
}
// Free-function wrapper used as a call target by JIT-emitted code: returns
// MMU::Read_U32(address) for the MMU passed as the first argument.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
u32 ReadU32FromJit64(MMU& mmu, u32 address)
u32 ReadU32FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U32(address);
}
// Free-function wrapper used as a call target by JIT-emitted code: returns
// MMU::Read_U64(address) for the MMU passed as the first argument.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
u64 ReadU64FromJit64(MMU& mmu, u32 address)
u64 ReadU64FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U64(address);
}
// Free-function wrapper: forwards to MMU::Write_U8(var, address) on the MMU passed as the
// first argument. The value arrives as a u32 and is handed to Write_U8 unchanged.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU8FromJit64(MMU& mmu, u32 var, u32 address)
void WriteU8FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U8(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U16(var, address) on the MMU passed as the
// first argument. The value arrives as a u32 and is handed to Write_U16 unchanged.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU16FromJit64(MMU& mmu, u32 var, u32 address)
void WriteU16FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U16(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U32(var, address) on the MMU passed as the
// first argument.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU32FromJit64(MMU& mmu, u32 var, u32 address)
void WriteU32FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U32(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U64(var, address) on the MMU passed as the
// first argument.
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU64FromJit64(MMU& mmu, u64 var, u32 address)
void WriteU64FromJit(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U16_Swap(var, address) on the MMU passed as
// the first argument (presumably a byte-swapped store; confirm against MMU::Write_U16_Swap).
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address)
void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U16_Swap(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U32_Swap(var, address) on the MMU passed as
// the first argument (presumably a byte-swapped store; confirm against MMU::Write_U32_Swap).
// NOTE(review): both the ...FromJit64 and the unified ...FromJit signature lines appear here,
// presumably the before/after of a rename; confirm which one the file keeps.
void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address)
void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U32_Swap(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U64_Swap(var, address) on the MMU passed as
// the first argument (presumably a byte-swapped store; confirm against MMU::Write_U64_Swap).
void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64_Swap(var, address);
}
// JitArm64 variant of the D-cache-line clear wrapper: forwards to MMU::ClearDCacheLine(address).
// Unlike the ...FromJit64 variant, the MMU reference is the last parameter, not the first.
void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu)
{
mmu.ClearDCacheLine(address);
}
// JitArm64 variant of the 8-bit read wrapper: returns MMU::Read_U8(address) as a u8.
// The MMU reference is the last parameter.
u8 ReadU8FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U8(address);
}
// JitArm64 variant of the 16-bit read wrapper: returns MMU::Read_U16(address) as a u16.
// The MMU reference is the last parameter.
u16 ReadU16FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U16(address);
}
// JitArm64 variant of the 32-bit read wrapper: returns MMU::Read_U32(address).
// The MMU reference is the last parameter.
u32 ReadU32FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U32(address);
}
// JitArm64 variant of the 64-bit read wrapper: returns MMU::Read_U64(address).
// The MMU reference is the last parameter.
u64 ReadU64FromJitArm64(u32 address, MMU& mmu)
{
return mmu.Read_U64(address);
}
// JitArm64 variant of the 8-bit write wrapper: forwards to MMU::Write_U8(var, address).
// Parameter order is (value, address, MMU), with the MMU reference last.
void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U8(var, address);
}
// JitArm64 variant of the 16-bit write wrapper: forwards to MMU::Write_U16(var, address).
// Parameter order is (value, address, MMU), with the MMU reference last.
void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U16(var, address);
}
// JitArm64 variant of the 32-bit write wrapper: forwards to MMU::Write_U32(var, address).
// Parameter order is (value, address, MMU), with the MMU reference last.
void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U32(var, address);
}
// JitArm64 variant of the 64-bit write wrapper: forwards to MMU::Write_U64(var, address).
// Parameter order is (value, address, MMU), with the MMU reference last.
void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu)
{
mmu.Write_U64(var, address);
}
// JitArm64 variant: forwards to MMU::Write_U16_Swap(var, address) (presumably a byte-swapped
// store; confirm against MMU::Write_U16_Swap). The MMU reference is the last parameter.
void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U16_Swap(var, address);
}
// JitArm64 variant: forwards to MMU::Write_U32_Swap(var, address) (presumably a byte-swapped
// store; confirm against MMU::Write_U32_Swap). The MMU reference is the last parameter.
void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
{
mmu.Write_U32_Swap(var, address);
}
// Free-function wrapper: forwards to MMU::Write_U64_Swap(var, address) (presumably a
// byte-swapped store; confirm against MMU::Write_U64_Swap).
// NOTE(review): two signature lines appear here with different parameter orders, the
// ...FromJitArm64 one (MMU last) and the unified ...FromJit one (MMU first). This is
// presumably the before/after of the unification; confirm which one the file keeps.
void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu)
void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64_Swap(var, address);
}

View File

@ -328,32 +328,16 @@ private:
BatTable m_dbat_table;
};
// Free-function wrappers around MMU member functions, in the ...FromJit64 naming.
// Each takes the MMU as its first parameter, followed by the value (for writes) and the address.
void ClearDCacheLineFromJit64(MMU& mmu, u32 address);
u32 ReadU8ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU16ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU32FromJit64(MMU& mmu, u32 address);
u64 ReadU64FromJit64(MMU& mmu, u32 address);
void WriteU8FromJit64(MMU& mmu, u32 var, u32 address);
void WriteU16FromJit64(MMU& mmu, u32 var, u32 address);
void WriteU32FromJit64(MMU& mmu, u32 var, u32 address);
void WriteU64FromJit64(MMU& mmu, u64 var, u32 address);
void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address);
void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address);
void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address);
// The JitArm64 function that calls these has very specific register allocation that's difficult to
// change, so we have a separate set of functions here for it. This can probably be refactored in
// the future.
// These variants take the MMU reference as the last parameter, and the 8-bit and 16-bit reads
// return u8/u16 instead of u32.
void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu);
u8 ReadU8FromJitArm64(u32 address, MMU& mmu);
u16 ReadU16FromJitArm64(u32 address, MMU& mmu);
u32 ReadU32FromJitArm64(u32 address, MMU& mmu);
u64 ReadU64FromJitArm64(u32 address, MMU& mmu);
void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu);
void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu);
// Free-function wrappers around MMU member functions, used as call targets by JIT-emitted code.
// Each takes the MMU as its first parameter, followed by the value (for writes) and the address.
void ClearDCacheLineFromJit(MMU& mmu, u32 address);
u32 ReadU8FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU16FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
u32 ReadU32FromJit(MMU& mmu, u32 address);
u64 ReadU64FromJit(MMU& mmu, u32 address);
void WriteU8FromJit(MMU& mmu, u32 var, u32 address);
void WriteU16FromJit(MMU& mmu, u32 var, u32 address);
void WriteU32FromJit(MMU& mmu, u32 var, u32 address);
void WriteU64FromJit(MMU& mmu, u64 var, u32 address);
void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address);
void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address);
void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address);
} // namespace PowerPC