diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 7a7461713e..83313c56d9 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -445,7 +445,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
   MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
   BitSet32 registersInUse = CallerSavedRegistersInUse();
   ABI_PushRegistersAndAdjustStack(registersInUse, 0);
-  ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit64, &m_mmu, RSCRATCH);
+  ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
   ABI_PopRegistersAndAdjustStack(registersInUse, 0);
 
   if (emit_fast_path)
diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index 5e3e12f9dd..c18901eba3 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -400,16 +400,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
     switch (accessSize)
     {
     case 64:
-      ABI_CallFunctionPR(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, reg_addr);
+      ABI_CallFunctionPR(PowerPC::ReadU64FromJit, &m_jit.m_mmu, reg_addr);
       break;
     case 32:
-      ABI_CallFunctionPR(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, reg_addr);
+      ABI_CallFunctionPR(PowerPC::ReadU32FromJit, &m_jit.m_mmu, reg_addr);
       break;
     case 16:
-      ABI_CallFunctionPR(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, reg_addr);
+      ABI_CallFunctionPR(PowerPC::ReadU16FromJit, &m_jit.m_mmu, reg_addr);
       break;
     case 8:
-      ABI_CallFunctionPR(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, reg_addr);
+      ABI_CallFunctionPR(PowerPC::ReadU8FromJit, &m_jit.m_mmu, reg_addr);
       break;
     }
     ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -464,16 +464,16 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
     switch (accessSize)
     {
     case 64:
-      ABI_CallFunctionPC(PowerPC::ReadU64FromJit64, &m_jit.m_mmu, address);
+      ABI_CallFunctionPC(PowerPC::ReadU64FromJit, &m_jit.m_mmu, address);
       break;
     case 32:
-      ABI_CallFunctionPC(PowerPC::ReadU32FromJit64, &m_jit.m_mmu, address);
+      ABI_CallFunctionPC(PowerPC::ReadU32FromJit, &m_jit.m_mmu, address);
       break;
     case 16:
-      ABI_CallFunctionPC(PowerPC::ReadU16ZXFromJit64, &m_jit.m_mmu, address);
+      ABI_CallFunctionPC(PowerPC::ReadU16FromJit, &m_jit.m_mmu, address);
       break;
     case 8:
-      ABI_CallFunctionPC(PowerPC::ReadU8ZXFromJit64, &m_jit.m_mmu, address);
+      ABI_CallFunctionPC(PowerPC::ReadU8FromJit, &m_jit.m_mmu, address);
       break;
     }
     ABI_PopRegistersAndAdjustStack(registersInUse, 0);
@@ -586,19 +586,19 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
     switch (accessSize)
     {
    case 64:
-      ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit64 : PowerPC::WriteU64SwapFromJit64,
+      ABI_CallFunctionPRR(swap ? PowerPC::WriteU64FromJit : PowerPC::WriteU64SwapFromJit,
                           &m_jit.m_mmu, reg, reg_addr);
       break;
     case 32:
-      ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit64 : PowerPC::WriteU32SwapFromJit64,
+      ABI_CallFunctionPRR(swap ? PowerPC::WriteU32FromJit : PowerPC::WriteU32SwapFromJit,
                           &m_jit.m_mmu, reg, reg_addr);
       break;
     case 16:
-      ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit64 : PowerPC::WriteU16SwapFromJit64,
+      ABI_CallFunctionPRR(swap ? PowerPC::WriteU16FromJit : PowerPC::WriteU16SwapFromJit,
                           &m_jit.m_mmu, reg, reg_addr);
       break;
     case 8:
-      ABI_CallFunctionPRR(PowerPC::WriteU8FromJit64, &m_jit.m_mmu, reg, reg_addr);
+      ABI_CallFunctionPRR(PowerPC::WriteU8FromJit, &m_jit.m_mmu, reg, reg_addr);
       break;
     }
     ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -668,16 +668,16 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
     switch (accessSize)
     {
     case 64:
-      ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit64, &m_jit.m_mmu, arg, address);
+      ABI_CallFunctionPAC(64, PowerPC::WriteU64FromJit, &m_jit.m_mmu, arg, address);
       break;
     case 32:
-      ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit64, &m_jit.m_mmu, arg, address);
+      ABI_CallFunctionPAC(32, PowerPC::WriteU32FromJit, &m_jit.m_mmu, arg, address);
       break;
     case 16:
-      ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit64, &m_jit.m_mmu, arg, address);
+      ABI_CallFunctionPAC(16, PowerPC::WriteU16FromJit, &m_jit.m_mmu, arg, address);
       break;
     case 8:
-      ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit64, &m_jit.m_mmu, arg, address);
+      ABI_CallFunctionPAC(8, PowerPC::WriteU8FromJit, &m_jit.m_mmu, arg, address);
       break;
     }
     ABI_PopRegistersAndAdjustStack(registersInUse, 0);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index ed902d5c11..0096d56654 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -242,21 +242,23 @@ protected:
   // Registers used:
   //
   //                 addr    scratch
-  // Store:          X1      X0
-  // Load:           X0
-  // Zero 256:       X0      X30
-  // Store float:    X1      Q0
-  // Load float:     X0
+  // Store:          X2      X1
+  // Load:           X1
+  // Zero 256:       X1      X30
+  // Store float:    X2      Q0
+  // Load float:     X1
   //
   // If mode == AlwaysFastAccess, the addr argument can be any register.
   // Otherwise it must be the register listed in the table above.
   //
   // Additional scratch registers are used in the following situations:
   //
-  // emitting_routine && mode == Auto: X2
+  // emitting_routine && mode == Auto: X0
   // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
   // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
-  // mode != AlwaysSlowAccess && !jo.fastmem: X2
+  // mode != AlwaysSlowAccess && !jo.fastmem: X0
+  // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
+  //     (flags & BackPatchInfo::FLAG_LOAD): X0
   // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
   // !emitting_routine && mode == Auto && jo.fastmem: X30
   //
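The register reshuffle documented in the table above falls out of the calling convention: the unified PowerPC::*FromJit wrappers take MMU& as their first parameter, and AAPCS64 assigns arguments to X0, X1, X2, ... in order, so the MMU pointer claims X0 and everything else shifts by one. A minimal standalone sketch of that convention (hypothetical stand-in types, not Dolphin source):

#include <cstdint>

// With the MMU pointer in X0, a load's address argument lands in W1 and a
// store's value/address arguments land in W1/W2 -- the same registers the
// table above now reserves for addr and scratch.
struct FakeMmu  // stand-in for PowerPC::MMU
{
  uint32_t memory[64] = {};
  uint32_t Read_U32(uint32_t address) { return memory[address % 64]; }
  void Write_U32(uint32_t var, uint32_t address) { memory[address % 64] = var; }
};

static uint32_t ReadU32Thunk(FakeMmu& mmu, uint32_t address)  // X0 = &mmu, W1 = address
{
  return mmu.Read_U32(address);
}

static void WriteU32Thunk(FakeMmu& mmu, uint32_t var, uint32_t address)  // X0, W1, W2
{
  mmu.Write_U32(var, address);
}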
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 7180eff19c..b576935a90 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -79,7 +79,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
     const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
 
     memory_base = EncodeRegTo64(temp);
-    memory_offset = ARM64Reg::W2;
+    memory_offset = ARM64Reg::W0;
 
     LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
     LDR(memory_base, MEM_REG, ArithOption(temp, true));
@@ -95,8 +95,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   }
   else if (emit_slow_access && emitting_routine)
   {
-    const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
-    const ARM64Reg temp2 = ARM64Reg::W2;
+    const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
+    const ARM64Reg temp2 = ARM64Reg::W0;
 
     slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
   }
@@ -117,7 +117,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   }
   else if (flags & BackPatchInfo::FLAG_STORE)
   {
-    ARM64Reg temp = ARM64Reg::W0;
+    ARM64Reg temp = ARM64Reg::W1;
     temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
 
     if (flags & BackPatchInfo::FLAG_SIZE_32)
@@ -169,7 +169,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
     if (slow_access_fixup)
      SetJumpTarget(*slow_access_fixup);
 
-    const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
+    const ARM64Reg temp_gpr = ARM64Reg::W1;
     const int temp_gpr_index = DecodeReg(temp_gpr);
 
     BitSet32 gprs_to_push_early = {};
@@ -181,8 +181,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
     // If we're already pushing one register in the first PushRegisters call, we can push a
     // second one for free. Let's do so, since it might save one instruction in the second
     // PushRegisters call. (Do not do this for caller-saved registers which may be in the register
-    // cache, or else EmitMemcheck will not be able to flush the register cache correctly!)
-    if (gprs_to_push & gprs_to_push_early)
+    // cache, or WriteConditionalExceptionExit won't be able to flush the register cache correctly!)
+    if ((gprs_to_push & gprs_to_push_early).Count() & 1)
       gprs_to_push_early[30] = true;
 
     ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
@@ -203,7 +203,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
     if (flags & BackPatchInfo::FLAG_STORE)
     {
       ARM64Reg src_reg = RS;
-      const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;
+      const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
 
       if (flags & BackPatchInfo::FLAG_FLOAT)
       {
@@ -226,41 +226,38 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       if (access_size == 64)
       {
-        ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJitArm64 :
-                                   &PowerPC::WriteU64FromJitArm64,
-                         src_reg, ARM64Reg::W1, &m_mmu);
+        ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
+                         &m_mmu, src_reg, ARM64Reg::W2);
       }
       else if (access_size == 32)
       {
-        ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJitArm64 :
-                                   &PowerPC::WriteU32FromJitArm64,
-                         src_reg, ARM64Reg::W1, &m_mmu);
+        ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
+                         &m_mmu, src_reg, ARM64Reg::W2);
       }
       else if (access_size == 16)
      {
-        ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJitArm64 :
-                                   &PowerPC::WriteU16FromJitArm64,
-                         src_reg, ARM64Reg::W1, &m_mmu);
+        ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
+                         &m_mmu, src_reg, ARM64Reg::W2);
       }
       else
       {
-        ABI_CallFunction(&PowerPC::WriteU8FromJitArm64, src_reg, ARM64Reg::W1, &m_mmu);
+        ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
       }
     }
     else if (flags & BackPatchInfo::FLAG_ZERO_256)
     {
-      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJitArm64, ARM64Reg::W0, &m_mmu);
+      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
     }
     else
     {
       if (access_size == 64)
-        ABI_CallFunction(&PowerPC::ReadU64FromJitArm64, ARM64Reg::W0, &m_mmu);
+        ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
       else if (access_size == 32)
-        ABI_CallFunction(&PowerPC::ReadU32FromJitArm64, ARM64Reg::W0, &m_mmu);
+        ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
       else if (access_size == 16)
-        ABI_CallFunction(&PowerPC::ReadU16FromJitArm64, ARM64Reg::W0, &m_mmu);
+        ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
       else
-        ABI_CallFunction(&PowerPC::ReadU8FromJitArm64, ARM64Reg::W0, &m_mmu);
+        ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
     }
 
     m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
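One behavioral change hides in the last hunk above: the early-push condition went from a truthiness test to a parity test. AArch64 pushes registers in 16-byte STP pairs, so when the early push contains an odd number of registers there is a spare slot, and folding X30 into it costs nothing while possibly saving an instruction in the second push. The same test in standalone form (hypothetical helper name, standard C++ only):

#include <bit>
#include <cstdint>

// Mirror of '(gprs_to_push & gprs_to_push_early).Count() & 1': when the
// early-push mask has an odd population count, half of an STP pair would
// go unused, so X30 (LR) is added to fill it for free.
uint32_t FillFreePushSlot(uint32_t early_push_mask)
{
  if (std::popcount(early_push_mask) & 1)
    early_push_mask |= UINT32_C(1) << 30;
  return early_push_mask;
}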
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 75c6fe884b..483f10f088 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -27,9 +27,9 @@ using namespace Arm64Gen;
 void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
 {
   // We want to make sure to not get LR as a temp register
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
-  if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Lock(ARM64Reg::W0);
 
   gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
   ARM64Reg dest_reg = gpr.R(dest);
@@ -42,7 +42,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (offsetReg != -1 && !gpr.IsImm(offsetReg))
     off_reg = gpr.R(offsetReg);
 
-  ARM64Reg addr_reg = ARM64Reg::W0;
+  ARM64Reg addr_reg = ARM64Reg::W1;
   u32 imm_addr = 0;
   bool is_immediate = false;
 
@@ -123,9 +123,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
   if (!update || early_update)
+    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+  if (jo.memcheck || !jo.fastmem)
     regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
-  if (!jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   if (!jo.memcheck)
     regs_in_use[DecodeReg(dest_reg)] = 0;
 
@@ -142,7 +142,7 @@
   }
   else if (mmio_address)
   {
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
     regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
     regs_in_use[DecodeReg(dest_reg)] = 0;
     MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
@@ -165,18 +165,18 @@
     MOV(gpr.R(addr), addr_reg);
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
-  if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Unlock(ARM64Reg::W0);
 }
 
 void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
                                 bool update)
 {
   // We want to make sure to not get LR as a temp register
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+    gpr.Lock(ARM64Reg::W0);
 
   ARM64Reg RS = gpr.R(value);
@@ -188,7 +188,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   if (dest != -1 && !gpr.IsImm(dest))
     reg_dest = gpr.R(dest);
 
-  ARM64Reg addr_reg = ARM64Reg::W1;
+  ARM64Reg addr_reg = ARM64Reg::W2;
   u32 imm_addr = 0;
   bool is_immediate = false;
 
@@ -269,11 +269,11 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-  regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+  regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
-  if (!jo.fastmem)
     regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+  if (!jo.fastmem)
+    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
 
   u32 access_size = BackPatchInfo::GetFlagSize(flags);
   u32 mmio_address = 0;
@@ -290,19 +290,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
     else
       accessSize = 8;
 
-    LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+    LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
 
     ARM64Reg temp = ARM64Reg::W1;
     temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
 
     if (accessSize == 32)
-      STR(IndexType::Post, temp, ARM64Reg::X0, 4);
+      STR(IndexType::Post, temp, ARM64Reg::X2, 4);
     else if (accessSize == 16)
-      STRH(IndexType::Post, temp, ARM64Reg::X0, 2);
+      STRH(IndexType::Post, temp, ARM64Reg::X2, 2);
     else
-      STRB(IndexType::Post, temp, ARM64Reg::X0, 1);
+      STRB(IndexType::Post, temp, ARM64Reg::X2, 1);
 
-    STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+    STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
 
     js.fifoBytesSinceCheck += accessSize >> 3;
   }
@@ -313,8 +313,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   else if (mmio_address)
   {
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
     regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
     regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
     regs_in_use[DecodeReg(RS)] = 0;
     MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
@@ -334,9 +334,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
     MOV(gpr.R(dest), addr_reg);
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+    gpr.Unlock(ARM64Reg::W0);
 }
 
 FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -518,13 +518,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
   u32 a = inst.RA, d = inst.RD;
   s32 offset = inst.SIMM_16;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
-  if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Lock(ARM64Reg::W0);
 
   // MMU games make use of a >= d despite this being invalid according to the PEM.
   // If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
-  ARM64Reg addr_reg = ARM64Reg::W0;
+  ARM64Reg addr_reg = ARM64Reg::W1;
   bool a_is_addr_base_reg = false;
   if (!a)
     MOVI2R(addr_reg, offset);
@@ -554,8 +554,8 @@
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[DecodeReg(addr_reg)] = 0;
-    if (!jo.fastmem)
-      regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+    if (jo.memcheck || !jo.fastmem)
+      regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
     if (!jo.memcheck)
       regs_in_use[DecodeReg(dest_reg)] = 0;
 
@@ -566,9 +566,9 @@
     ASSERT(dest_reg == gpr.R(i));
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
-  if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Unlock(ARM64Reg::W0);
   if (!a_is_addr_base_reg)
     gpr.Unlock(addr_base_reg);
 }
@@ -581,11 +581,11 @@ void JitArm64::stmw(UGeckoInstruction inst)
   u32 a = inst.RA, s = inst.RS;
   s32 offset = inst.SIMM_16;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+    gpr.Lock(ARM64Reg::W0);
 
-  ARM64Reg addr_reg = ARM64Reg::W1;
+  ARM64Reg addr_reg = ARM64Reg::W2;
   bool a_is_addr_base_reg = false;
   if (!a)
     MOVI2R(addr_reg, offset);
@@ -613,18 +613,18 @@
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
     regs_in_use[DecodeReg(addr_reg)] = 0;
     if (!jo.fastmem)
-      regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+      regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
 
     EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg),
                          regs_in_use, fprs_in_use);
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+    gpr.Unlock(ARM64Reg::W0);
   if (!a_is_addr_base_reg)
     gpr.Unlock(addr_base_reg);
 }
@@ -821,17 +821,17 @@ void JitArm64::dcbz(UGeckoInstruction inst)
   int a = inst.RA, b = inst.RB;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+    gpr.Lock(ARM64Reg::W0);
 
   Common::ScopeGuard register_guard([&] {
-    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+    gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
     if (!jo.fastmem)
-      gpr.Unlock(ARM64Reg::W2);
+      gpr.Unlock(ARM64Reg::W0);
   });
 
-  constexpr ARM64Reg addr_reg = ARM64Reg::W0;
+  constexpr ARM64Reg addr_reg = ARM64Reg::W1;
   constexpr ARM64Reg temp_reg = ARM64Reg::W30;
 
   // HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@@ -895,11 +895,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)
   BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
   BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
-  gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
+  gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0;
   if (!jo.fastmem)
-    gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
+    gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
 
-  EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
+  EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
                        EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
 
   if (using_dcbz_hack)
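For readers skimming the gather-pipe fast path above: the emitted code loads the current FIFO write pointer out of ppcState, performs a post-indexed store (which writes the value and bumps the pointer in one instruction), and stores the pointer back. In plain C++ terms (a sketch with illustrative names; the value is assumed to be byteswapped already, since ByteswapBeforeStore runs first):

#include <cstdint>
#include <cstring>

struct GuestState
{
  uint8_t* gather_pipe_ptr;  // stand-in for ppcState.gather_pipe_ptr
};

// Equivalent of the LDR / STR(IndexType::Post) / STR sequence above.
inline void GatherPipeStore32(GuestState& state, uint32_t swapped_value)
{
  std::memcpy(state.gather_pipe_ptr, &swapped_value, sizeof(swapped_value));
  state.gather_pipe_ptr += sizeof(swapped_value);  // the post-increment
}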
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index a0fa812671..4fe8ca4cb3 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -77,13 +77,13 @@ void JitArm64::lfXX(UGeckoInstruction inst)
   const RegType type =
       (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
   fpr.Lock(ARM64Reg::Q0);
-  if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Lock(ARM64Reg::W0);
 
   const ARM64Reg VD = fpr.RW(inst.FD, type, false);
-  ARM64Reg addr_reg = ARM64Reg::W0;
+  ARM64Reg addr_reg = ARM64Reg::W1;
 
   if (update)
   {
@@ -167,9 +167,9 @@ void JitArm64::lfXX(UGeckoInstruction inst)
   BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
   if (!update || early_update)
+    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+  if (jo.memcheck || !jo.fastmem)
     regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
-  if (!jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
   if (!jo.memcheck)
     fprs_in_use[DecodeReg(VD)] = 0;
@@ -192,10 +192,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
     MOV(gpr.R(a), addr_reg);
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
-  if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+  if (jo.memcheck || !jo.fastmem)
+    gpr.Unlock(ARM64Reg::W0);
 }
 
 void JitArm64::stfXX(UGeckoInstruction inst)
@@ -278,11 +278,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     V0 = single_reg;
   }
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   if (!jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+    gpr.Lock(ARM64Reg::W0);
 
-  ARM64Reg addr_reg = ARM64Reg::W1;
+  ARM64Reg addr_reg = ARM64Reg::W2;
 
   if (update)
   {
@@ -369,11 +369,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-  regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+  regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
-  if (!jo.fastmem)
     regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+  if (!jo.fastmem)
+    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
 
   if (is_immediate)
@@ -386,7 +386,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
      else
        accessSize = 32;
 
-      LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+      LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
 
      if (flags & BackPatchInfo::FLAG_SIZE_64)
        m_float_emit.REV64(8, ARM64Reg::Q0, V0);
@@ -394,9 +394,9 @@
        m_float_emit.REV32(8, ARM64Reg::D0, V0);
 
      m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
-                       ARM64Reg::X0, accessSize >> 3);
+                       ARM64Reg::X2, accessSize >> 3);
 
-      STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+      STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
 
      js.fifoBytesSinceCheck += accessSize >> 3;
    }
    else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
@@ -428,8 +428,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   if (want_single && !have_single)
     fpr.Unlock(V0);
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
   if (!jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+    gpr.Unlock(ARM64Reg::W0);
 }
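The REV64/REV32 instructions preceding the float gather-pipe store exist because GameCube/Wii memory is big-endian: the float's byte image must be swapped before it leaves the host. Roughly equivalent host-side code (a sketch; assumes a little-endian host, std::bit_cast from C++20 and std::byteswap from C++23):

#include <bit>
#include <cstdint>

// Host equivalent of the REV64 above: reinterpret the double's bits and
// byteswap them so the subsequent store produces guest byte order.
uint64_t DoubleToGuestOrder(double host_value)
{
  return std::byteswap(std::bit_cast<uint64_t>(host_value));
}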
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 00810502a5..725826dd23 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -37,21 +37,21 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
   const int i = indexed ? inst.Ix : inst.I;
   const int w = indexed ? inst.Wx : inst.W;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
   fpr.Lock(ARM64Reg::Q0);
 
   if (!js.assumeNoPairedQuantize)
   {
-    gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
+    gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Lock(ARM64Reg::Q1);
   }
-  else if (!jo.fastmem)
+  else if (jo.memcheck || !jo.fastmem)
   {
-    gpr.Lock(ARM64Reg::W2);
+    gpr.Lock(ARM64Reg::W0);
   }
 
-  constexpr ARM64Reg addr_reg = ARM64Reg::W0;
-  constexpr ARM64Reg scale_reg = ARM64Reg::W1;
-  constexpr ARM64Reg type_reg = ARM64Reg::W2;
+  constexpr ARM64Reg type_reg = ARM64Reg::W0;
+  constexpr ARM64Reg addr_reg = ARM64Reg::W1;
+  constexpr ARM64Reg scale_reg = ARM64Reg::W2;
   ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);
 
   if (inst.RA || update)  // Always uses the register on update
@@ -85,9 +85,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
   // Wipe the registers we are using as temporaries
   if (!update || early_update)
+    gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+  if (jo.memcheck || !jo.fastmem)
     gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
-  if (!jo.fastmem)
-    gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
   if (!jo.memcheck)
     fprs_in_use[DecodeReg(VS)] = 0;
 
@@ -134,16 +134,16 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     MOV(gpr.R(inst.RA), addr_reg);
   }
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
 
   if (!js.assumeNoPairedQuantize)
   {
-    gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
+    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Unlock(ARM64Reg::Q1);
   }
-  else if (!jo.fastmem)
+  else if (jo.memcheck || !jo.fastmem)
   {
-    gpr.Unlock(ARM64Reg::W2);
+    gpr.Unlock(ARM64Reg::W0);
   }
 }
@@ -203,15 +203,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
     }
   }
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
-  if (!js.assumeNoPairedQuantize || !jo.fastmem)
-    gpr.Lock(ARM64Reg::W2);
+  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
+  if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
+    gpr.Lock(ARM64Reg::W0);
   if (!js.assumeNoPairedQuantize && !jo.fastmem)
     gpr.Lock(ARM64Reg::W3);
 
-  constexpr ARM64Reg scale_reg = ARM64Reg::W0;
-  constexpr ARM64Reg addr_reg = ARM64Reg::W1;
-  constexpr ARM64Reg type_reg = ARM64Reg::W2;
+  constexpr ARM64Reg type_reg = ARM64Reg::W0;
+  constexpr ARM64Reg scale_reg = ARM64Reg::W1;
+  constexpr ARM64Reg addr_reg = ARM64Reg::W2;
 
   if (inst.RA || update)  // Always uses the register on update
   {
@@ -243,11 +243,11 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 
   // Wipe the registers we are using as temporaries
-  gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+  gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
   if (!update || early_update)
-    gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
-  if (!jo.fastmem)
     gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
+  if (!jo.fastmem)
+    gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
 
   u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
   if (!w)
@@ -283,10 +283,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   if (js.assumeNoPairedQuantize && !have_single)
     fpr.Unlock(VS);
 
-  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
-  if (!js.assumeNoPairedQuantize || !jo.fastmem)
-    gpr.Unlock(ARM64Reg::W2);
+  if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
+    gpr.Unlock(ARM64Reg::W0);
   if (!js.assumeNoPairedQuantize && !jo.fastmem)
     gpr.Unlock(ARM64Reg::W3);
   if (!js.assumeNoPairedQuantize)
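Background for the type_reg/scale_reg/addr_reg renumbering above: psq_l/psq_st hand the quantizer's type and scale fields to the common routines generated below, where the scale indexes a table of multipliers whose entries are 8 bytes apart (that is what the LSL 3 address arithmetic in the following hunks encodes). The arithmetic those routines perform, sketched in plain C++ (illustrative names; the real table is m_dequantizeTableS):

#include <cstdint>

// One lane of dequantization: widen the stored integer and multiply by
// the scale's table entry. With 8-byte entries, float index = scale * 2.
float DequantizeLane(int16_t raw, const float* dequantize_table, uint32_t scale)
{
  return static_cast<float>(raw) * dequantize_table[scale * 2];
}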
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index cb08714a87..cc3f7cc298 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -502,18 +502,19 @@ void JitArm64::GenerateFPRF(bool single)
 void JitArm64::GenerateQuantizedLoads()
 {
-  // X0 is the address
-  // X1 is the scale
-  // X2 is a temporary
+  // X0 is a temporary
+  // X1 is the address
+  // X2 is the scale
   // X3 is a temporary (used in EmitBackpatchRoutine)
   // X30 is LR
   // Q0 is the return
   // Q1 is a temporary
-  ARM64Reg addr_reg = ARM64Reg::X0;
-  ARM64Reg scale_reg = ARM64Reg::X1;
-  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3};
+  ARM64Reg temp_reg = ARM64Reg::X0;
+  ARM64Reg addr_reg = ARM64Reg::X1;
+  ARM64Reg scale_reg = ARM64Reg::X2;
+  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
   if (!jo.memcheck)
-    gprs_to_push &= ~BitSet32{0};
+    gprs_to_push &= ~BitSet32{1};
   BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
 
   ARM64FloatEmitter float_emit(this);
 
@@ -526,7 +527,7 @@
         BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
     EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
-                         gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
+                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
 
     RET(ARM64Reg::X30);
   }
 
@@ -542,8 +543,8 @@
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(ARM64Reg::X0, &m_dequantizeTableS);
+    ADD(scale_reg, ARM64Reg::X0, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -560,8 +561,8 @@
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -577,8 +578,8 @@
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -594,8 +595,8 @@
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -607,7 +608,7 @@
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
     EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
-                         gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
+                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
 
     RET(ARM64Reg::X30);
   }
 
@@ -623,8 +624,8 @@
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -641,8 +642,8 @@
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -658,8 +659,8 @@
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -675,8 +676,8 @@
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
@@ -711,18 +712,19 @@
 void JitArm64::GenerateQuantizedStores()
 {
-  // X0 is the scale
-  // X1 is the address
-  // X2 is a temporary
+  // X0 is a temporary
+  // X1 is the scale
+  // X2 is the address
   // X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
   // X30 is LR
   // Q0 is the register
   // Q1 is a temporary
-  ARM64Reg scale_reg = ARM64Reg::X0;
-  ARM64Reg addr_reg = ARM64Reg::X1;
-  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
+  ARM64Reg temp_reg = ARM64Reg::X0;
+  ARM64Reg scale_reg = ARM64Reg::X1;
+  ARM64Reg addr_reg = ARM64Reg::X2;
+  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
   if (!jo.memcheck)
-    gprs_to_push &= ~BitSet32{1};
+    gprs_to_push &= ~BitSet32{2};
   if (!jo.fastmem)
     gprs_to_push &= ~BitSet32{3};
   BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
 
@@ -743,8 +745,8 @@
   }
 
   const u8* storePairedU8 = GetCodePtr();
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@@ -762,8 +764,8 @@
   }
 
   const u8* storePairedS8 = GetCodePtr();
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@@ -781,8 +783,8 @@
   }
 
   const u8* storePairedU16 = GetCodePtr();
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@@ -799,8 +801,8 @@
   }
 
   const u8* storePairedS16 = GetCodePtr();  // Used by Viewtiful Joe's intro movie
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
@@ -828,8 +830,8 @@
   }
 
   const u8* storeSingleU8 = GetCodePtr();  // Used by MKWii
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@@ -847,8 +849,8 @@
   }
 
   const u8* storeSingleS8 = GetCodePtr();
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@@ -866,8 +868,8 @@
   }
 
   const u8* storeSingleU16 = GetCodePtr();  // Used by MKWii
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
@@ -884,8 +886,8 @@
   }
 
   const u8* storeSingleS16 = GetCodePtr();
   {
-    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
-    ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+    const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+    ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
     float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
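A note on the BitSet32 bookkeeping in the two routines above: bit n of gprs_to_push stands for register Xn, so CALLER_SAVED_GPRS & ~BitSet32{0, 3} means "push every caller-saved GPR except the X0 and X3 temporaries". Replacing the literal ~BitSet32{1} with ~BitSet32{DecodeReg(scale_reg)} ties the exclusion to the variable instead of a magic number. The same idea with plain bitmasks (sketch; the mask value below is illustrative, not Dolphin's actual constant):

#include <cstdint>

// Bit n set <=> Xn must be saved around the slow-path call.
constexpr uint32_t ExcludeTemporaries(uint32_t caller_saved_mask, int reg_a, int reg_b)
{
  return caller_saved_mask & ~((UINT32_C(1) << reg_a) | (UINT32_C(1) << reg_b));
}

static_assert(ExcludeTemporaries(0xFFu, 0, 3) == 0xF6u);  // drops bits 0 and 3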
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index 735bcae5c0..aa305fe0ab 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -1681,100 +1681,51 @@ std::optional<u32> MMU::GetTranslatedAddress(u32 address)
   return std::optional<u32>(result.address);
 }
 
-void ClearDCacheLineFromJit64(MMU& mmu, u32 address)
+void ClearDCacheLineFromJit(MMU& mmu, u32 address)
 {
   mmu.ClearDCacheLine(address);
 }
-u32 ReadU8ZXFromJit64(MMU& mmu, u32 address)
+u32 ReadU8FromJit(MMU& mmu, u32 address)
 {
   return mmu.Read_U8(address);
 }
-u32 ReadU16ZXFromJit64(MMU& mmu, u32 address)
+u32 ReadU16FromJit(MMU& mmu, u32 address)
 {
   return mmu.Read_U16(address);
 }
-u32 ReadU32FromJit64(MMU& mmu, u32 address)
+u32 ReadU32FromJit(MMU& mmu, u32 address)
 {
   return mmu.Read_U32(address);
 }
-u64 ReadU64FromJit64(MMU& mmu, u32 address)
+u64 ReadU64FromJit(MMU& mmu, u32 address)
 {
   return mmu.Read_U64(address);
 }
-void WriteU8FromJit64(MMU& mmu, u32 var, u32 address)
+void WriteU8FromJit(MMU& mmu, u32 var, u32 address)
 {
   mmu.Write_U8(var, address);
 }
-void WriteU16FromJit64(MMU& mmu, u32 var, u32 address)
+void WriteU16FromJit(MMU& mmu, u32 var, u32 address)
 {
   mmu.Write_U16(var, address);
 }
-void WriteU32FromJit64(MMU& mmu, u32 var, u32 address)
+void WriteU32FromJit(MMU& mmu, u32 var, u32 address)
 {
   mmu.Write_U32(var, address);
 }
-void WriteU64FromJit64(MMU& mmu, u64 var, u32 address)
+void WriteU64FromJit(MMU& mmu, u64 var, u32 address)
 {
   mmu.Write_U64(var, address);
 }
-void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address)
+void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address)
 {
   mmu.Write_U16_Swap(var, address);
 }
-void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address)
+void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address)
 {
   mmu.Write_U32_Swap(var, address);
 }
-void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address)
-{
-  mmu.Write_U64_Swap(var, address);
-}
-
-void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu)
-{
-  mmu.ClearDCacheLine(address);
-}
-u8 ReadU8FromJitArm64(u32 address, MMU& mmu)
-{
-  return mmu.Read_U8(address);
-}
-u16 ReadU16FromJitArm64(u32 address, MMU& mmu)
-{
-  return mmu.Read_U16(address);
-}
-u32 ReadU32FromJitArm64(u32 address, MMU& mmu)
-{
-  return mmu.Read_U32(address);
-}
-u64 ReadU64FromJitArm64(u32 address, MMU& mmu)
-{
-  return mmu.Read_U64(address);
-}
-void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U8(var, address);
-}
-void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U16(var, address);
-}
-void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U32(var, address);
-}
-void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U64(var, address);
-}
-void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U16_Swap(var, address);
-}
-void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
-{
-  mmu.Write_U32_Swap(var, address);
-}
-void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu)
+void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address)
 {
   mmu.Write_U64_Swap(var, address);
 }
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 96005041a1..3147802cbb 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -328,32 +328,16 @@ private:
   BatTable m_dbat_table;
 };
 
-void ClearDCacheLineFromJit64(MMU& mmu, u32 address);
-u32 ReadU8ZXFromJit64(MMU& mmu, u32 address);   // Returns zero-extended 32bit value
-u32 ReadU16ZXFromJit64(MMU& mmu, u32 address);  // Returns zero-extended 32bit value
-u32 ReadU32FromJit64(MMU& mmu, u32 address);
-u64 ReadU64FromJit64(MMU& mmu, u32 address);
-void WriteU8FromJit64(MMU& mmu, u32 var, u32 address);
-void WriteU16FromJit64(MMU& mmu, u32 var, u32 address);
-void WriteU32FromJit64(MMU& mmu, u32 var, u32 address);
-void WriteU64FromJit64(MMU& mmu, u64 var, u32 address);
-void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address);
-void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address);
-void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address);
-
-// The JitArm64 function that calls these has very specific register allocation that's difficult to
-// change, so we have a separate set of functions here for it. This can probably be refactored in
-// the future.
-void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu);
-u8 ReadU8FromJitArm64(u32 address, MMU& mmu);
-u16 ReadU16FromJitArm64(u32 address, MMU& mmu);
-u32 ReadU32FromJitArm64(u32 address, MMU& mmu);
-u64 ReadU64FromJitArm64(u32 address, MMU& mmu);
-void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu);
-void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu);
-void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu);
-void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu);
-void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
-void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
-void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu);
+void ClearDCacheLineFromJit(MMU& mmu, u32 address);
+u32 ReadU8FromJit(MMU& mmu, u32 address);   // Returns zero-extended 32bit value
+u32 ReadU16FromJit(MMU& mmu, u32 address);  // Returns zero-extended 32bit value
+u32 ReadU32FromJit(MMU& mmu, u32 address);
+u64 ReadU64FromJit(MMU& mmu, u32 address);
+void WriteU8FromJit(MMU& mmu, u32 var, u32 address);
+void WriteU16FromJit(MMU& mmu, u32 var, u32 address);
+void WriteU32FromJit(MMU& mmu, u32 var, u32 address);
+void WriteU64FromJit(MMU& mmu, u64 var, u32 address);
+void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address);
+void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address);
+void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address);
 
 }  // namespace PowerPC
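Finally, on the "// Returns zero-extended 32bit value" comments kept above: having the narrow reads return u32 means the callee hands back a value whose upper bits are already zero, so neither JIT needs to emit an explicit zero-extension (UXTB/UXTH on ARM64, MOVZX on x64) after the call returns. The guarantee itself is ordinary C++ (sketch):

#include <cstdint>

// Integral conversion from uint8_t to uint32_t is value-preserving: the
// result's upper 24 bits are zero by the language rules, not by luck.
uint32_t ReadByteZeroExtended(uint8_t raw_byte)
{
  return raw_byte;
}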