From 8c905e152adba4f446375ce05daeb8b901dcc361 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Mon, 28 Jun 2021 18:23:57 +0200 Subject: [PATCH 01/13] JitArm64: Make WriteConditionalExceptionExit more flexible You can now specify an already allocated register for it to use as a temporary register, and it also supports being called while in farcode. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 30 ++++++++++++++++------- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index bbba42c90f..3bd330040f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -501,21 +501,33 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always void JitArm64::WriteConditionalExceptionExit(int exception) { ARM64Reg WA = gpr.GetReg(); - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - FixupBranch noException = TBZ(WA, IntLog2(exception)); + WriteConditionalExceptionExit(exception, WA); + gpr.Unlock(WA); +} - FixupBranch handleException = B(); - SwitchToFarCode(); - SetJumpTarget(handleException); +void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg) +{ + LDR(IndexType::Unsigned, temp_reg, PPC_REG, PPCSTATE_OFF(Exceptions)); + FixupBranch no_exception = TBZ(temp_reg, IntLog2(exception)); - gpr.Flush(FlushMode::MaintainState, WA); + const bool switch_to_far_code = !IsInFarCode(); + + if (switch_to_far_code) + { + FixupBranch handle_exception = B(); + SwitchToFarCode(); + SetJumpTarget(handle_exception); + } + + gpr.Flush(FlushMode::MaintainState, temp_reg); fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); WriteExceptionExit(js.compilerPC, false, true); - SwitchToNearCode(); - SetJumpTarget(noException); - gpr.Unlock(WA); + if (switch_to_far_code) + SwitchToNearCode(); + + SetJumpTarget(no_exception); } bool 
JitArm64::HandleFunctionHooking(u32 address) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 8d9b140dd0..2d63743300 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -266,6 +266,7 @@ protected: void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false, bool always_exception = false); void WriteConditionalExceptionExit(int exception); + void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_reg); void FakeLKExit(u32 exit_address_after_return); void WriteBLRExit(Arm64Gen::ARM64Reg dest); From ab1ceee16f029572d0856ec5f233b83dba921cc1 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Mon, 28 Jun 2021 18:42:08 +0200 Subject: [PATCH 02/13] JitArm64: Implement memcheck for lXX/stX without update --- Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp | 6 ++++++ Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index db1f18759a..8dfcadcc09 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -15,6 +15,7 @@ #include "Common/Swap.h" #include "Core/HW/Memmap.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/Jit_Util.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" @@ -120,6 +121,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR if (!fastmem || do_farcode) { + const bool memcheck = jo.memcheck && !emitting_routine; + if (fastmem && do_farcode) { in_far_code = true; @@ -223,6 +226,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); ABI_PopRegisters(gprs_to_push); + + if 
(memcheck) + WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::W0); } if (in_far_code) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 6fb5cad056..8398d1b677 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -306,7 +306,6 @@ void JitArm64::lXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); u32 a = inst.RA, b = inst.RB, d = inst.RD; s32 offset = inst.SIMM_16; @@ -378,6 +377,8 @@ void JitArm64::lXX(UGeckoInstruction inst) break; } + FALLBACK_IF(jo.memcheck && update); + SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); } @@ -385,7 +386,6 @@ void JitArm64::stX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck); u32 a = inst.RA, b = inst.RB, s = inst.RS; s32 offset = inst.SIMM_16; @@ -444,6 +444,8 @@ void JitArm64::stX(UGeckoInstruction inst) break; } + FALLBACK_IF(jo.memcheck && update); + SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset); if (update) From 96190887ce8cf523d45b9cfb0eb17a15e801f218 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 1 Jul 2021 19:51:55 +0200 Subject: [PATCH 03/13] JitArm64: Don't update dest reg when load triggers exception Fixes a problem introduced in the previous commit. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 10 ++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 +- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 47 ++++++++++++++----- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 +- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 3bd330040f..f494f128f6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -498,14 +498,15 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always B(dispatcher); } -void JitArm64::WriteConditionalExceptionExit(int exception) +void JitArm64::WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit) { ARM64Reg WA = gpr.GetReg(); - WriteConditionalExceptionExit(exception, WA); + WriteConditionalExceptionExit(exception, WA, increment_sp_on_exit); gpr.Unlock(WA); } -void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg) +void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg, + u64 increment_sp_on_exit) { LDR(IndexType::Unsigned, temp_reg, PPC_REG, PPCSTATE_OFF(Exceptions)); FixupBranch no_exception = TBZ(temp_reg, IntLog2(exception)); @@ -519,6 +520,9 @@ void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg) SetJumpTarget(handle_exception); } + if (increment_sp_on_exit != 0) + ADDI2R(ARM64Reg::SP, ARM64Reg::SP, increment_sp_on_exit, temp_reg); + gpr.Flush(FlushMode::MaintainState, temp_reg); fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 2d63743300..a1368066cd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -265,8 +265,9 @@ protected: bool always_exception = false); void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false, bool always_exception = 
false); - void WriteConditionalExceptionExit(int exception); - void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_reg); + void WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit = 0); + void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_reg, + u64 increment_sp_on_exit = 0); void FakeLKExit(u32 exit_address_after_return); void WriteBLRExit(Arm64Gen::ARM64Reg dest); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 8dfcadcc09..4923e713cb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -57,6 +57,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push, bool emitting_routine) { + const u32 access_size = BackPatchInfo::GetFlagSize(flags); + bool in_far_code = false; const u8* fastmem_start = GetCodePtr(); std::optional slowmem_fixup; @@ -76,11 +78,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR ARM64Reg temp = ARM64Reg::D0; temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true); - m_float_emit.STR(BackPatchInfo::GetFlagSize(flags), temp, MEM_REG, addr); + m_float_emit.STR(access_size, temp, MEM_REG, addr); } else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT)) { - m_float_emit.LDR(BackPatchInfo::GetFlagSize(flags), EncodeRegToDouble(RS), MEM_REG, addr); + m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr); ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags, true, false); @@ -139,12 +141,22 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR if (slowmem_fixup) SetJumpTarget(*slowmem_fixup); - ABI_PushRegisters(gprs_to_push); + const ARM64Reg temp_reg = flags & 
BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0; + const int temp_reg_index = DecodeReg(temp_reg); + + if (memcheck && (flags & BackPatchInfo::FLAG_LOAD)) + { + ABI_PushRegisters(gprs_to_push & BitSet32{temp_reg_index}); + ABI_PushRegisters(gprs_to_push & ~BitSet32{temp_reg_index}); + } + else + { + ABI_PushRegisters(gprs_to_push); + } m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); if (flags & BackPatchInfo::FLAG_STORE) { - const u32 access_size = BackPatchInfo::GetFlagSize(flags); ARM64Reg src_reg = RS; const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0; @@ -188,8 +200,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR } else { - const u32 access_size = BackPatchInfo::GetFlagSize(flags); - if (access_size == 64) MOVP2R(ARM64Reg::X8, &PowerPC::Read_U64); else if (access_size == 32) @@ -200,8 +210,24 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR MOVP2R(ARM64Reg::X8, &PowerPC::Read_U8); BLR(ARM64Reg::X8); + } + m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); + if (memcheck && (flags & BackPatchInfo::FLAG_LOAD)) + ABI_PopRegisters(gprs_to_push & ~BitSet32{temp_reg_index}); + else + ABI_PopRegisters(gprs_to_push); + + if (memcheck) + { + const u64 early_push_size = flags & BackPatchInfo::FLAG_LOAD ? 16 : 0; + WriteConditionalExceptionExit(EXCEPTION_DSI, temp_reg, early_push_size); + } + + if (flags & BackPatchInfo::FLAG_LOAD) + { ARM64Reg src_reg = access_size == 64 ? 
ARM64Reg::X0 : ARM64Reg::W0; + ASSERT(!gprs_to_push[DecodeReg(src_reg)]); if (flags & BackPatchInfo::FLAG_PAIR) { @@ -222,13 +248,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR } ByteswapAfterLoad(this, &m_float_emit, RS, src_reg, flags, false, false); + + if (memcheck) + ABI_PopRegisters(gprs_to_push & BitSet32{temp_reg_index}); } - - m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); - ABI_PopRegisters(gprs_to_push); - - if (memcheck) - WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::W0); } if (in_far_code) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 8398d1b677..38e693ca07 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -113,7 +113,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; - regs_in_use[DecodeReg(dest_reg)] = 0; + if (!jo.memcheck) + regs_in_use[DecodeReg(dest_reg)] = 0; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; From e316d0e94feacf89a22bc1f841e76c8945326f4d Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 11 Jul 2021 00:00:03 +0200 Subject: [PATCH 04/13] JitArm64: Don't update dest reg when load triggers exception, part 2 If a host register has been newly allocated for the destination guest register, and the load triggers an exception, we must make sure to not write the old value in the host register into ppcState. This commit achieves this by not marking the register as dirty until after the load is done. 
--- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 5 ++- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 41 +++++++++++-------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 14 +++++-- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 38e693ca07..685d96c35a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -26,7 +26,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); - gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg); + gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); ARM64Reg dest_reg = gpr.R(dest); ARM64Reg up_reg = ARM64Reg::INVALID_REG; ARM64Reg off_reg = ARM64Reg::INVALID_REG; @@ -135,6 +135,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use); } + gpr.BindToRegister(dest, false, true); + ASSERT(dest_reg == gpr.R(dest)); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 45249d92ed..24aae47c28 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -343,25 +343,29 @@ void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm) reg.LoadToImm(imm); } -void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load) +void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load, bool set_dirty) { OpArg& reg = guest_reg.reg; const size_t bitsize = guest_reg.bitsize; reg.ResetLastUsed(); - 
reg.SetDirty(true); const RegType reg_type = reg.GetType(); if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded) { const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); + reg.SetDirty(set_dirty); if (do_load) { ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); } } + else if (set_dirty) + { + reg.SetDirty(true); + } } void Arm64GPRCache::GetAllocationOrder() @@ -570,26 +574,15 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) return ARM64Reg::INVALID_REG; } -ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) +ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) { OpArg& reg = m_guest_registers[preg]; - bool was_dirty = reg.IsDirty(); - IncrementAllUsed(); reg.ResetLastUsed(); - reg.SetDirty(true); - - // If not loaded at all, just alloc a new one. - if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded) - { - reg.Load(GetReg(), type); - return reg.GetReg(); - } - // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. - if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty) + if (reg.IsDirty() && (type == RegType::LowerPair || type == RegType::LowerPairSingle)) { // We must *not* change host_reg as this register might still be in use. So it's fine to // store this register, but it's *not* fine to convert it to double. 
So for double conversion, @@ -612,6 +605,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) m_jit->ConvertSingleToDoubleLower(preg, flush_reg, flush_reg, scratch_reg); m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, u32(PPCSTATE_OFF_PS1(preg))); Unlock(scratch_reg); + reg.Load(host_reg, RegType::LowerPairSingle); break; } else @@ -619,6 +613,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) m_jit->ConvertSingleToDoublePair(preg, flush_reg, host_reg, flush_reg); m_float_emit->STR(128, IndexType::Unsigned, flush_reg, PPC_REG, u32(PPCSTATE_OFF_PS0(preg))); + reg.SetDirty(false); } break; case RegType::Register: @@ -627,6 +622,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // It would take longer to do an insert to a temporary and a 64bit store than to just do this. m_float_emit->STR(128, IndexType::Unsigned, flush_reg, PPC_REG, static_cast(PPCSTATE_OFF_PS0(preg))); + reg.SetDirty(false); break; case RegType::DuplicatedSingle: flush_reg = GetReg(); @@ -636,6 +632,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // Store PSR1 (which is equal to PSR0) in memory. m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, static_cast(PPCSTATE_OFF_PS1(preg))); + reg.Load(host_reg, reg.GetType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle : + RegType::LowerPair); break; default: // All other types doesn't store anything in PSR1. @@ -646,7 +644,18 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) Unlock(flush_reg); } - reg.Load(reg.GetReg(), type); + if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded) + { + // If not loaded at all, just alloc a new one. 
+ reg.Load(GetReg(), type); + reg.SetDirty(set_dirty); + } + else if (set_dirty) + { + reg.Load(reg.GetReg(), type); + reg.SetDirty(true); + } + return reg.GetReg(); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 2ecbcbbffa..16678a04b1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -266,9 +266,15 @@ public: // Gets the immediate that a register is set to, only valid for guest GPRs u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); } // Binds a guest GPR to a host register, optionally loading its value - void BindToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestGPR(preg), do_load); } + void BindToRegister(size_t preg, bool do_load, bool set_dirty = true) + { + BindToRegister(GetGuestGPR(preg), do_load, set_dirty); + } // Binds a guest CR to a host register, optionally loading its value - void BindCRToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestCR(preg), do_load); } + void BindCRToRegister(size_t preg, bool do_load, bool set_dirty = true) + { + BindToRegister(GetGuestCR(preg), do_load, set_dirty); + } BitSet32 GetCallerSavedUsed() const override; void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG) @@ -307,7 +313,7 @@ private: Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg); void SetImmediate(const GuestRegInfo& guest_reg, u32 imm); - void BindToRegister(const GuestRegInfo& guest_reg, bool do_load); + void BindToRegister(const GuestRegInfo& guest_reg, bool do_load, bool set_dirty = true); void FlushRegisters(BitSet32 regs, bool maintain_state, Arm64Gen::ARM64Reg tmp_reg); void FlushCRRegisters(BitSet32 regs, bool maintain_state, Arm64Gen::ARM64Reg tmp_reg); @@ -326,7 +332,7 @@ public: // Will dump an immediate to the host register as well Arm64Gen::ARM64Reg R(size_t preg, RegType type); - 
Arm64Gen::ARM64Reg RW(size_t preg, RegType type); + Arm64Gen::ARM64Reg RW(size_t preg, RegType type, bool set_dirty = true); BitSet32 GetCallerSavedUsed() const override; From 662ae570a0b8bbdfa73caabc37e1f2148d73f0b1 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 2 Jul 2021 18:34:56 +0200 Subject: [PATCH 05/13] JitArm64: Make EmitBackpatchRoutine support saving W0 Being able to preserve the address register is useful for the next commit, and W0 is the address register used for loads. Saving the address register used for stores, W1, was already supported. --- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 4923e713cb..13cc4722a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -6,6 +6,7 @@ #include #include +#include "Common/Align.h" #include "Common/BitSet.h" #include "Common/CommonFuncs.h" #include "Common/CommonTypes.h" @@ -144,15 +145,21 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR const ARM64Reg temp_reg = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0; const int temp_reg_index = DecodeReg(temp_reg); - if (memcheck && (flags & BackPatchInfo::FLAG_LOAD)) - { - ABI_PushRegisters(gprs_to_push & BitSet32{temp_reg_index}); - ABI_PushRegisters(gprs_to_push & ~BitSet32{temp_reg_index}); - } - else - { - ABI_PushRegisters(gprs_to_push); - } + BitSet32 gprs_to_push_early = {}; + if (memcheck) + gprs_to_push_early[temp_reg_index] = true; + if (flags & BackPatchInfo::FLAG_LOAD) + gprs_to_push_early[0] = true; + + // If we're already pushing one register in the first PushRegisters call, we can push a + // second one for free. Let's do so, since it might save one instruction in the second + // PushRegisters call. 
(Do not do this for caller-saved registers which may be in the register + // cache, or else EmitMemcheck will not be able to flush the register cache correctly!) + if (gprs_to_push & gprs_to_push_early) + gprs_to_push_early[30] = true; + + ABI_PushRegisters(gprs_to_push & gprs_to_push_early); + ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); if (flags & BackPatchInfo::FLAG_STORE) @@ -213,21 +220,17 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR } m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); - if (memcheck && (flags & BackPatchInfo::FLAG_LOAD)) - ABI_PopRegisters(gprs_to_push & ~BitSet32{temp_reg_index}); - else - ABI_PopRegisters(gprs_to_push); + ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early); if (memcheck) { - const u64 early_push_size = flags & BackPatchInfo::FLAG_LOAD ? 16 : 0; + const u64 early_push_size = Common::AlignUp(gprs_to_push_early.Count(), 2) * 8; WriteConditionalExceptionExit(EXCEPTION_DSI, temp_reg, early_push_size); } if (flags & BackPatchInfo::FLAG_LOAD) { ARM64Reg src_reg = access_size == 64 ? 
ARM64Reg::X0 : ARM64Reg::W0; - ASSERT(!gprs_to_push[DecodeReg(src_reg)]); if (flags & BackPatchInfo::FLAG_PAIR) { @@ -248,10 +251,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR } ByteswapAfterLoad(this, &m_float_emit, RS, src_reg, flags, false, false); - - if (memcheck) - ABI_PopRegisters(gprs_to_push & BitSet32{temp_reg_index}); } + + ABI_PopRegisters(gprs_to_push & gprs_to_push_early); } if (in_far_code) From b4ffdce80008024449684b8339e18b6a61d87cab Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 2 Jul 2021 19:06:11 +0200 Subject: [PATCH 06/13] JitArm64: Implement memcheck for lXX/stX with update --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 90 +++++++++++-------- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index a1368066cd..f8247ae105 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -221,7 +221,7 @@ protected: BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); // Loadstore routines void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); - void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset); + void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update); // If lookup succeeds, writes upper 15 bits of physical address to addr_out. If not, // jumps to the returned FixupBranch. Clobbers tmp and the 17 lower bits of addr_out. 
Arm64Gen::FixupBranch BATAddressLookup(Arm64Gen::ARM64Reg addr_out, Arm64Gen::ARM64Reg addr_in, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 685d96c35a..1c25864a44 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -101,18 +101,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o ARM64Reg XA = EncodeRegTo64(addr_reg); - if (is_immediate) - MOVI2R(XA, imm_addr); + bool addr_reg_set = !is_immediate; + const auto set_addr_reg_if_needed = [&] { + if (!addr_reg_set) + MOVI2R(XA, imm_addr); + }; - if (update) + const bool early_update = !jo.memcheck && dest != static_cast<u32>(addr); + if (update && early_update) { gpr.BindToRegister(addr, false); + set_addr_reg_if_needed(); MOV(gpr.R(addr), addr_reg); } BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!update || early_update) + regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; if (!jo.memcheck) regs_in_use[DecodeReg(dest_reg)] = 0; @@ -123,6 +129,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0)); } else if (mmio_address) @@ -132,16 +139,25 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o } else { + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use); } gpr.BindToRegister(dest, false, true); ASSERT(dest_reg == gpr.R(dest)); + if (update && !early_update) + { + gpr.BindToRegister(addr, false); + set_addr_reg_if_needed(); + MOV(gpr.R(addr), addr_reg); + } + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); } -void 
JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset) +void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, + bool update) { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); @@ -156,11 +172,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (dest != -1 && !gpr.IsImm(dest)) reg_dest = gpr.R(dest); - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; - regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; - ARM64Reg addr_reg = ARM64Reg::W1; u32 imm_addr = 0; @@ -226,6 +237,26 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s ARM64Reg XA = EncodeRegTo64(addr_reg); + bool addr_reg_set = !is_immediate; + const auto set_addr_reg_if_needed = [&] { + if (!addr_reg_set) + MOVI2R(XA, imm_addr); + }; + + const bool early_update = !jo.memcheck && value != static_cast<u32>(dest); + if (update && early_update) + { + gpr.BindToRegister(dest, false); + set_addr_reg_if_needed(); + MOV(gpr.R(dest), addr_reg); + } + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!update || early_update) + regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; + u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; if (is_immediate) @@ -259,7 +290,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s } else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { - MOVI2R(XA, imm_addr); + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); } else if (mmio_address) @@ -269,12 +300,17 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s } else { - if (is_immediate) - MOVI2R(XA, 
imm_addr); - + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use); } + if (update && !early_update) + { + gpr.BindToRegister(dest, false); + set_addr_reg_if_needed(); + MOV(gpr.R(dest), addr_reg); + } + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); } @@ -381,8 +417,6 @@ void JitArm64::lXX(UGeckoInstruction inst) break; } - FALLBACK_IF(jo.memcheck && update); - SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); } @@ -448,29 +482,7 @@ void JitArm64::stX(UGeckoInstruction inst) break; } - FALLBACK_IF(jo.memcheck && update); - - SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset); - - if (update) - { - gpr.BindToRegister(a, false); - - ARM64Reg WA = gpr.GetReg(); - ARM64Reg RB = {}; - ARM64Reg RA = gpr.R(a); - if (regOffset != -1) - RB = gpr.R(regOffset); - if (regOffset == -1) - { - ADDI2R(RA, RA, offset, WA); - } - else - { - ADD(RA, RA, RB); - } - gpr.Unlock(WA); - } + SafeStoreFromReg(update ? a : (a ? 
a : -1), s, regOffset, flags, offset, update); } void JitArm64::lmw(UGeckoInstruction inst) From 4fe15e788fd4d0f6bad5335ed04740ceb6eb2d58 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 3 Jul 2021 11:57:12 +0200 Subject: [PATCH 07/13] JitArm64: Implement memcheck for lmw/stmw --- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 118 +++++++++--------- 1 file changed, 60 insertions(+), 58 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 1c25864a44..cbaf81f441 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -489,95 +489,97 @@ void JitArm64::lmw(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(!jo.fastmem || jo.memcheck); - u32 a = inst.RA; + u32 a = inst.RA, d = inst.RD; + s32 offset = inst.SIMM_16; - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); + + // MMU games make use of a >= d despite this being invalid according to the PEM. + // Because of this, make sure to not re-read rA after starting doing the loads. 
+ ARM64Reg addr_reg = ARM64Reg::W0; if (a) { - ADDI2R(WA, gpr.R(a), inst.SIMM_16, WA); - ADD(XA, XA, MEM_REG); + if (gpr.IsImm(a)) + MOVI2R(addr_reg, gpr.GetImm(a) + offset); + else + ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); } else { - ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA); + MOVI2R(addr_reg, offset); } - for (int i = inst.RD; i < 32; i++) + // TODO: This doesn't handle rollback on DSI correctly + constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32; + for (u32 i = d; i < 32; i++) { - int remaining = 32 - i; - if (remaining >= 4) - { - gpr.BindToRegister(i + 3, false); - gpr.BindToRegister(i + 2, false); - gpr.BindToRegister(i + 1, false); - gpr.BindToRegister(i, false); - ARM64Reg RX4 = gpr.R(i + 3); - ARM64Reg RX3 = gpr.R(i + 2); - ARM64Reg RX2 = gpr.R(i + 1); - ARM64Reg RX1 = gpr.R(i); - LDP(IndexType::Post, EncodeRegTo64(RX1), EncodeRegTo64(RX3), XA, 16); - REV32(EncodeRegTo64(RX1), EncodeRegTo64(RX1)); - REV32(EncodeRegTo64(RX3), EncodeRegTo64(RX3)); - LSR(EncodeRegTo64(RX2), EncodeRegTo64(RX1), 32); - LSR(EncodeRegTo64(RX4), EncodeRegTo64(RX3), 32); - i += 3; - } - else if (remaining >= 2) - { - gpr.BindToRegister(i + 1, false); - gpr.BindToRegister(i, false); - ARM64Reg RX2 = gpr.R(i + 1); - ARM64Reg RX1 = gpr.R(i); - LDP(IndexType::Post, RX1, RX2, XA, 8); - REV32(RX1, RX1); - REV32(RX2, RX2); - ++i; - } - else - { - gpr.BindToRegister(i, false); - ARM64Reg RX = gpr.R(i); - LDR(IndexType::Post, RX, XA, 4); - REV32(RX, RX); - } + gpr.BindToRegister(i, false, false); + ARM64Reg dest_reg = gpr.R(i); + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + if (i == 31) + regs_in_use[DecodeReg(addr_reg)] = 0; + if (!jo.memcheck) + regs_in_use[DecodeReg(dest_reg)] = 0; + + EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg), + regs_in_use, fprs_in_use); + + gpr.BindToRegister(i, false, true); + ASSERT(dest_reg == gpr.R(i)); + + if (i != 
31) + ADD(addr_reg, addr_reg, 4); } - gpr.Unlock(WA); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); } void JitArm64::stmw(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(!jo.fastmem || jo.memcheck); - u32 a = inst.RA; + u32 a = inst.RA, s = inst.RS; + s32 offset = inst.SIMM_16; - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - ARM64Reg WB = gpr.GetReg(); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + ARM64Reg addr_reg = ARM64Reg::W1; if (a) { - ADDI2R(WA, gpr.R(a), inst.SIMM_16, WA); - ADD(XA, XA, MEM_REG); + if (gpr.IsImm(a)) + MOVI2R(addr_reg, gpr.GetImm(a) + offset); + else + ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); } else { - ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA); + MOVI2R(addr_reg, offset); } - for (int i = inst.RD; i < 32; i++) + // TODO: This doesn't handle rollback on DSI correctly + constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_SIZE_32; + for (u32 i = s; i < 32; i++) { - ARM64Reg RX = gpr.R(i); - REV32(WB, RX); - STR(IndexType::Unsigned, WB, XA, (i - inst.RD) * 4); + ARM64Reg src_reg = gpr.R(i); + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (i == 31) + regs_in_use[DecodeReg(addr_reg)] = 0; + + EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg), + regs_in_use, fprs_in_use); + + if (i != 31) + ADD(addr_reg, addr_reg, 4); } - gpr.Unlock(WA, WB); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); } void JitArm64::dcbx(UGeckoInstruction inst) From 1c8ddcdda128a8a26804348620f259873bf7a510 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 21 Jul 2021 17:55:03 +0200 Subject: [PATCH 08/13] JitArm64: Implement memcheck for dcbz --- Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index cbaf81f441..d61f79af7e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -763,7 +763,6 @@ void JitArm64::dcbz(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(jo.memcheck || !jo.fastmem_arena); FALLBACK_IF(SConfig::GetInstance().bLowDCBZHack); int a = inst.RA, b = inst.RB; @@ -816,7 +815,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; - EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0, + EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); From 8c96e60cd1dfb0e1055fa4071066db64deada69d Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 21 Jul 2021 19:59:18 +0200 Subject: [PATCH 09/13] JitArm64: Implement memcheck for lfXX/stfXX without update --- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 79add2f227..d06ce7bdb0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -21,7 +21,6 @@ void JitArm64::lfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); u32 a = inst.RA, b = inst.RB; @@ -71,6 +70,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) break; } + FALLBACK_IF(jo.memcheck && update); + u32 imm_addr = 0; bool is_immediate = false; @@ -80,7 +81,7 @@ void JitArm64::lfXX(UGeckoInstruction inst) gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); fpr.Lock(ARM64Reg::Q0); - const 
ARM64Reg VD = fpr.RW(inst.FD, type); + const ARM64Reg VD = fpr.RW(inst.FD, type, false); ARM64Reg addr_reg = ARM64Reg::W0; if (update) @@ -165,7 +166,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; - fprs_in_use[DecodeReg(VD)] = 0; + if (!jo.memcheck) + fprs_in_use[DecodeReg(VD)] = 0; if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { @@ -176,6 +178,9 @@ void JitArm64::lfXX(UGeckoInstruction inst) EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use); } + const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); + ASSERT(VD == VD_again); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); } @@ -184,7 +189,6 @@ void JitArm64::stfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); - FALLBACK_IF(jo.memcheck); u32 a = inst.RA, b = inst.RB; @@ -244,6 +248,8 @@ void JitArm64::stfXX(UGeckoInstruction inst) break; } + FALLBACK_IF(jo.memcheck && update); + u32 imm_addr = 0; bool is_immediate = false; From 89301b1f91624f93fad0bd0d770140bd6110d9fa Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 21 Jul 2021 20:25:29 +0200 Subject: [PATCH 10/13] JitArm64: Implement memcheck for lfXX/stfXX with update --- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index d06ce7bdb0..3e03ef35ef 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -70,8 +70,6 @@ void JitArm64::lfXX(UGeckoInstruction inst) break; } - FALLBACK_IF(jo.memcheck && update); - u32 imm_addr = 0; bool is_immediate = false; @@ -156,7 +154,8 @@ void 
JitArm64::lfXX(UGeckoInstruction inst) if (is_immediate) MOVI2R(XA, imm_addr); - if (update) + const bool early_update = !jo.memcheck; + if (update && early_update) { gpr.BindToRegister(a, false); MOV(gpr.R(a), addr_reg); @@ -164,7 +163,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!update || early_update) + regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; if (!jo.memcheck) fprs_in_use[DecodeReg(VD)] = 0; @@ -181,6 +181,12 @@ void JitArm64::lfXX(UGeckoInstruction inst) const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); ASSERT(VD == VD_again); + if (update && !early_update) + { + gpr.BindToRegister(a, false); + MOV(gpr.R(a), addr_reg); + } + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); } @@ -248,8 +254,6 @@ void JitArm64::stfXX(UGeckoInstruction inst) break; } - FALLBACK_IF(jo.memcheck && update); - u32 imm_addr = 0; bool is_immediate = false; @@ -340,26 +344,25 @@ void JitArm64::stfXX(UGeckoInstruction inst) ARM64Reg XA = EncodeRegTo64(addr_reg); - if (is_immediate && !(jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))) - { - MOVI2R(XA, imm_addr); + bool addr_reg_set = !is_immediate; + const auto set_addr_reg_if_needed = [&] { + if (!addr_reg_set) + MOVI2R(XA, imm_addr); + }; - if (update) - { - gpr.BindToRegister(a, false); - MOV(gpr.R(a), addr_reg); - } - } - else if (!is_immediate && update) + const bool early_update = !jo.memcheck; + if (update && early_update) { gpr.BindToRegister(a, false); + set_addr_reg_if_needed(); MOV(gpr.R(a), addr_reg); } BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; - regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; + if (!update || early_update) + regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; 
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; if (is_immediate) @@ -384,28 +387,31 @@ void JitArm64::stfXX(UGeckoInstruction inst) STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; - - if (update) - { - // Chance of this happening is fairly low, but support it - gpr.BindToRegister(a, false); - MOVI2R(gpr.R(a), imm_addr); - } } else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) { + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); } else { + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use); } } else { + set_addr_reg_if_needed(); EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use); } + if (update && !early_update) + { + gpr.BindToRegister(a, false); + set_addr_reg_if_needed(); + MOV(gpr.R(a), addr_reg); + } + if (want_single && !have_single) fpr.Unlock(V0); From 9e43796912db01e5ec619e23b5a5bca28a7e48ed Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 22 Jul 2021 10:24:38 +0200 Subject: [PATCH 11/13] JitArm64: Allow passing temp FPR to EmitMemcheck Small optimization. If the caller already has an FPR that it isn't using, might as well pass it on to fpr.Flush. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 15 ++++++++------- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 ++- .../Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp | 10 ++++++---- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index f494f128f6..af9ec677c7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -501,15 +501,16 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always void JitArm64::WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit) { ARM64Reg WA = gpr.GetReg(); - WriteConditionalExceptionExit(exception, WA, increment_sp_on_exit); + WriteConditionalExceptionExit(exception, WA, Arm64Gen::ARM64Reg::INVALID_REG, + increment_sp_on_exit); gpr.Unlock(WA); } -void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg, +void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_gpr, ARM64Reg temp_fpr, u64 increment_sp_on_exit) { - LDR(IndexType::Unsigned, temp_reg, PPC_REG, PPCSTATE_OFF(Exceptions)); - FixupBranch no_exception = TBZ(temp_reg, IntLog2(exception)); + LDR(IndexType::Unsigned, temp_gpr, PPC_REG, PPCSTATE_OFF(Exceptions)); + FixupBranch no_exception = TBZ(temp_gpr, IntLog2(exception)); const bool switch_to_far_code = !IsInFarCode(); @@ -521,10 +522,10 @@ void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_reg, } if (increment_sp_on_exit != 0) - ADDI2R(ARM64Reg::SP, ARM64Reg::SP, increment_sp_on_exit, temp_reg); + ADDI2R(ARM64Reg::SP, ARM64Reg::SP, increment_sp_on_exit, temp_gpr); - gpr.Flush(FlushMode::MaintainState, temp_reg); - fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); + gpr.Flush(FlushMode::MaintainState, temp_gpr); + fpr.Flush(FlushMode::MaintainState, temp_fpr); WriteExceptionExit(js.compilerPC, false, true); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h 
b/Source/Core/Core/PowerPC/JitArm64/Jit.h index f8247ae105..3ef1955e37 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -266,7 +266,8 @@ protected: void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false, bool always_exception = false); void WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit = 0); - void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_reg, + void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr, + Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG, u64 increment_sp_on_exit = 0); void FakeLKExit(u32 exit_address_after_return); void WriteBLRExit(Arm64Gen::ARM64Reg dest); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 13cc4722a7..8bcfac7cbf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -142,12 +142,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR if (slowmem_fixup) SetJumpTarget(*slowmem_fixup); - const ARM64Reg temp_reg = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0; - const int temp_reg_index = DecodeReg(temp_reg); + const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0; + const int temp_gpr_index = DecodeReg(temp_gpr); BitSet32 gprs_to_push_early = {}; if (memcheck) - gprs_to_push_early[temp_reg_index] = true; + gprs_to_push_early[temp_gpr_index] = true; if (flags & BackPatchInfo::FLAG_LOAD) gprs_to_push_early[0] = true; @@ -224,8 +224,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR if (memcheck) { + const ARM64Reg temp_fpr = fprs_to_push[0] ? 
ARM64Reg::INVALID_REG : ARM64Reg::Q0; const u64 early_push_size = Common::AlignUp(gprs_to_push_early.Count(), 2) * 8; - WriteConditionalExceptionExit(EXCEPTION_DSI, temp_reg, early_push_size); + + WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size); } if (flags & BackPatchInfo::FLAG_LOAD) From 61c73061e93309640923e537408bd4c4fa2b7fdf Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 12 Oct 2021 17:32:57 +0200 Subject: [PATCH 12/13] JitArm64: Implement memcheck for psq_lXX/psq_stXX without update --- .../JitArm64/JitArm64_LoadStorePaired.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 141db6f487..7f22e5e8fc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -8,6 +8,7 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/PPCTables.h" @@ -19,7 +20,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStorePairedOff); - FALLBACK_IF(jo.memcheck); // If we have a fastmem arena, the asm routines assume address translation is on. FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !MSR.DR); @@ -36,6 +36,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? 
inst.Wx : inst.W; + FALLBACK_IF(jo.memcheck && update); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) @@ -47,7 +49,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W1; constexpr ARM64Reg type_reg = ARM64Reg::W2; - ARM64Reg VS = fpr.RW(inst.RS, RegType::Single); + ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false); if (inst.RA || update) // Always uses the register on update { @@ -80,7 +82,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) // Wipe the registers we are using as temporaries gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; - fprs_in_use[DecodeReg(VS)] = 0; + if (!jo.memcheck) + fprs_in_use[DecodeReg(VS)] = 0; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) @@ -99,6 +102,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(EncodeRegTo64(type_reg)); + WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::X30, ARM64Reg::Q1); + m_float_emit.ORR(EncodeRegToDouble(VS), ARM64Reg::D0, ARM64Reg::D0); } @@ -108,6 +113,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0); } + const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true); + ASSERT(VS == VS_again); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) @@ -121,7 +129,6 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStorePairedOff); - FALLBACK_IF(jo.memcheck); // If we have a fastmem arena, the asm routines assume address translation is on. FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !MSR.DR); @@ -137,6 +144,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) const int i = indexed ? 
inst.Ix : inst.I; const int w = indexed ? inst.Wx : inst.W; + FALLBACK_IF(jo.memcheck && update); + if (!js.assumeNoPairedQuantize) fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); @@ -229,6 +238,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOVP2R(ARM64Reg::X30, w ? single_store_quantized : paired_store_quantized); LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(EncodeRegTo64(type_reg)); + + WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::X30, ARM64Reg::Q1); } if (js.assumeNoPairedQuantize && !have_single) From 5490797867ebfd4c5581944930d5bb87c69f915c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 12 Oct 2021 20:09:59 +0200 Subject: [PATCH 13/13] JitArm64: Implement memcheck for psq_lXX/psq_stXX with update --- .../JitArm64/JitArm64_LoadStorePaired.cpp | 28 +++++++++---- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 40 ++++++++++--------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 7f22e5e8fc..5a4daa58cc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -36,8 +36,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? 
inst.Wx : inst.W; - FALLBACK_IF(jo.memcheck && update); - gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) @@ -68,7 +66,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) MOVI2R(addr_reg, (u32)offset); } - if (update) + const bool early_update = !jo.memcheck; + if (update && early_update) { gpr.BindToRegister(inst.RA, false); MOV(gpr.R(inst.RA), addr_reg); @@ -80,7 +79,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); // Wipe the registers we are using as temporaries - gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; + if (!update || early_update) + gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; if (!jo.memcheck) fprs_in_use[DecodeReg(VS)] = 0; @@ -116,6 +116,12 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true); ASSERT(VS == VS_again); + if (update && !early_update) + { + gpr.BindToRegister(inst.RA, false); + MOV(gpr.R(inst.RA), addr_reg); + } + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) @@ -144,8 +150,6 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? 
inst.Wx : inst.W; - FALLBACK_IF(jo.memcheck && update); - if (!js.assumeNoPairedQuantize) fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); @@ -207,7 +211,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOVI2R(addr_reg, (u32)offset); } - if (update) + const bool early_update = !jo.memcheck; + if (update && early_update) { gpr.BindToRegister(inst.RA, false); MOV(gpr.R(inst.RA), addr_reg); @@ -220,7 +225,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) // Wipe the registers we are using as temporaries gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; - gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; + if (!update || early_update) + gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) @@ -242,6 +248,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::X30, ARM64Reg::Q1); } + if (update && !early_update) + { + gpr.BindToRegister(inst.RA, false); + MOV(gpr.R(inst.RA), addr_reg); + } + if (js.assumeNoPairedQuantize && !have_single) fpr.Unlock(VS); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 5f5b8826fd..bf650baf15 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -495,7 +495,9 @@ void JitArm64::GenerateQuantizedLoads() // Q1 is a temporary ARM64Reg addr_reg = ARM64Reg::X0; ARM64Reg scale_reg = ARM64Reg::X1; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2, 3}; + BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3}; + if (!jo.memcheck) + gprs_to_push &= ~BitSet32{0}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; ARM64FloatEmitter float_emit(this); @@ -524,8 +526,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, 
addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -542,8 +544,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -559,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -576,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, 
ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -605,8 +607,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -623,8 +625,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -640,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -657,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads() float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); - MOVP2R(addr_reg, &m_dequantizeTableS); - ADD(scale_reg, addr_reg, 
scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); + MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); + ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); RET(ARM64Reg::X30); @@ -701,7 +703,9 @@ void JitArm64::GenerateQuantizedStores() // Q1 is a temporary ARM64Reg scale_reg = ARM64Reg::X0; ARM64Reg addr_reg = ARM64Reg::X1; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1, 2}; + BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2}; + if (!jo.memcheck) + gprs_to_push &= ~BitSet32{1}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; ARM64FloatEmitter float_emit(this);