From d91d6fcdc5c63dc3d7d161537f08bf3cc2b57616 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 21 Jul 2021 18:07:28 +0200 Subject: [PATCH 1/2] JitArm64: Fix dcbz backpatch The dcbz instruction needs to lock W30 so that the slowmem code will push and pop it when calling into C++. Also, the slowmem code expects that the address is present in W0, so replace the use of W0 as a scratch register in the fastmem code with the now locked W30. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp | 7 ++++--- Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index edb6c3ede8..b45bbfc35a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -104,9 +104,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR else if (flags & BackPatchInfo::FLAG_ZERO_256) { // This literally only stores 32bytes of zeros to the target address - ADD(addr, addr, MEM_REG); - STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, addr, 0); - STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, addr, 16); + ARM64Reg temp = ARM64Reg::X30; + ADD(temp, addr, MEM_REG); + STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0); + STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16); } else { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 7fcabb5095..f716229ebf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -597,7 +597,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) int a = inst.RA, b = inst.RB; - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); ARM64Reg addr_reg = ARM64Reg::W0; @@ -651,7 +651,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); } void JitArm64::eieio(UGeckoInstruction inst) From 1df3456267c2e501a1252656c3a304b66773abe6 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 21 Jul 2021 19:24:41 +0200 Subject: [PATCH 2/2] JitArm64: Remove a comment in dcbz implementation This implementation is pretty efficient in my opinion. And "As long as we aren't falling back to interpreter we're winning a lot" applies to basically every instruction to some degree anyway. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index f716229ebf..c5e3d7f6fa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -641,9 +641,6 @@ void JitArm64::dcbz(UGeckoInstruction inst) } } - // We don't care about being /too/ terribly efficient here - // As long as we aren't falling back to interpreter we're winning a lot - BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;