From a307d9d9b8ba1612839c3507317440f00548d849 Mon Sep 17 00:00:00 2001
From: Sintendo <3380580+Sintendo@users.noreply.github.com>
Date: Sat, 2 Nov 2024 23:15:22 +0100
Subject: [PATCH] JitArm64_LoadStore: Optimize zero stores in stX

The value being stored must be loaded into a register. In the case of an
immediate value, this means it must be materialized. The value is
eventually byteswapped before performing the store.

This can be simplified for the value 0 for two reasons:
- ARM64 has a dedicated zero register, so zero does not need to be
  materialized.
- Byteswapping zero is still zero, so we can skip this step.

We could skip byteswapping for other values as well by immediately
materializing the byteswapped value in a register, but the benefits are
less clear there (if the value needs to be materialized anyway, it is
better to do it up front).

Before:
0x5280001b   mov    w27, #0x0        ; =0
0xb9404fba   ldr    w26, [x29, #0x4c]
0x12881862   mov    w2, #-0x40c4     ; =-16580
0x0b020342   add    w2, w26, w2
0x5ac00b61   rev    w1, w27
0xb8226b81   str    w1, [x28, x2]

After:
0xb9404fbb   ldr    w27, [x29, #0x4c]
0x12881862   mov    w2, #-0x40c4     ; =-16580
0x0b020362   add    w2, w27, w2
0xb8226b9f   str    wzr, [x28, x2]
---
 Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 ++-
 Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp           | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 8e5cb47940..ebcb8142b7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -181,7 +181,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   if (!jo.fastmem)
     gpr.Lock(ARM64Reg::W0);
 
-  ARM64Reg RS = gpr.R(value);
+  // Don't materialize zero.
+  ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value);
 
   ARM64Reg reg_dest = ARM64Reg::INVALID_REG;
   ARM64Reg reg_off = ARM64Reg::INVALID_REG;
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
index 4beb74ff1b..19274d2793 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
@@ -257,6 +257,12 @@ void ByteswapAfterLoad(ARM64XEmitter* emit, ARM64FloatEmitter* float_emit, ARM64
 ARM64Reg ByteswapBeforeStore(ARM64XEmitter* emit, ARM64FloatEmitter* float_emit, ARM64Reg tmp_reg,
                              ARM64Reg src_reg, u32 flags, bool want_reversed)
 {
+  // Byteswapping zero is still zero.
+  // We'd typically expect a writable register to be passed in, but recognize
+  // WZR for optimization purposes.
+  if ((flags & BackPatchInfo::FLAG_FLOAT) == 0 && src_reg == ARM64Reg::WZR)
+    return ARM64Reg::WZR;
+
   ARM64Reg dst_reg = src_reg;
 
   if (want_reversed == !(flags & BackPatchInfo::FLAG_REVERSE))