From 6e2910b25e693a85c6c364610d19db6db75f516f Mon Sep 17 00:00:00 2001 From: Wunkolo <Wunkolo@gmail.com> Date: Tue, 21 May 2024 09:53:16 -0700 Subject: [PATCH] [a64] Optimize memory-address calculation The LSL can be embedded into the ADD to remove an additional instruction. What was `cset`+`lsl`+`add` should now just be `cset`+`add ... LSL 12` --- src/xenia/cpu/backend/a64/a64_seq_memory.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/xenia/cpu/backend/a64/a64_seq_memory.cc b/src/xenia/cpu/backend/a64/a64_seq_memory.cc index 773537f1a..8b66c1c68 100644 --- a/src/xenia/cpu/backend/a64/a64_seq_memory.cc +++ b/src/xenia/cpu/backend/a64/a64_seq_memory.cc @@ -27,7 +27,7 @@ template <typename T> XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset, WReg address_register = W3) { assert_true(offset.is_constant); - int32_t offset_const = static_cast<int32_t>(offset.constant()); + const int32_t offset_const = static_cast<int32_t>(offset.constant()); if (guest.is_constant) { uint32_t address = static_cast<uint32_t>(guest.constant()); @@ -53,8 +53,7 @@ XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset, e.MOV(W0, 0xE0000000 - offset_const); e.CMP(guest.reg().toW(), W0); e.CSET(W0, Cond::HS); - e.LSL(W0, W0, 12); - e.ADD(W0, W0, guest.reg().toW()); + e.ADD(W0, guest.reg().toW(), W0, LSL, 12); } else { // Clear the top 32 bits, as they are likely garbage. // TODO(benvanik): find a way to avoid doing this. @@ -76,7 +75,7 @@ XReg ComputeMemoryAddress(A64Emitter& e, const T& guest, // TODO(benvanik): figure out how to do this without a temp. // Since the constant is often 0x8... if we tried to use that as a // displacement it would be sign extended and mess things up. 
- uint32_t address = static_cast<uint32_t>(guest.constant()); + const uint32_t address = static_cast<uint32_t>(guest.constant()); if (address < 0x80000000) { e.MOV(W0, address); e.ADD(address_register.toX(), e.GetMembaseReg(), X0); @@ -98,8 +97,7 @@ XReg ComputeMemoryAddress(A64Emitter& e, const T& guest, e.MOV(W0, 0xE0000000); e.CMP(guest.reg().toW(), W0); e.CSET(W0, Cond::HS); - e.LSL(W0, W0, 12); - e.ADD(W0, W0, guest.reg().toW()); + e.ADD(W0, guest.reg().toW(), W0, LSL, 12); } else { // Clear the top 32 bits, as they are likely garbage. // TODO(benvanik): find a way to avoid doing this. @@ -107,7 +105,6 @@ XReg ComputeMemoryAddress(A64Emitter& e, const T& guest, e.ADD(address_register.toX(), e.GetMembaseReg(), X0); return address_register.toX(); - // return e.GetMembaseReg() + e.rax; } } @@ -192,8 +189,7 @@ struct ATOMIC_COMPARE_EXCHANGE_I32 e.MOV(W3, 0xE0000000); e.CMP(i.src1.reg().toW(), W3); e.CSET(W1, Cond::HS); - e.LSL(W1, W1, 12); - e.ADD(W1, W1, i.src1.reg().toW()); + e.ADD(W1, i.src1.reg().toW(), W1, LSL, 12); } else { e.MOV(W1, i.src1.reg().toW()); } @@ -221,8 +217,7 @@ struct ATOMIC_COMPARE_EXCHANGE_I64 e.MOV(W3, 0xE0000000); e.CMP(i.src1.reg(), X3); e.CSET(W1, Cond::HS); - e.LSL(W1, W1, 12); - e.ADD(W1, W1, i.src1.reg().toW()); + e.ADD(W1, i.src1.reg().toW(), W1, LSL, 12); } else { e.MOV(W1, i.src1.reg().toW()); }