From 47665fddb87e6e622c655768c674057904e39069 Mon Sep 17 00:00:00 2001
From: Wunkolo
Date: Wed, 8 May 2024 09:26:47 -0700
Subject: [PATCH] [a64] Compute memory offsets as 32-bit registers

Additionally fixes some instruction forms to use the more general
`STR` instruction with an offset.
---
 src/xenia/cpu/backend/a64/a64_seq_memory.cc | 83 +++++++++------------
 1 file changed, 35 insertions(+), 48 deletions(-)

diff --git a/src/xenia/cpu/backend/a64/a64_seq_memory.cc b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
index a23b708eb..f1d6af164 100644
--- a/src/xenia/cpu/backend/a64/a64_seq_memory.cc
+++ b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
@@ -25,7 +25,7 @@ volatile int anchor_memory = 0;
 
 template <typename T>
 XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset,
-                                XReg address_register = X3) {
+                                WReg address_register = W3) {
   assert_true(offset.is_constant);
   int32_t offset_const = static_cast<int32_t>(offset.constant());
 
@@ -33,24 +33,24 @@ XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset,
     uint32_t address = static_cast<uint32_t>(guest.constant());
     address += offset_const;
     if (address < 0x80000000) {
-      e.ADD(address_register, e.GetMembaseReg(), address);
-      return address_register;
+      e.ADD(address_register.toX(), e.GetMembaseReg(), address);
+      return address_register.toX();
     } else {
       if (address >= 0xE0000000 &&
           xe::memory::allocation_granularity() > 0x1000) {
-        e.MOV(address_register, address + 0x1000);
+        e.MOV(W0, address + 0x1000);
       } else {
-        e.MOV(address_register, address);
+        e.MOV(W0, address);
       }
-      e.ADD(address_register, e.GetMembaseReg(), address_register);
-      return address_register;
+      e.ADD(address_register.toX(), e.GetMembaseReg(), X0);
+      return address_register.toX();
     }
   } else {
     if (xe::memory::allocation_granularity() > 0x1000) {
       // Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
       // it via memory mapping.
-      e.MOV(address_register.toW(), 0xE0000000 - offset_const);
-      e.CMP(guest.reg().toW(), address_register.toW());
+      e.MOV(W0, 0xE0000000 - offset_const);
+      e.CMP(guest.reg().toW(), W0);
       e.CSET(W0, Cond::HS);
       e.LSL(W0, W0, 12);
       e.ADD(W0, W0, guest.reg().toW());
@@ -59,53 +59,52 @@ XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset,
       // TODO(benvanik): find a way to avoid doing this.
       e.MOV(W0, guest.reg().toW());
     }
-    e.ADD(address_register, e.GetMembaseReg(), X0);
+    e.ADD(address_register.toX(), e.GetMembaseReg(), X0);
     e.MOV(X0, offset_const);
-    e.ADD(address_register, address_register, X0);
-    return address_register;
+    e.ADD(address_register.toX(), address_register.toX(), X0);
+    return address_register.toX();
   }
 }
 
 // Note: most *should* be aligned, but needs to be checked!
 template <typename T>
 XReg ComputeMemoryAddress(A64Emitter& e, const T& guest,
-                          XReg address_register = X3) {
+                          WReg address_register = W3) {
   if (guest.is_constant) {
     // TODO(benvanik): figure out how to do this without a temp.
     // Since the constant is often 0x8... if we tried to use that as a
     // displacement it would be sign extended and mess things up.
     uint32_t address = static_cast<uint32_t>(guest.constant());
     if (address < 0x80000000) {
-      e.ADD(address_register, e.GetMembaseReg(), address);
-      return address_register;
+      e.ADD(address_register.toX(), e.GetMembaseReg(), address);
+      return address_register.toX();
     } else {
       if (address >= 0xE0000000 &&
           xe::memory::allocation_granularity() > 0x1000) {
-        e.MOV(address_register, address + 0x1000);
+        e.MOV(W0, address + 0x1000u);
       } else {
-        e.MOV(address_register, address);
+        e.MOV(W0, address);
       }
-      e.ADD(address_register, e.GetMembaseReg(), address_register);
-      return address_register;
+      e.ADD(address_register.toX(), e.GetMembaseReg(), X0);
+      return address_register.toX();
     }
   } else {
     if (xe::memory::allocation_granularity() > 0x1000) {
       // Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
       // it via memory mapping.
-      e.MOV(address_register.toW(), 0xE0000000);
-      e.CMP(guest.reg().toW(), address_register.toW());
-      e.CSET(X0, Cond::HS);
-      e.LSL(X0, X0, 12);
-      e.ADD(X0, X0, guest);
-      e.MOV(W0, W0);
+      e.MOV(W0, 0xE0000000);
+      e.CMP(guest.reg().toW(), W0);
+      e.CSET(W0, Cond::HS);
+      e.LSL(W0, W0, 12);
+      e.ADD(W0, W0, guest.reg().toW());
     } else {
       // Clear the top 32 bits, as they are likely garbage.
       // TODO(benvanik): find a way to avoid doing this.
       e.MOV(W0, guest.reg().toW());
     }
-    e.ADD(address_register, e.GetMembaseReg(), X0);
-    return address_register;
+    e.ADD(address_register.toX(), e.GetMembaseReg(), X0);
+    return address_register.toX();
     // return e.GetMembaseReg() + e.rax;
   }
 }
@@ -402,8 +401,7 @@ struct LOAD_CONTEXT_I64
 struct LOAD_CONTEXT_F32
     : Sequence<LOAD_CONTEXT_F32, I<OPCODE_LOAD_CONTEXT, F32Op, OffsetOp>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    e.ADD(X0, e.GetContextReg(), i.src1.value);
-    e.LD1(List{i.dest.reg().toQ().Selem()[0]}, X0);
+    e.LDR(i.dest, e.GetContextReg(), i.src1.value);
     if (IsTracingData()) {
       // e.lea(e.GetNativeParam(1), e.dword[addr]);
       // e.mov(e.GetNativeParam(0), i.src1.value);
@@ -414,8 +412,7 @@ struct LOAD_CONTEXT_F32
 struct LOAD_CONTEXT_F64
     : Sequence<LOAD_CONTEXT_F64, I<OPCODE_LOAD_CONTEXT, F64Op, OffsetOp>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    e.ADD(X0, e.GetContextReg(), i.src1.value);
-    e.LD1(List{i.dest.reg().toQ().Delem()[0]}, X0);
+    e.LDR(i.dest, e.GetContextReg(), i.src1.value);
     // e.vmovsd(i.dest, e.qword[addr]);
     if (IsTracingData()) {
       // e.lea(e.GetNativeParam(1), e.qword[addr]);
@@ -519,8 +516,7 @@ struct STORE_CONTEXT_F32
       e.MOV(W0, i.src2.value->constant.i32);
       e.STR(W0, e.GetContextReg(), i.src1.value);
     } else {
-      e.ADD(X0, e.GetContextReg(), i.src1.value);
-      e.ST1(List{i.src2.reg().toQ().Selem()[0]}, X0);
+      e.STR(i.src2, e.GetContextReg(), i.src1.value);
     }
     if (IsTracingData()) {
       // e.lea(e.GetNativeParam(1), e.dword[addr]);
@@ -533,18 +529,11 @@ struct STORE_CONTEXT_F64
     : Sequence<STORE_CONTEXT_F64,
                I<OPCODE_STORE_CONTEXT, VoidOp, OffsetOp, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    if (i.src2.is_constant) {
-      // e.MovMem64(addr, i.src2.value->constant.i64);
-    } else {
-      // e.vmovsd(e.qword[addr], i.src2);
-    }
-
     if (i.src2.is_constant) {
       e.MOV(X0, i.src2.value->constant.i64);
       e.STR(X0, e.GetContextReg(), i.src1.value);
     } else {
-      e.ADD(X0, e.GetContextReg(), i.src1.value);
-      e.ST1(List{i.src2.reg().toQ().Delem()[0]}, X0);
+      e.STR(i.src2, e.GetContextReg(), i.src1.value);
     }
     if (IsTracingData()) {
       // e.lea(e.GetNativeParam(1), e.qword[addr]);
@@ -659,7 +648,7 @@ struct LOAD_OFFSET_I32
     auto addr_reg = ComputeMemoryAddressOffset(e, i.src1, i.src2);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       e.LDR(i.dest, addr_reg);
-      e.REV(i.dest.reg().toX(), i.dest.reg().toX());
+      e.REV(i.dest, i.dest);
     } else {
       e.LDR(i.dest, addr_reg);
     }
@@ -821,8 +810,7 @@ struct LOAD_I64 : Sequence<LOAD_I64, I<OPCODE_LOAD, I64Op, I64Op>> {
 struct LOAD_F32 : Sequence<LOAD_F32, I<OPCODE_LOAD, F32Op, I64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     auto addr_reg = ComputeMemoryAddress(e, i.src1);
-    // e.vmovss(i.dest, e.dword[addr]);
-    e.LD1(List{i.dest.reg().toQ().Selem()[0]}, addr_reg);
+    e.LDR(i.dest, addr_reg);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       assert_always("not implemented yet");
     }
@@ -836,8 +824,7 @@ struct LOAD_F32 : Sequence<LOAD_F32, I<OPCODE_LOAD, F32Op, I64Op>> {
 struct LOAD_F64 : Sequence<LOAD_F64, I<OPCODE_LOAD, F64Op, I64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     auto addr_reg = ComputeMemoryAddress(e, i.src1);
-    // e.vmovsd(i.dest, e.qword[addr]);
-    e.LD1(List{i.dest.reg().toQ().Delem()[0]}, addr_reg);
+    e.LDR(i.dest, addr_reg);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       assert_always("not implemented yet");
     }
@@ -965,7 +952,7 @@ struct STORE_F32 : Sequence<STORE_F32, I<OPCODE_STORE, VoidOp, I64Op, F32Op>> {
         e.MOV(W0, i.src2.value->constant.i32);
         e.STR(W0, addr_reg);
       } else {
-        e.ST1(List{i.src2.reg().toQ().Selem()[0]}, addr_reg);
+        e.STR(i.src2, addr_reg);
       }
     }
     if (IsTracingData()) {
@@ -987,7 +974,7 @@ struct STORE_F64 : Sequence<STORE_F64, I<OPCODE_STORE, VoidOp, I64Op, F64Op>> {
         e.MOV(X0, i.src2.value->constant.i64);
        e.STR(X0, addr_reg);
       } else {
-        e.ST1(List{i.src2.reg().toQ().Delem()[0]}, addr_reg);
+        e.STR(i.src2, addr_reg);
       }
     }
     if (IsTracingData()) {
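
Review notes (illustrative sketches, not part of the patch):

The key invariant is that guest addresses are 32-bit values, so the patched
helpers do the intermediate math in W registers and only widen for the final
add against the membase pointer. Below is a minimal host-side C++ sketch of
the address computation ComputeMemoryAddressOffset emits for the non-constant
path; ResolveGuestAddress, membase, and granularity are hypothetical stand-ins
for the emitted registers, GetMembaseReg(), and
xe::memory::allocation_granularity():

    #include <cstddef>
    #include <cstdint>

    uint8_t* ResolveGuestAddress(uint8_t* membase, uint32_t guest,
                                 int32_t offset_const, size_t granularity) {
      uint32_t adjusted = guest;
      if (granularity > 0x1000) {
        // Mirrors the emitted MOV/CMP/CSET/LSL/ADD sequence: add 0x1000
        // exactly when guest + offset lands at or above 0xE0000000,
        // emulating the 4 KB physical-address offset that the host memory
        // mapping cannot express.
        if (guest >= 0xE0000000u - static_cast<uint32_t>(offset_const)) {
          adjusted += 0x1000;  // wraps at 32 bits, like W-register math
        }
      }
      // Zero-extend the 32-bit guest address, then apply the sign-extended
      // constant offset, matching the final ADD / MOV / ADD.
      return membase + static_cast<uint64_t>(adjusted) +
             static_cast<int64_t>(offset_const);
    }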
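
Why returning address_register.toX() straight after W-register math is safe:
on AArch64, any write to a W register zeroes bits 63:32 of the corresponding
X register, so the widened register carries no stale high bits into the
64-bit ADD against membase. A two-call sketch in the emitter's own style
(register choices here are only for illustration):

    e.MOV(W3, 0x80000000u);            // any W write clears bits 63:32 of X3
    e.ADD(X3, e.GetMembaseReg(), X3);  // 64-bit add sees a zero-extended offset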
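
On the second half of the commit message: the single-lane LD1/ST1 forms only
accept a plain base register, which is what forced the extra
ADD X0, base, #offset before every context F32/F64 access. The scalar LDR/STR
forms on an S or D register take a base plus immediate offset directly, so
each ADD+LD1/ST1 pair collapses into one instruction. Side-by-side sketch of
the emitter calls, as in the LOAD_CONTEXT_F32 hunk (dest and offset are
illustrative placeholders):

    // Before: materialize the absolute address, then a one-lane vector load.
    e.ADD(X0, e.GetContextReg(), offset);
    e.LD1(List{dest.toQ().Selem()[0]}, X0);

    // After: one scalar load with base register + immediate offset.
    e.LDR(dest, e.GetContextReg(), offset);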