diff --git a/src/xenia/cpu/backend/a64/a64_emitter.cc b/src/xenia/cpu/backend/a64/a64_emitter.cc
index 0a83495d2..aba1fdd9a 100644
--- a/src/xenia/cpu/backend/a64/a64_emitter.cc
+++ b/src/xenia/cpu/backend/a64/a64_emitter.cc
@@ -137,21 +137,11 @@ bool A64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
 void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
                           GuestFunction* function) {
-  // To avoid changing xbyak, we do a switcharoo here.
-  // top_ points to the Xbyak buffer, and since we are in AutoGrow mode
-  // it has pending relocations. We copy the top_ to our buffer, swap the
-  // pointer, relocate, then return the original scratch pointer for use.
-  // top_ is used by Xbyak's ready() as both write base pointer and the absolute
-  // address base, which would not work on platforms not supporting writable
-  // executable memory, but Xenia doesn't use absolute label addresses in the
-  // generated code.
-
-  // uint8_t* old_address = top_;
+  // Copy the current oaknut instruction-buffer into the code-cache
   uint32_t* old_address = CodeBlock::ptr();
   void* new_execute_address;
   void* new_write_address;
 
-  // assert_true(func_info.code_size.total == size_);
   assert_true(func_info.code_size.total == offset());
 
   if (function) {
@@ -162,15 +152,9 @@ void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
     code_cache_->PlaceHostCode(0, CodeBlock::ptr(), func_info,
                                new_execute_address, new_write_address);
   }
-  // top_ = reinterpret_cast<uint8_t*>(new_write_address);
-  // set_wptr(reinterpret_cast<uint32_t*>(new_write_address));
-  // ready();
-
-  // top_ = old_address;
+  // Reset the oaknut instruction-buffer
   set_wptr(reinterpret_cast<uint32_t*>(old_address));
-
-  // reset();
   label_lookup_.clear();
 
   return new_execute_address;
@@ -357,7 +341,7 @@ void A64Emitter::MarkSourceOffset(const Instr* i) {
 }
 
 void A64Emitter::EmitGetCurrentThreadId() {
-  // rsi must point to context. We could fetch from the stack if needed.
+  // X27 must point to context. We could fetch from the stack if needed.
   LDRH(W0, GetContextReg(), offsetof(ppc::PPCContext, thread_id));
 }
 
@@ -442,14 +426,11 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
     // TODO(benvanik): is it worth it to do this? It removes the need for
     // a ResolveFunction call, but makes the table less useful.
     assert_zero(uint64_t(fn->machine_code()) & 0xFFFFFFFF00000000);
-    // mov(eax, uint32_t(uint64_t(fn->machine_code())));
     MOV(X16, uint32_t(uint64_t(fn->machine_code())));
   } else if (code_cache_->has_indirection_table()) {
     // Load the pointer to the indirection table maintained in A64CodeCache.
     // The target dword will either contain the address of the generated code
     // or a thunk to ResolveAddress.
-    // mov(ebx, function->address());
-    // mov(eax, dword[ebx]);
     MOV(W17, function->address());
     LDR(W16, X17);
   } else {
@@ -476,10 +457,8 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
     BR(X16);
   } else {
     // Return address is from the previous SET_RETURN_ADDRESS.
-    // mov(rcx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
     LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
-    // call(rax);
     BLR(X16);
   }
 }
 
@@ -488,8 +467,6 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
                               const oaknut::XReg& reg) {
   // Check if return.
   if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
-    // cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
-    // je(epilog_label(), CodeGenerator::T_NEAR);
     LDR(W16, SP, StackLayout::GUEST_RET_ADDR);
     CMP(reg.toW(), W16);
     B(oaknut::Cond::EQ, epilog_label());
 
@@ -622,8 +599,6 @@ void A64Emitter::CallNativeSafe(void* fn) {
 }
 
 void A64Emitter::SetReturnAddress(uint64_t value) {
-  // mov(rax, value);
-  // mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax);
   MOV(X0, value);
   STR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
 }
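Aside: the Call() hunks above lean on two invariants that are easy to miss in diff form. Host code must fit in the low 4 GiB (hence the assert_zero on the upper 32 bits of machine_code()), and the indirection table appears to be mapped so that a guest address is itself the host address of its 32-bit slot, which is what the bare LDR(W16, X17) relies on. A minimal host-side sketch of that dispatch; LookupGuestTarget and the raw-pointer walk are illustrative, not Xenia's A64CodeCache API:

#include <cstdint>

using HostFn = void (*)();

// Hypothetical stand-in for what MOV(W17, function->address());
// LDR(W16, X17); BR/BLR(X16) perform in emitted code.
HostFn LookupGuestTarget(uint32_t guest_address) {
  // The slot holds either the generated code's address or the
  // ResolveAddress thunk; both fit in 32 bits per the assert above.
  uint32_t slot = *reinterpret_cast<const uint32_t*>(uintptr_t{guest_address});
  return reinterpret_cast<HostFn>(uintptr_t{slot});
}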
diff --git a/src/xenia/cpu/backend/a64/a64_emitter.h b/src/xenia/cpu/backend/a64/a64_emitter.h
index fef334dce..6c75e56ec 100644
--- a/src/xenia/cpu/backend/a64/a64_emitter.h
+++ b/src/xenia/cpu/backend/a64/a64_emitter.h
@@ -205,7 +205,7 @@ class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
   std::byte* GetVConstPtr() const;
   std::byte* GetVConstPtr(VConst id) const;
-  static constexpr uintptr_t GetVConstOffset(VConst id){
+  static constexpr uintptr_t GetVConstOffset(VConst id) {
     return sizeof(vec128_t) * id;
   }
   void LoadConstantV(oaknut::QReg dest, float v);
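For context on GetVConstOffset above: vector constants live in a flat table of 16-byte entries, so an id scales directly to a byte offset from the base returned by GetVConstPtr(). A self-contained sketch; the vec128_t stand-in and the explicit table pointer are illustrative, not Xenia's definitions:

#include <cstddef>
#include <cstdint>

struct alignas(16) vec128_t {  // stand-in for Xenia's vec128_t
  uint8_t u8[16];
};

constexpr uintptr_t GetVConstOffset(int id) {
  return sizeof(vec128_t) * id;  // matches the constexpr helper in the diff
}

const std::byte* GetVConstPtr(const std::byte* table_base, int id) {
  return table_base + GetVConstOffset(id);  // what LoadConstantV reads from
}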
diff --git a/src/xenia/cpu/backend/a64/a64_op.h b/src/xenia/cpu/backend/a64/a64_op.h
index 2eaea627c..2b2f58932 100644
--- a/src/xenia/cpu/backend/a64/a64_op.h
+++ b/src/xenia/cpu/backend/a64/a64_op.h
@@ -2,7 +2,7 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
-* Copyright 2018 Xenia Developers. All rights reserved.                      *
+* Copyright 2024 Xenia Developers. All rights reserved.                      *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
diff --git a/src/xenia/cpu/backend/a64/a64_seq_memory.cc b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
index be9baa4fc..8892227b2 100644
--- a/src/xenia/cpu/backend/a64/a64_seq_memory.cc
+++ b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
@@ -290,7 +290,6 @@ struct LOAD_LOCAL_I8 : Sequence<LOAD_LOCAL_I8, I<OPCODE_LOAD_LOCAL, I8Op, I32Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     e.LDRB(i.dest, SP, i.src1.constant());
-    // e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]);
     // e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest);
   }
 };
@@ -404,7 +403,6 @@ struct LOAD_CONTEXT_I8 : Sequence<LOAD_CONTEXT_I8, I<OPCODE_LOAD_CONTEXT, I8Op, OffsetOp>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     e.LDRB(i.dest, e.GetContextReg(), i.src1.value);
-    // e.mov(i.dest, e.byte[addr]);
     if (IsTracingData()) {
       e.MOV(e.GetNativeParam(0), i.src1.value);
       e.LDRB(e.GetNativeParam(1).toW(), e.GetContextReg(), i.src1.value);
@@ -1084,63 +1082,7 @@ struct CACHE_CONTROL
     }
     size_t cache_line_size = i.src2.value;
-    // RegExp addr;
-    // uint32_t address_constant;
-    // if (i.src1.is_constant) {
-    //   // TODO(benvanik): figure out how to do this without a temp.
-    //   // Since the constant is often 0x8... if we tried to use that as a
-    //   // displacement it would be sign extended and mess things up.
-    //   address_constant = static_cast<uint32_t>(i.src1.constant());
-    //   if (address_constant < 0x80000000) {
-    //     addr = e.GetMembaseReg() + address_constant;
-    //   } else {
-    //     if (address_constant >= 0xE0000000 &&
-    //         xe::memory::allocation_granularity() > 0x1000) {
-    //       // e.mov(e.eax, address_constant + 0x1000);
-    //     } else {
-    //       // e.mov(e.eax, address_constant);
-    //     }
-    //     addr = e.GetMembaseReg() + e.rax;
-    //   }
-    // } else {
-    //   if (xe::memory::allocation_granularity() > 0x1000) {
-    //     // Emulate the 4 KB physical address offset in 0xE0000000+ when
-    //     // can't do it via memory mapping.
-    //     // e.cmp(i.src1.reg().cvt32(), 0xE0000000);
-    //     // e.setae(e.al);
-    //     // e.movzx(e.eax, e.al);
-    //     // e.shl(e.eax, 12);
-    //     // e.add(e.eax, i.src1.reg().cvt32());
-    //   } else {
-    //     // Clear the top 32 bits, as they are likely garbage.
-    //     // TODO(benvanik): find a way to avoid doing this.
-    //     // e.mov(e.eax, i.src1.reg().cvt32());
-    //   }
-    //   addr = e.GetMembaseReg() + e.rax;
-    // }
-    // if (is_clflush) {
-    //   // e.clflush(e.ptr[addr]);
-    // }
-    // if (is_prefetch) {
-    //   // e.prefetcht0(e.ptr[addr]);
-    // }
-
-    // if (cache_line_size >= 128) {
-    //   // Prefetch the other 64 bytes of the 128-byte cache line.
-    //   if (i.src1.is_constant && address_constant < 0x80000000) {
-    //     addr = e.GetMembaseReg() + (address_constant ^ 64);
-    //   } else {
-    //     // e.xor_(e.eax, 64);
-    //   }
-    //   if (is_clflush) {
-    //     // e.clflush(e.ptr[addr]);
-    //   }
-    //   if (is_prefetch) {
-    //     // e.prefetcht0(e.ptr[addr]);
-    //   }
-    //   assert_true(cache_line_size == 128);
-    // }
+    // TODO(wunkolo): Arm64 cache-control
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
@@ -1151,10 +1093,6 @@ EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
 struct MEMORY_BARRIER
     : Sequence<MEMORY_BARRIER, I<OPCODE_MEMORY_BARRIER, VoidOp>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // mfence on x64 flushes all writes before any later instructions
-    // e.mfence();
-
-    // This is equivalent to DMB SY
     e.DMB(BarrierOp::SY);
   }
 };
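The MEMORY_BARRIER hunk above maps x64's mfence to DMB SY, AArch64's full-system barrier ordering both loads and stores. The closest portable C++ analogue is a sequentially-consistent fence; this sketch is for intuition only, since the JIT emits the instruction directly rather than calling anything like this:

#include <atomic>

void memory_barrier() {
  // Compiles to DMB ISH/SY on AArch64 and MFENCE (or a locked op) on x64.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}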
diff --git a/src/xenia/cpu/backend/a64/a64_sequences.cc b/src/xenia/cpu/backend/a64/a64_sequences.cc
index c3c0741e8..db334602b 100644
--- a/src/xenia/cpu/backend/a64/a64_sequences.cc
+++ b/src/xenia/cpu/backend/a64/a64_sequences.cc
@@ -295,10 +295,8 @@ struct CONVERT_I32_F32
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     // TODO(benvanik): saturation check? cvtt* (trunc?)
     if (i.instr->flags == ROUND_TO_ZERO) {
-      // e.vcvttss2si(i.dest, i.src1);
       e.FCVTZS(i.dest, i.src1.reg().toS());
     } else {
-      // e.vcvtss2si(i.dest, i.src1);
       e.FCVTNS(i.dest, i.src1.reg().toS());
     }
   }
 };
 struct CONVERT_I32_F64
     : Sequence<CONVERT_I32_F64, I<OPCODE_CONVERT, I32Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     // Intel returns 0x80000000 if the double value does not fit within an int32
-    // ARM64 and PPC saturates the value instead.
-    // e.vminsd(e.xmm0, i.src1, e.GetVConstPtr(XMMIntMaxPD));
+    // ARM64 and PPC saturate the value instead.
     if (i.instr->flags == ROUND_TO_ZERO) {
-      // e.vcvttsd2si(i.dest, e.xmm0);
       e.FCVTZS(i.dest, i.src1.reg().toD());
     } else {
-      // e.vcvtsd2si(i.dest, e.xmm0);
       e.FCVTNS(i.dest, i.src1.reg().toD());
     }
   }
@@ -322,10 +317,8 @@ struct CONVERT_I64_F64
     : Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     if (i.instr->flags == ROUND_TO_ZERO) {
-      // e.vcvttsd2si(i.dest, i.src1);
       e.FCVTZS(i.dest, i.src1.reg().toD());
     } else {
-      // e.vcvtsd2si(i.dest, i.src1);
       e.FCVTNS(i.dest, i.src1.reg().toD());
     }
   }
@@ -333,24 +326,18 @@ struct CONVERT_F32_I32
     : Sequence<CONVERT_F32_I32, I<OPCODE_CONVERT, F32Op, I32Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): saturation check? cvtt* (trunc?)
-    // e.vcvtsi2ss(i.dest, i.src1);
     e.SCVTF(i.dest.reg().toS(), i.src1);
   }
 };
 struct CONVERT_F32_F64
     : Sequence<CONVERT_F32_F64, I<OPCODE_CONVERT, F32Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): saturation check? cvtt* (trunc?)
-    // e.vcvtsd2ss(i.dest, i.src1);
     e.FCVT(i.dest.reg().toS(), i.src1.reg().toD());
   }
 };
 struct CONVERT_F64_I64
     : Sequence<CONVERT_F64_I64, I<OPCODE_CONVERT, F64Op, I64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): saturation check? cvtt* (trunc?)
-    // e.vcvtsi2sd(i.dest, i.src1);
     e.SCVTF(i.dest.reg().toD(), i.src1);
   }
 };
@@ -372,19 +359,15 @@ struct ROUND_F32 : Sequence<ROUND_F32, I<OPCODE_ROUND, F32Op, F32Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     switch (i.instr->flags) {
       case ROUND_TO_ZERO:
-        // e.vroundss(i.dest, i.src1, 0b00000011);
         e.FRINTZ(i.dest.reg().toS(), i.src1.reg().toS());
         break;
       case ROUND_TO_NEAREST:
-        // e.vroundss(i.dest, i.src1, 0b00000000);
         e.FRINTN(i.dest.reg().toS(), i.src1.reg().toS());
         break;
       case ROUND_TO_MINUS_INFINITY:
-        // e.vroundss(i.dest, i.src1, 0b00000001);
         e.FRINTM(i.dest.reg().toS(), i.src1.reg().toS());
         break;
       case ROUND_TO_POSITIVE_INFINITY:
-        // e.vroundss(i.dest, i.src1, 0b00000010);
         e.FRINTP(i.dest.reg().toS(), i.src1.reg().toS());
         break;
     }
@@ -394,19 +377,15 @@ struct ROUND_F64 : Sequence<ROUND_F64, I<OPCODE_ROUND, F64Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     switch (i.instr->flags) {
       case ROUND_TO_ZERO:
-        // e.vroundsd(i.dest, i.src1, 0b00000011);
         e.FRINTZ(i.dest, i.src1);
         break;
       case ROUND_TO_NEAREST:
-        // e.vroundsd(i.dest, i.src1, 0b00000000);
         e.FRINTN(i.dest, i.src1);
         break;
       case ROUND_TO_MINUS_INFINITY:
-        // e.vroundsd(i.dest, i.src1, 0b00000001);
         e.FRINTM(i.dest, i.src1);
         break;
       case ROUND_TO_POSITIVE_INFINITY:
-        // e.vroundsd(i.dest, i.src1, 0b00000010);
         e.FRINTP(i.dest, i.src1);
         break;
     }
@@ -416,19 +395,15 @@ struct ROUND_V128 : Sequence<ROUND_V128, I<OPCODE_ROUND, V128Op, V128Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     switch (i.instr->flags) {
      case ROUND_TO_ZERO:
-        // e.vroundps(i.dest, i.src1, 0b00000011);
         e.FRINTZ(i.dest.reg().S4(), i.src1.reg().S4());
         break;
       case ROUND_TO_NEAREST:
-        // e.vroundps(i.dest, i.src1, 0b00000000);
         e.FRINTN(i.dest.reg().S4(), i.src1.reg().S4());
         break;
       case ROUND_TO_MINUS_INFINITY:
-        // e.vroundps(i.dest, i.src1, 0b00000001);
         e.FRINTM(i.dest.reg().S4(), i.src1.reg().S4());
         break;
       case ROUND_TO_POSITIVE_INFINITY:
-        // e.vroundps(i.dest, i.src1, 0b00000010);
         e.FRINTP(i.dest.reg().S4(), i.src1.reg().S4());
         break;
     }
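A behavioral note on the CONVERT hunks: x64's cvttsd2si family returns the 0x80000000 "integer indefinite" value on overflow, which is why the old code clamped through XMMIntMaxPD first, while FCVTZS/FCVTNS saturate to INT32_MIN/INT32_MAX and convert NaN to zero, matching PPC closely enough to drop the clamp. A scalar sketch of the FCVTZS-style semantics assumed here:

#include <cmath>
#include <cstdint>

int32_t fcvtzs_i32_f64(double v) {
  if (std::isnan(v)) return 0;                 // FCVTZS: NaN -> 0
  if (v >= 2147483647.0) return INT32_MAX;     // saturate high
  if (v <= -2147483648.0) return INT32_MIN;    // saturate low
  return static_cast<int32_t>(std::trunc(v));  // round toward zero
}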
@@ -684,12 +659,12 @@ struct SELECT_F64
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     // dest = src1 != 0 ? src2 : src3
 
-    DReg src2 = i.src2.is_constant ? D2 : i.src2;
+    const DReg src2 = i.src2.is_constant ? D2 : i.src2;
     if (i.src2.is_constant) {
       e.LoadConstantV(src2.toQ(), i.src2.constant());
     }
 
-    DReg src3 = i.src3.is_constant ? D3 : i.src3;
+    const DReg src3 = i.src3.is_constant ? D3 : i.src3;
     if (i.src3.is_constant) {
       e.LoadConstantV(src3.toQ(), i.src3.constant());
     }
@@ -703,12 +678,12 @@ struct SELECT_V128_I8
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     // dest = src1 != 0 ? src2 : src3
 
-    QReg src2 = i.src2.is_constant ? Q2 : i.src2;
+    const QReg src2 = i.src2.is_constant ? Q2 : i.src2;
     if (i.src2.is_constant) {
       e.LoadConstantV(src2, i.src2.constant());
     }
 
-    QReg src3 = i.src3.is_constant ? Q3 : i.src3;
+    const QReg src3 = i.src3.is_constant ? Q3 : i.src3;
     if (i.src3.is_constant) {
       e.LoadConstantV(src3, i.src3.constant());
     }
@@ -730,12 +705,12 @@ struct SELECT_V128_V128
       e.MOV(src1.B16(), i.src1.reg().B16());
     }
 
-    const QReg src2 = i.src2.is_constant ? Q1 : i.src2;
+    const QReg src2 = i.src2.is_constant ? Q2 : i.src2;
     if (i.src2.is_constant) {
       e.LoadConstantV(src2, i.src2.constant());
     }
 
-    const QReg src3 = i.src3.is_constant ? Q2 : i.src3;
+    const QReg src3 = i.src3.is_constant ? Q3 : i.src3;
     if (i.src3.is_constant) {
       e.LoadConstantV(src3, i.src3.constant());
     }
@@ -1123,24 +1098,20 @@ void EmitAddXX(A64Emitter& e, const ARGS& i) {
   SEQ::EmitCommutativeBinaryOp(
       e, i,
       [](A64Emitter& e, REG dest_src, REG src) {
-        // e.add(dest_src, src);
         e.ADD(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        // e.add(dest_src, constant);
         e.MOV(REG(1), constant);
         e.ADD(dest_src, dest_src, REG(1));
       });
 }
 struct ADD_I8 : Sequence<ADD_I8, I<OPCODE_ADD, I8Op, I8Op, I8Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // EmitAddXX<ADD_I8, Reg8>(e, i);
     EmitAddXX<ADD_I8, WReg>(e, i);
   }
 };
 struct ADD_I16 : Sequence<ADD_I16, I<OPCODE_ADD, I16Op, I16Op, I16Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    // EmitAddXX<ADD_I16, Reg16>(e, i);
     EmitAddXX<ADD_I16, WReg>(e, i);
   }
 };
@@ -1158,7 +1129,6 @@ struct ADD_F32 : Sequence<ADD_F32, I<OPCODE_ADD, F32Op, F32Op, F32Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryVOp(
         e, i, [](A64Emitter& e, SReg dest, SReg src1, SReg src2) {
-          // e.vaddss(dest, src1, src2);
           e.FADD(dest, src1, src2);
         });
   }
@@ -1167,7 +1137,6 @@ struct ADD_F64 : Sequence<ADD_F64, I<OPCODE_ADD, F64Op, F64Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryVOp(
         e, i, [](A64Emitter& e, DReg dest, DReg src1, DReg src2) {
-          // e.vaddsd(dest, src1, src2);
           e.FADD(dest, src1, src2);
         });
   }
@@ -1176,7 +1145,6 @@ struct ADD_V128 : Sequence<ADD_V128, I<OPCODE_ADD, V128Op, V128Op, V128Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryVOp(
         e, i, [](A64Emitter& e, QReg dest, QReg src1, QReg src2) {
-          // e.vaddps(dest, src1, src2);
           e.FADD(dest.S4(), src1.S4(), src2.S4());
         });
   }
@@ -2030,7 +1998,7 @@ struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
   }
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     assert_always();
-    e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+    e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
     e.FMOV(i.dest, S0);
@@ -2044,7 +2012,7 @@ struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
   }
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     assert_always();
-    e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+    e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
     e.FMOV(i.dest, D0);
@@ -2059,7 +2027,7 @@ struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
     return vld1q_f32(values);
   }
   static void Emit(A64Emitter& e, const EmitArgType& i) {
-    e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+    e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
     e.MOV(i.dest.reg().B16(), Q0.B16());
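The POW2 sequences above show the fallback pattern for ops with no direct A64 lowering: stash the vector to a stack slot, pass the slot's address as the first native parameter, call the helper through CallNativeSafe, and read the result back from S0/D0/Q0. A sketch of what an EmulatePow2-style helper does on the host side; the signature is mirrored from this diff's context lines, while the body is an illustration:

#include <arm_neon.h>
#include <cmath>
#include <cstddef>

static float32x4_t EmulatePow2(void* /*context*/, std::byte src[16]) {
  alignas(16) float values[4];
  vst1q_f32(values, vld1q_f32(reinterpret_cast<const float*>(src)));
  for (float& value : values) {
    value = std::exp2(value);  // per-lane 2^x
  }
  return vld1q_f32(values);  // the result lands in Q0 under AAPCS64
}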
@@ -2082,9 +2050,9 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     assert_always();
     if (i.src1.is_constant) {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashConstantV(0, i.src1.constant()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashConstantV(0, i.src1.constant()));
     } else {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     }
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.FMOV(i.dest, S0);
@@ -2100,9 +2068,9 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     assert_always();
     if (i.src1.is_constant) {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashConstantV(0, i.src1.constant()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashConstantV(0, i.src1.constant()));
     } else {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     }
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.FMOV(i.dest, D0);
@@ -2119,9 +2087,9 @@ struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
   }
   static void Emit(A64Emitter& e, const EmitArgType& i) {
     if (i.src1.is_constant) {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashConstantV(0, i.src1.constant()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashConstantV(0, i.src1.constant()));
     } else {
-      e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1.reg().toQ()));
+      e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1.reg().toQ()));
     }
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.MOV(i.dest.reg().B16(), Q0.B16());
@@ -2455,7 +2423,7 @@ struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
     } else {
       e.MOV(e.GetNativeParam(1), i.src2.reg().toX());
     }
-    e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1));
+    e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
     e.MOV(i.dest.reg().B16(), Q0.B16());
   }
@@ -2534,7 +2502,7 @@ struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
     } else {
       e.MOV(e.GetNativeParam(1), i.src2.reg().toX());
     }
-    e.ADD(e.GetNativeParam(0), XSP, e.StashV(0, i.src1));
+    e.ADD(e.GetNativeParam(0), SP, e.StashV(0, i.src1));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
     e.MOV(i.dest.reg().B16(), Q0.B16());
   }
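Closing note on SHL_V128/SHR_V128: AArch64 has no single instruction that shifts a full 128-bit value by a variable amount, so these sequences stash the operand and run the shift on the host, like the POW2/LOG2 helpers above. An illustrative scalar equivalent of a whole-vector left shift, assuming a little-endian two-u64 representation; the real EmulateShlV128 also handles the guest's byte ordering, which this sketch ignores:

#include <cstdint>

struct u128 {
  uint64_t lo, hi;
};

u128 shl128(u128 v, unsigned amount) {
  amount &= 127;  // keep the shift within the 128-bit width
  if (amount == 0) return v;
  if (amount >= 64) return {0, v.lo << (amount - 64)};
  return {v.lo << amount, (v.hi << amount) | (v.lo >> (64 - amount))};
}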