diff --git a/src/xenia/cpu/backend/a64/a64_emitter.cc b/src/xenia/cpu/backend/a64/a64_emitter.cc
index aba1fdd9a..925e8bb9f 100644
--- a/src/xenia/cpu/backend/a64/a64_emitter.cc
+++ b/src/xenia/cpu/backend/a64/a64_emitter.cc
@@ -58,8 +58,6 @@ using xe::cpu::hir::Instr;
 using namespace xe::literals;
 using namespace oaknut::util;
 
-static const size_t kMaxCodeSize = 1_MiB;
-
 static const size_t kStashOffset = 32;
 // static const size_t kStashOffsetHigh = 32 + 32;
 
@@ -73,8 +71,7 @@ const uint8_t A64Emitter::fpr_reg_map_[A64Emitter::FPR_COUNT] = {
 };
 
 A64Emitter::A64Emitter(A64Backend* backend)
-    : CodeBlock(kMaxCodeSize),
-      CodeGenerator(CodeBlock::ptr()),
+    : VectorCodeGenerator(assembly_buffer),
       processor_(backend->processor()),
       backend_(backend),
       code_cache_(backend->code_cache()) {
@@ -138,23 +135,22 @@ bool A64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
 void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
                           GuestFunction* function) {
   // Copy the current oaknut instruction-buffer into the code-cache
-  uint32_t* old_address = CodeBlock::ptr();
   void* new_execute_address;
   void* new_write_address;
 
   assert_true(func_info.code_size.total == offset());
 
   if (function) {
-    code_cache_->PlaceGuestCode(function->address(), CodeBlock::ptr(),
+    code_cache_->PlaceGuestCode(function->address(), assembly_buffer.data(),
                                 func_info, function, new_execute_address,
                                 new_write_address);
   } else {
-    code_cache_->PlaceHostCode(0, CodeBlock::ptr(), func_info,
+    code_cache_->PlaceHostCode(0, assembly_buffer.data(), func_info,
                                new_execute_address, new_write_address);
   }
 
   // Reset the oaknut instruction-buffer
-  set_wptr(reinterpret_cast<uint32_t*>(old_address));
+  assembly_buffer.clear();
   label_lookup_.clear();
 
   return new_execute_address;
@@ -224,7 +220,8 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
 
     // Call count.
     MOV(W0, 1);
-    MOVP2R(X5, low_address(&trace_header->function_call_count));
+    MOV(X5, reinterpret_cast<uintptr_t>(
+                low_address(&trace_header->function_call_count)));
     LDADDAL(X0, X0, X5);
 
     // Get call history slot.
@@ -234,8 +231,8 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
     AND(W0, W0, 0b00000011);
 
     // Record call history value into slot (guest addr in W1).
-    MOV(X5, uint32_t(
-                uint64_t(low_address(&trace_header->function_caller_history))));
+    MOV(X5, reinterpret_cast<uintptr_t>(
+                low_address(&trace_header->function_caller_history)));
     STR(W1, X5, X0, oaknut::IndexExt::LSL, 2);
 
     // Calling thread. Load X0 with thread ID.
@@ -243,7 +240,8 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
 
     MOV(W5, 1);
     LSL(W0, W5, W0);
-    MOVP2R(X5, low_address(&trace_header->function_thread_use));
+    MOV(X5, reinterpret_cast<uintptr_t>(
+                low_address(&trace_header->function_thread_use)));
     LDSET(W0, WZR, X5);
   }
 
@@ -334,8 +332,9 @@ void A64Emitter::MarkSourceOffset(const Instr* i) {
     const uint32_t instruction_index =
         (entry->guest_address - trace_data_->start_address()) / 4;
     MOV(X0, 1);
-    MOVP2R(X1, low_address(trace_data_->instruction_execute_counts() +
-                           instruction_index * 8));
+    MOV(X1, reinterpret_cast<uintptr_t>(
+                low_address(trace_data_->instruction_execute_counts() +
+                            instruction_index * 8)));
     LDADDAL(X0, ZR, X1);
   }
 }
@@ -803,11 +802,9 @@ void A64Emitter::FreeConstData(uintptr_t data) {
                      memory::DeallocationType::kRelease);
 }
 
-std::byte* A64Emitter::GetVConstPtr() const {
-  return reinterpret_cast<std::byte*>(backend_->emitter_data());
-}
+uintptr_t A64Emitter::GetVConstPtr() const { return backend_->emitter_data(); }
 
-std::byte* A64Emitter::GetVConstPtr(VConst id) const {
+uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
   // Load through fixed constant table setup by PlaceConstData.
   // It's important that the pointer is not signed, as it will be sign-extended.
   return GetVConstPtr() + GetVConstOffset(id);
diff --git a/src/xenia/cpu/backend/a64/a64_emitter.h b/src/xenia/cpu/backend/a64/a64_emitter.h
index 6c75e56ec..629c67a4b 100644
--- a/src/xenia/cpu/backend/a64/a64_emitter.h
+++ b/src/xenia/cpu/backend/a64/a64_emitter.h
@@ -122,7 +122,7 @@ enum A64EmitterFeatureFlags {
   kA64EmitF16C = 1 << 1,
 };
 
-class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
+class A64Emitter : public oaknut::VectorCodeGenerator {
  public:
   A64Emitter(A64Backend* backend);
   virtual ~A64Emitter();
@@ -203,8 +203,8 @@ class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
   static bool ConstantFitsIn32Reg(uint64_t v);
   void MovMem64(const oaknut::XRegSp& addr, intptr_t offset, uint64_t v);
 
-  std::byte* GetVConstPtr() const;
-  std::byte* GetVConstPtr(VConst id) const;
+  uintptr_t GetVConstPtr() const;
+  uintptr_t GetVConstPtr(VConst id) const;
   static constexpr uintptr_t GetVConstOffset(VConst id) {
     return sizeof(vec128_t) * id;
   }
@@ -239,6 +239,8 @@ class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
   A64CodeCache* code_cache_ = nullptr;
   uint32_t feature_flags_ = 0;
 
+  std::vector<uint32_t> assembly_buffer;
+
   oaknut::Label* epilog_label_ = nullptr;
 
   // Convert from plain-text label-names into oaknut-labels
diff --git a/src/xenia/cpu/backend/a64/a64_seq_vector.cc b/src/xenia/cpu/backend/a64/a64_seq_vector.cc
index b19ea6b3e..a309fcc02 100644
--- a/src/xenia/cpu/backend/a64/a64_seq_vector.cc
+++ b/src/xenia/cpu/backend/a64/a64_seq_vector.cc
@@ -83,10 +83,10 @@ struct LOAD_VECTOR_SHL_I8
     if (i.src1.is_constant) {
       auto sh = i.src1.constant();
       assert_true(sh < xe::countof(lvsl_table));
-      e.MOVP2R(X0, &lvsl_table[sh]);
+      e.MOV(X0, reinterpret_cast<uintptr_t>(&lvsl_table[sh]));
       e.LDR(i.dest, X0);
     } else {
-      e.MOVP2R(X0, lvsl_table);
+      e.MOV(X0, reinterpret_cast<uintptr_t>(lvsl_table));
       e.AND(X1, i.src1.reg().toX(), 0xf);
       e.LDR(i.dest, X0, X1, IndexExt::LSL, 4);
     }
@@ -121,10 +121,10 @@ struct LOAD_VECTOR_SHR_I8
     if (i.src1.is_constant) {
       auto sh = i.src1.constant();
      assert_true(sh < xe::countof(lvsr_table));
-      e.MOVP2R(X0, &lvsr_table[sh]);
+      e.MOV(X0, reinterpret_cast<uintptr_t>(&lvsr_table[sh]));
       e.LDR(i.dest, X0);
     } else {
-      e.MOVP2R(X0, lvsr_table);
+      e.MOV(X0, reinterpret_cast<uintptr_t>(lvsr_table));
       e.AND(X1, i.src1.reg().toX(), 0xf);
       e.LDR(i.dest, X0, X1, IndexExt::LSL, 4);
     }
@@ -1007,7 +1007,7 @@ struct EXTRACT_I32
       e.AND(X0, i.src2.reg().toX(), 0b11);
       e.LSL(X0, X0, 4);
 
-      e.MOVP2R(X1, extract_table_32);
+      e.MOV(X1, reinterpret_cast<uintptr_t>(extract_table_32));
       e.LDR(Q0, X1, X0);
 
       // Byte-table lookup
@@ -1335,7 +1335,7 @@ struct PACK
     }
 
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     // Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
     // are valid - max before min to pack NaN as zero (5454082B is heavily
@@ -1435,7 +1435,7 @@ struct PACK
       e.LoadConstantV(src, i.src1.constant());
     }
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     // Saturate
     e.LDR(Q1, VConstData, e.GetVConstOffset(VPackSHORT_Min));
@@ -1456,7 +1456,7 @@ struct PACK
       e.LoadConstantV(src, i.src1.constant());
     }
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     // Saturate
     e.LDR(Q1, VConstData, e.GetVConstOffset(VPackSHORT_Min));
@@ -1478,7 +1478,7 @@ struct PACK
       e.LoadConstantV(src, i.src1.constant());
     }
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     // Saturate.
     e.LDR(Q1, VConstData, e.GetVConstOffset(VPackUINT_2101010_MinUnpacked));
@@ -1519,7 +1519,7 @@ struct PACK
       e.LoadConstantV(src, i.src1.constant());
     }
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     // Saturate.
     e.LDR(Q1, VConstData, e.GetVConstOffset(VPackULONG_4202020_MinUnpacked));
@@ -1740,7 +1740,7 @@ struct UNPACK
   static void EmitD3DCOLOR(A64Emitter& e, const EmitArgType& i) {
     // ARGB (WXYZ) -> RGBA (XYZW)
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     QReg src(0);
 
@@ -1849,7 +1849,7 @@ struct UNPACK
     // (VD.w) = 1.0 (games splat W after unpacking to get vectors of 1.0f)
     // src is (xx,xx,xx,VALUE)
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     QReg src(0);
     if (i.src1.is_constant) {
@@ -1892,7 +1892,7 @@ struct UNPACK
 
     // src is (xx,xx,VALUE,VALUE)
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     QReg src(0);
     if (i.src1.is_constant) {
@@ -1928,7 +1928,7 @@ struct UNPACK
   }
   static void EmitUINT_2101010(A64Emitter& e, const EmitArgType& i) {
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     QReg src(0);
     if (i.src1.is_constant) {
@@ -1972,7 +1972,7 @@ struct UNPACK
   }
   static void EmitULONG_4202020(A64Emitter& e, const EmitArgType& i) {
     const XReg VConstData = X3;
-    e.MOVP2R(VConstData, e.GetVConstPtr());
+    e.MOV(VConstData, e.GetVConstPtr());
 
     QReg src(0);
     if (i.src1.is_constant) {
diff --git a/src/xenia/cpu/backend/a64/a64_sequences.cc b/src/xenia/cpu/backend/a64/a64_sequences.cc
index db334602b..c88e201e5 100644
--- a/src/xenia/cpu/backend/a64/a64_sequences.cc
+++ b/src/xenia/cpu/backend/a64/a64_sequences.cc
@@ -2758,7 +2758,7 @@ struct SET_ROUNDING_MODE_I32
     e.AND(W1, i.src1, 0b111);
 
     // Use the low 3 bits as an index into a LUT
-    e.MOVP2R(X0, fpcr_table);
+    e.MOV(X0, reinterpret_cast<uintptr_t>(fpcr_table));
     e.LDRB(W0, X0, X1);
 
     // Replace FPCR bits with new value
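
Note on the pattern this patch adopts: oaknut::VectorCodeGenerator emits into a caller-owned std::vector of 32-bit opcodes instead of a pre-reserved executable CodeBlock, so instructions are assembled at a temporary location and only gain their final address once the code cache copies them into place -- which is presumably why the PC-relative MOVP2R materializations are replaced with absolute MOV-immediate loads of reinterpret_cast<uintptr_t> addresses. Below is a minimal stand-alone sketch of that emit -> copy -> clear cycle; it is not part of the patch, it assumes the single-argument VectorCodeGenerator constructor used above, and CopyIntoCodeCache is a hypothetical placeholder for what A64CodeCache::PlaceGuestCode/PlaceHostCode do in xenia (copy into executable memory and flush the instruction cache).

// Hypothetical sketch -- not xenia code.
#include <cstdint>
#include <cstdio>
#include <vector>

#include <oaknut/oaknut.hpp>

using namespace oaknut;
using namespace oaknut::util;

// Assemble `x0 = x0 + x1; ret` into a caller-owned instruction buffer.
static void EmitAddFunction(std::vector<std::uint32_t>& buffer) {
  VectorCodeGenerator code(buffer);  // appends 32-bit opcodes to `buffer`
  code.ADD(X0, X0, X1);
  code.RET();
}

int main() {
  std::vector<std::uint32_t> assembly_buffer;
  EmitAddFunction(assembly_buffer);

  // In the patch, this is the point where A64Emitter::Emplace hands
  // assembly_buffer.data() to PlaceGuestCode/PlaceHostCode for the copy
  // into executable memory.
  // CopyIntoCodeCache(assembly_buffer.data(),
  //                   assembly_buffer.size() * sizeof(std::uint32_t));
  std::printf("emitted %zu instructions\n", assembly_buffer.size());

  // Clearing the vector replaces the old set_wptr() reset of the
  // fixed-size CodeBlock, letting the emitter be reused.
  assembly_buffer.clear();
  return 0;
}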