From e394840344cb7ab1212bac53bc6d8578507e5904 Mon Sep 17 00:00:00 2001
From: Flyinghead
Date: Sat, 9 Nov 2024 13:25:55 +0100
Subject: [PATCH] sh4: pass sh4 context to dynarecs, sh4cycles and caches

Reorder RuntimeBlockInfo members to save space
---
 core/hw/sh4/dyna/blockmanager.h         | 12 ++---
 core/hw/sh4/dyna/driver.cpp             | 13 ++---
 core/hw/sh4/dyna/ngen.h                 |  2 +-
 core/hw/sh4/interpr/sh4_interpreter.cpp |  3 ++
 core/hw/sh4/sh4_cache.h                 | 16 +++++-
 core/hw/sh4/sh4_cycles.h                | 17 +++---
 core/rec-ARM/rec_arm.cpp                | 31 ++++++-----
 core/rec-ARM64/rec_arm64.cpp            | 70 ++++++++++++++-----------
 core/rec-x64/rec_x64.cpp                | 44 ++++++++--------
 core/rec-x64/xbyak_base.h               |  6 ++-
 core/rec-x86/rec_x86.cpp                | 58 ++++++++++----------
 core/rec-x86/rec_x86.h                  |  4 +-
 core/rec-x86/x86_ops.cpp                | 10 ++--
 13 files changed, 163 insertions(+), 123 deletions(-)

diff --git a/core/hw/sh4/dyna/blockmanager.h b/core/hw/sh4/dyna/blockmanager.h
index 333190489..255f42803 100644
--- a/core/hw/sh4/dyna/blockmanager.h
+++ b/core/hw/sh4/dyna/blockmanager.h
@@ -16,8 +16,8 @@ struct RuntimeBlockInfo
 	bool Setup(u32 pc,fpscr_t fpu_cfg);
 
 	u32 addr;
-	DynarecCodeEntryPtr code;
 	u32 vaddr;
+	DynarecCodeEntryPtr code;
 
 	u32 host_code_size;	//in bytes
 	u32 sh4_code_size;	//in bytes
@@ -27,8 +27,8 @@ struct RuntimeBlockInfo
 	u32 guest_opcodes;
 	u32 host_opcodes;	// set by host code generator, optional
 	bool has_fpu_op;
-	u32 blockcheck_failures;
 	bool temp_block;
+	u32 blockcheck_failures;
 
 	u32 BranchBlock;	//if not 0xFFFFFFFF then jump target
 	u32 NextBlock;		//if not 0xFFFFFFFF then next block (by position)
@@ -42,8 +42,11 @@ struct RuntimeBlockInfo
 	BlockEndType BlockType;
 	bool has_jcond;
+	bool read_only;
 
 	std::vector<shil_opcode> oplist;
+	//predecessors references
+	std::vector<RuntimeBlockInfoPtr> pre_refs;
 
 	bool containsCode(const void *ptr)
 	{
 		return 0;
 	}
 
-	//predecessors references
-	std::vector<RuntimeBlockInfoPtr> pre_refs;
-
 	void AddRef(const RuntimeBlockInfoPtr& other);
 	void RemRef(const RuntimeBlockInfoPtr& other);
 
 	void Discard();
 	void SetProtectedFlags();
-
-	bool read_only;
 };
 
 void bm_WriteBlockMap(const std::string& file);
diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp
index d3595897b..7981513ce 100644
--- a/core/hw/sh4/dyna/driver.cpp
+++ b/core/hw/sh4/dyna/driver.cpp
@@ -92,7 +92,7 @@ void Sh4Recompiler::clear_temp_cache(bool full)
 
 void Sh4Recompiler::ResetCache()
 {
-	INFO_LOG(DYNAREC, "recSh4:Dynarec Cache clear at %08X free space %d", Sh4cntx.pc, codeBuffer.getFreeSpace());
+	INFO_LOG(DYNAREC, "recSh4:Dynarec Cache clear at %08X free space %d", getContext()->pc, codeBuffer.getFreeSpace());
 	codeBuffer.reset(false);
 	bm_ResetCache();
 	smc_hotspots.clear();
@@ -103,12 +103,13 @@ void Sh4Recompiler::Run()
 {
 	RestoreHostRoundingMode();
 
-	u8 *sh4_dyna_rcb = (u8 *)&Sh4cntx + sizeof(Sh4cntx);
-	INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb, (u8*)&sh4rcb.cntx.pc - sh4_dyna_rcb, sh4rcb.cntx.pc);
+	u8 *sh4_dyna_rcb = (u8 *)getContext() + sizeof(Sh4cntx);
+	INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb,
+			(u8*)&getContext()->pc - sh4_dyna_rcb, getContext()->pc);
 
 	sh4Dynarec->mainloop(sh4_dyna_rcb);
 
-	ctx->CpuRunning = false;
+	getContext()->CpuRunning = false;
 }
 
 void AnalyseBlock(RuntimeBlockInfo* blk);
@@ -355,7 +356,7 @@ void Sh4Recompiler::Init()
 	bm_Init();
 
 	if (addrspace::virtmemEnabled())
-		verify(&mem_b[0] == ((u8*)p_sh4rcb->cntx.sq_buffer + 512 + 0x0C000000));
+		verify(&mem_b[0] == ((u8*)getContext()->sq_buffer + sizeof(Sh4Context) + 0x0C000000));
 
 	// Call the platform-specific magic to make the pages RWX
 	CodeCache = nullptr;
@@ -369,7 +370,7 @@ void Sh4Recompiler::Init()
 	verify(CodeCache != nullptr);
 	TempCodeCache = CodeCache + CODE_SIZE;
 
-	sh4Dynarec->init(codeBuffer);
+	sh4Dynarec->init(*getContext(), codeBuffer);
 	bm_ResetCache();
 }
diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h
index 88c8f1d25..110cf14ea 100644
--- a/core/hw/sh4/dyna/ngen.h
+++ b/core/hw/sh4/dyna/ngen.h
@@ -85,7 +85,7 @@ class Sh4Dynarec
 {
 public:
 	// Initialize the dynarec, which should keep a reference to the passed code buffer to generate code later.
-	virtual void init(Sh4CodeBuffer& codeBuffer) = 0;
+	virtual void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) = 0;
 	// Compile the given block.
 	// If smc_checks is true, add self-modifying code detection.
 	// If optimize is true, use fast memory accesses if possible, that will be rewritten if they fail.
diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp
index 543e43c63..3e45e9d88 100644
--- a/core/hw/sh4/interpr/sh4_interpreter.cpp
+++ b/core/hw/sh4/interpr/sh4_interpreter.cpp
@@ -187,6 +187,9 @@ void Sh4Interpreter::Init()
 {
 	ctx = &p_sh4rcb->cntx;
 	memset(ctx, 0, sizeof(*ctx));
+	sh4cycles.init(ctx);
+	icache.init(ctx);
+	ocache.init(ctx);
 }
 
 void Sh4Interpreter::Term()
diff --git a/core/hw/sh4/sh4_cache.h b/core/hw/sh4/sh4_cache.h
index f753de753..e99fe7f46 100644
--- a/core/hw/sh4/sh4_cache.h
+++ b/core/hw/sh4/sh4_cache.h
@@ -55,6 +55,11 @@ static bool translatedArea(u32 area)
 class Sh4ICache
 {
 public:
+	void init(Sh4Context *ctx) {
+		this->ctx = ctx;
+		sh4cycles.init(ctx);
+	}
+
 	u16 ReadMem(u32 address)
 	{
 		bool cacheOn = false;
@@ -177,7 +182,7 @@ private:
 			return MmuError::BADADDR;
 
 		const u32 area = address >> 29;
-		const bool userMode = p_sh4rcb->cntx.sr.MD == 0;
+		const bool userMode = ctx->sr.MD == 0;
 
 		if (userMode)
 		{
@@ -222,6 +227,7 @@ private:
 
 	std::array<cache_line, 256> lines;
 	Sh4Cycles sh4cycles;
+	Sh4Context *ctx = nullptr;
 };
 
 extern Sh4ICache icache;
@@ -232,6 +238,11 @@ extern Sh4ICache icache;
 class Sh4OCache
 {
 public:
+	void init(Sh4Context *ctx) {
+		this->ctx = ctx;
+		sh4cycles.init(ctx);
+	}
+
 	template<class T>
 	T ReadMem(u32 address)
 	{
@@ -515,7 +526,7 @@ private:
 			return lookup;
 		}
 		const u32 area = address >> 29;
-		const bool userMode = p_sh4rcb->cntx.sr.MD == 0;
+		const bool userMode = ctx->sr.MD == 0;
 
 		// kernel mem protected in user mode
 		if (userMode && (address & 0x80000000))
@@ -591,6 +602,7 @@ private:
 	u64 writeBackBufferCycles = 0;
 	u64 writeThroughBufferCycles = 0;
 	Sh4Cycles sh4cycles;
+	Sh4Context *ctx = nullptr;
 };
 
 extern Sh4OCache ocache;
diff --git a/core/hw/sh4/sh4_cycles.h b/core/hw/sh4/sh4_cycles.h
index ef203b907..246a1bd4d 100644
--- a/core/hw/sh4/sh4_cycles.h
+++ b/core/hw/sh4/sh4_cycles.h
@@ -27,24 +27,28 @@ class Sh4Cycles
 public:
 	Sh4Cycles(int cpuRatio = 1) : cpuRatio(cpuRatio) {}
 
+	void init(Sh4Context *ctx) {
+		this->ctx = ctx;
+	}
+
 	void executeCycles(u16 op)
 	{
-		Sh4cntx.cycle_counter -= countCycles(op);
+		ctx->cycle_counter -= countCycles(op);
 	}
 
 	void addCycles(int cycles) const
 	{
-		Sh4cntx.cycle_counter -= cycles;
+		ctx->cycle_counter -= cycles;
 	}
 
 	void addReadAccessCycles(u32 addr, u32 size) const
 	{
-		Sh4cntx.cycle_counter -= readAccessCycles(addr, size);
+		ctx->cycle_counter -= readAccessCycles(addr, size);
 	}
 
 	void addWriteAccessCycles(u32 addr, u32 size) const
 	{
-		Sh4cntx.cycle_counter -= writeAccessCycles(addr, size);
+		ctx->cycle_counter -= writeAccessCycles(addr, size);
 	}
 
 	int countCycles(u16 op);
@@ -55,8 +59,8 @@ public:
 		memOps = 0;
 	}
 
-	static u64 now() {
-		return sh4_sched_now64() + SH4_TIMESLICE - Sh4cntx.cycle_counter;
+	u64 now() {
+		return sh4_sched_now64() + SH4_TIMESLICE - ctx->cycle_counter;
 	}
 
 	int readAccessCycles(u32 addr, u32 size) const {
@@ -76,4 +80,5 @@ private:
 	sh4_eu lastUnit = CO;
 	const int cpuRatio;
 	int memOps = 0;
+	Sh4Context *ctx = nullptr;
 };
diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp
index 2a65ec139..4c0b36a8a 100644
--- a/core/rec-ARM/rec_arm.cpp
+++ b/core/rec-ARM/rec_arm.cpp
@@ -94,10 +94,12 @@ extern "C" char *stpcpy(char *dst, char const *src)
 
 struct DynaRBI : RuntimeBlockInfo
 {
-	DynaRBI(Sh4CodeBuffer& codeBuffer) : codeBuffer(codeBuffer) {}
+	DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
+		: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
 
 	u32 Relink() override;
 
 	Register T_reg;
+	Sh4Context& sh4ctx;
 	Sh4CodeBuffer& codeBuffer;
 };
@@ -157,8 +159,10 @@ class Arm32Assembler : public MacroAssembler
 	using BinaryOP = void (MacroAssembler::*)(Register, Register, const Operand&);
 
 public:
-	Arm32Assembler(Sh4CodeBuffer& codeBuffer) : MacroAssembler((u8 *)codeBuffer.get(), codeBuffer.getFreeSpace(), A32), codeBuffer(codeBuffer), reg(*this) {}
-	Arm32Assembler(Sh4CodeBuffer& codeBuffer, u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32), codeBuffer(codeBuffer), reg(*this) {}
+	Arm32Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
+		: MacroAssembler((u8 *)codeBuffer.get(), codeBuffer.getFreeSpace(), A32), sh4ctx(sh4ctx), codeBuffer(codeBuffer), reg(*this) {}
+	Arm32Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *buffer, size_t size)
+		: MacroAssembler(buffer, size, A32), sh4ctx(sh4ctx), codeBuffer(codeBuffer), reg(*this) {}
 
 	void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise);
 	void rewrite(Register raddr, Register rt, SRegister ft, DRegister fd, bool write, bool is_sq, mem_op_type optp);
@@ -367,6 +371,7 @@ private:
 	void genMmuLookup(RuntimeBlockInfo* block, const shil_opcode& op, u32 write, Register& raddr);
 	void compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool optimise);
 
+	Sh4Context& sh4ctx;
 	Sh4CodeBuffer& codeBuffer;
 	arm_reg_alloc reg;
 	struct CC_PS
@@ -416,7 +421,7 @@ public:
 		sh4Dynarec = this;
 	}
 
-	void init(Sh4CodeBuffer& codeBuffer) override;
+	void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override;
 	void reset() override;
 	RuntimeBlockInfo *allocateBlock() override;
 	void handleException(host_context_t &context) override;
@@ -435,7 +440,7 @@ public:
 	}
 
 	void compile(RuntimeBlockInfo* block, bool smc_check, bool optimise) override
 	{
-		ass = new Arm32Assembler(*codeBuffer);
+		ass = new Arm32Assembler(*sh4ctx, *codeBuffer);
 		ass->compile(block, smc_check, optimise);
 		delete ass;
 		ass = nullptr;
@@ -457,6 +462,7 @@ public:
 private:
 	void generate_mainloop();
 
+	Sh4Context *sh4ctx = nullptr;
 	Sh4CodeBuffer *codeBuffer = nullptr;
 	bool restarting = false;
 	Arm32Assembler *ass = nullptr;
 };
 static Arm32Dynarec instance;
 
 u32 DynaRBI::Relink()
 {
-	Arm32Assembler ass(codeBuffer, (u8 *)code + relink_offset, host_code_size - relink_offset);
+	Arm32Assembler ass(sh4ctx, codeBuffer, (u8 *)code + relink_offset, host_code_size - relink_offset);
 
 	u32 size = ass.relinkBlock(this);
@@ -846,7 +852,7 @@ bool Arm32Dynarec::rewrite(host_context_t& context, void *faultAddress)
 	// ignore last 2 bits zeroed to avoid sigbus errors
 	verify(fault_offs == 0 || (fault_offs & ~3) == (sh4_addr & 0x1FFFFFFC));
 
-	ass = new Arm32Assembler(*codeBuffer, (u8 *)ptr, 12);
+	ass = new Arm32Assembler(*sh4ctx, *codeBuffer, (u8 *)ptr, 12);
 	ass->rewrite(raddr, rt, ft, fd, !read, is_sq, optp);
 	delete ass;
 	ass = nullptr;
@@ -2251,10 +2257,10 @@ void Arm32Dynarec::reset()
 	::mainloop = nullptr;
 	unwinder.clear();
 
-	if (p_sh4rcb->cntx.CpuRunning)
+	if (sh4ctx->CpuRunning)
 	{
 		// Force the dynarec out of mainloop() to regenerate it
-		p_sh4rcb->cntx.CpuRunning = 0;
+		sh4ctx->CpuRunning = 0;
 		restarting = true;
 	}
 	else
@@ -2267,7 +2273,7 @@ void Arm32Dynarec::generate_mainloop()
 		return;
 	INFO_LOG(DYNAREC, "Generating main loop");
 
-	Arm32Assembler ass(*codeBuffer);
+	Arm32Assembler ass(*sh4ctx, *codeBuffer);
 	ass.genMainLoop();
 }
@@ -2541,7 +2547,7 @@ void Arm32Assembler::genMainLoop()
 	INFO_LOG(DYNAREC, "readm helpers: up to %p", GetCursorAddress<void *>());
 }
 
-void Arm32Dynarec::init(Sh4CodeBuffer& codeBuffer)
+void Arm32Dynarec::init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
 {
 	INFO_LOG(DYNAREC, "Initializing the ARM32 dynarec");
@@ -2563,6 +2569,7 @@ void Arm32Dynarec::init(Sh4CodeBuffer& codeBuffer)
 	ccmap[shop_setab] = hi;
 	ccnmap[shop_setab] = ls;
 
+	this->sh4ctx = &sh4ctx;
 	this->codeBuffer = &codeBuffer;
 }
@@ -2574,6 +2581,6 @@ void Arm32Dynarec::handleException(host_context_t &context)
 RuntimeBlockInfo* Arm32Dynarec::allocateBlock()
 {
 	generate_mainloop();	// FIXME why is this needed?
-	return new DynaRBI(*codeBuffer);
+	return new DynaRBI(*sh4ctx, *codeBuffer);
 };
 #endif
diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp
index 66d17c106..f9e89e801 100644
--- a/core/rec-ARM64/rec_arm64.cpp
+++ b/core/rec-ARM64/rec_arm64.cpp
@@ -49,10 +49,12 @@ using namespace vixl::aarch64;
 
 struct DynaRBI : RuntimeBlockInfo
 {
-	DynaRBI(Sh4CodeBuffer& codeBuffer) : codeBuffer(codeBuffer) {}
+	DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
+		: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
 
 	u32 Relink() override;
 
 private:
+	Sh4Context& sh4ctx;
 	Sh4CodeBuffer& codeBuffer;
 };
@@ -126,10 +128,11 @@ class Arm64Assembler : public MacroAssembler
 	typedef void (MacroAssembler::*Arm64Fop_RRR)(const VRegister&, const VRegister&, const VRegister&);
 
 public:
-	Arm64Assembler(Sh4CodeBuffer& codeBuffer) : Arm64Assembler(codeBuffer, codeBuffer.get()) {
-	}
+	Arm64Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
+		: Arm64Assembler(sh4ctx, codeBuffer, codeBuffer.get()) { }
 
-	Arm64Assembler(Sh4CodeBuffer& codeBuffer, void *buffer) : MacroAssembler((u8 *)buffer, codeBuffer.getFreeSpace()), regalloc(this), codeBuffer(codeBuffer)
+	Arm64Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, void *buffer)
+		: MacroAssembler((u8 *)buffer, codeBuffer.getFreeSpace()), regalloc(this), sh4ctx(sh4ctx), codeBuffer(codeBuffer)
 	{
 		call_regs.push_back((const WRegister*)&w0);
 		call_regs.push_back((const WRegister*)&w1);
@@ -264,7 +267,7 @@ public:
 		regalloc.DoAlloc(block);
 
 		// scheduler
-		Ldr(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
+		Ldr(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
 		Cmp(w1, 0);
 		Label cycles_remaining;
 		B(&cycles_remaining, pl);
@@ -274,7 +277,7 @@ public:
 		Bind(&cycles_remaining);
 		Sub(w1, w1, block->guest_cycles);
-		Str(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
+		Str(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
 
 		for (size_t i = 0; i < block->oplist.size(); i++)
 		{
@@ -287,7 +290,7 @@ public:
 				if (op.rs1._imm)	// if NeedPC()
 				{
 					Mov(w10, op.rs2._imm);
-					Str(w10, sh4_context_mem_operand(&Sh4cntx.pc));
+					Str(w10, sh4_context_mem_operand(&sh4ctx.pc));
 				}
 
 				Mov(x0, x28);
@@ -1069,7 +1072,7 @@ public:
 	MemOperand sh4_context_mem_operand(void *p)
 	{
-		u32 offset = (u8*)p - (u8*)&p_sh4rcb->cntx;
+		u32 offset = (u8*)p - (u8*)&sh4ctx;
 		verify((offset & 3) == 0 && offset <= 16380);	// FIXME 64-bit regs need multiple of 8 up to 32760
 		return MemOperand(x28, offset);
 	}
@@ -1163,7 +1166,7 @@ public:
 #endif
 			{
 				Mov(w29, block->BranchBlock);
-				Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+				Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
 				GenBranch(arm64_no_update);
 			}
 		}
@@ -1177,9 +1180,9 @@ public:
 
 			// next_pc = branch_pc_value;
 			if (block->has_jcond)
-				Ldr(w11, sh4_context_mem_operand(&Sh4cntx.jdyn));
+				Ldr(w11, sh4_context_mem_operand(&sh4ctx.jdyn));
 			else
-				Ldr(w11, sh4_context_mem_operand(&Sh4cntx.sr.T));
+				Ldr(w11, sh4_context_mem_operand(&sh4ctx.sr.T));
 
 			Cmp(w11, block->BlockType & 1);
@@ -1207,7 +1210,7 @@ public:
 #endif
 			{
 				Mov(w29, block->BranchBlock);
-				Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+				Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
 				GenBranch(arm64_no_update);
 			}
 		}
@@ -1235,7 +1238,7 @@ public:
 #endif
 			{
 				Mov(w29, block->NextBlock);
-				Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+				Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
 				GenBranch(arm64_no_update);
 			}
 		}
@@ -1247,7 +1250,7 @@ public:
 
 		case BET_DynamicRet:
 			// next_pc = *jdyn;
-			Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+			Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
 			if (!mmu_enabled())
 			{
 				// TODO Call no_update instead (and check CpuRunning less frequently?)
@@ -1276,11 +1279,11 @@ public:
 				Mov(w29, block->NextBlock);
 
 			// else next_pc = *jdyn (already in w29)
-			Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+			Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
 
 			GenCallRuntime(UpdateINTC);
 
-			Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+			Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
 			GenBranch(arm64_no_update);
 			break;
@@ -1450,21 +1453,21 @@ public:
 		Bind(&intc_sched);
 
 		// w0 is pc, w1 is cycle_counter
-		Str(w0, sh4_context_mem_operand(&Sh4cntx.pc));
+		Str(w0, sh4_context_mem_operand(&sh4ctx.pc));
 		// Add timeslice to cycle counter
 		Add(w1, w1, SH4_TIMESLICE);
-		Str(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
-		Ldr(w0, sh4_context_mem_operand(&Sh4cntx.CpuRunning));
+		Str(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
+		Ldr(w0, sh4_context_mem_operand(&sh4ctx.CpuRunning));
 		Cbz(w0, &end_mainloop);
 		Mov(x29, lr);	// Save link register in case we return
 		GenCallRuntime(UpdateSystem_INTC);
 		Cbnz(w0, &do_interrupts);
 		Mov(lr, x29);
-		Ldr(w0, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
+		Ldr(w0, sh4_context_mem_operand(&sh4ctx.cycle_counter));
 		Ret();
 
 		Bind(&do_interrupts);
-		Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+		Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
 		B(&no_update);
 
 		Bind(&end_mainloop);
@@ -1499,12 +1502,12 @@ public:
 		// w0: vaddr, w1: addr
 		checkBlockFpu = GetCursorAddress();
 		Label fpu_enabled;
-		Ldr(w10, sh4_context_mem_operand(&Sh4cntx.sr.status));
+		Ldr(w10, sh4_context_mem_operand(&sh4ctx.sr.status));
 		Tbz(w10, 15, &fpu_enabled);	// test SR.FD bit
 		Mov(w1, Sh4Ex_FpuDisabled);	// exception code
 		GenCallRuntime(Do_Exception);
-		Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
+		Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
 		B(&no_update);
 		Bind(&fpu_enabled);
 		// fallthrough
@@ -1513,7 +1516,7 @@ public:
 		// MMU Block check (no fpu)
 		// w0: vaddr, w1: addr
 		checkBlockNoFpu = GetCursorAddress();
-		Ldr(w2, sh4_context_mem_operand(&Sh4cntx.pc));
+		Ldr(w2, sh4_context_mem_operand(&sh4ctx.pc));
 		Cmp(w2, w0);
 		Mov(w0, w1);
 		B(&blockCheckFailLabel, ne);
@@ -2174,6 +2177,7 @@ private:
 	RuntimeBlockInfo* block = NULL;
 	const int read_memory_rewrite_size = 5;	// ubfx, add, ldr for fast access. calling a handler can use more than 3 depending on offset
 	const int write_memory_rewrite_size = 5;	// ubfx, add, str
 
+	Sh4Context& sh4ctx;
 	Sh4CodeBuffer& codeBuffer;
 };
@@ -2184,9 +2188,10 @@ public:
 		sh4Dynarec = this;
 	}
 
-	void init(Sh4CodeBuffer& codeBuffer) override
+	void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
 	{
 		INFO_LOG(DYNAREC, "Initializing the ARM64 dynarec");
+		this->sh4ctx = &sh4ctx;
 		this->codeBuffer = &codeBuffer;
 	}
@@ -2195,10 +2200,10 @@ public:
 		unwinder.clear();
 		::mainloop = nullptr;
 
-		if (p_sh4rcb->cntx.CpuRunning)
+		if (sh4ctx->CpuRunning)
 		{
 			// Force the dynarec out of mainloop() to regenerate it
-			p_sh4rcb->cntx.CpuRunning = 0;
+			sh4ctx->CpuRunning = 0;
 			restarting = true;
 		}
 		else
@@ -2226,7 +2231,7 @@ public:
 	{
 		verify(codeBuffer->getFreeSpace() >= 16 * 1024);
 
-		compiler = new Arm64Assembler(*codeBuffer);
+		compiler = new Arm64Assembler(*sh4ctx, *codeBuffer);
 
 		compiler->compileBlock(block, smc_checks, optimise);
@@ -2257,7 +2262,7 @@ public:
 		if (::mainloop != nullptr)
 			return;
 		jitWriteProtect(*codeBuffer, false);
-		compiler = new Arm64Assembler(*codeBuffer);
+		compiler = new Arm64Assembler(*sh4ctx, *codeBuffer);
 
 		compiler->GenMainloop();
@@ -2269,7 +2274,7 @@ public:
 	RuntimeBlockInfo* allocateBlock() override
 	{
 		generate_mainloop();
-		return new DynaRBI(*codeBuffer);
+		return new DynaRBI(*sh4ctx, *codeBuffer);
 	}
 
 	void handleException(host_context_t &context) override
@@ -2340,7 +2345,7 @@ public:
 		// Skip the preceding ops (add, ubfx)
 		u32 *code_rewrite = code_ptr - 2;
 
-		Arm64Assembler *assembler = new Arm64Assembler(*codeBuffer, code_rewrite);
+		Arm64Assembler *assembler = new Arm64Assembler(*sh4ctx, *codeBuffer, code_rewrite);
 		if (is_read)
 			assembler->GenReadMemorySlow(size);
 		else if (!is_read && size >= 4 && (context.x0 >> 26) == 0x38)
@@ -2358,6 +2363,7 @@ public:
 private:
 	Arm64Assembler* compiler = nullptr;
 	bool restarting = false;
+	Sh4Context *sh4ctx = nullptr;
 	Sh4CodeBuffer *codeBuffer = nullptr;
 };
 
 u32 DynaRBI::Relink()
 {
 #ifndef NO_BLOCK_LINKING
 	//printf("DynaRBI::Relink %08x\n", this->addr);
 	jitWriteProtect(codeBuffer, false);
-	Arm64Assembler *compiler = new Arm64Assembler(codeBuffer, (u8 *)this->code + this->relink_offset);
+	Arm64Assembler *compiler = new Arm64Assembler(sh4ctx, codeBuffer, (u8 *)this->code + this->relink_offset);
 	u32 code_size = compiler->RelinkBlock(this);
 	compiler->Finalize(true);
diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp
index 96ad71b80..d7b6ba8c4 100644
--- a/core/rec-x64/rec_x64.cpp
+++ b/core/rec-x64/rec_x64.cpp
@@ -121,8 +121,8 @@ public:
 	using BaseCompiler = BaseXbyakRec<BlockCompiler>;
 	friend class BaseXbyakRec<BlockCompiler>;
 
-	BlockCompiler(Sh4CodeBuffer& codeBuffer) : BaseCompiler(codeBuffer), regalloc(this) { }
-	BlockCompiler(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(codeBuffer, code_ptr), regalloc(this) { }
+	BlockCompiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseCompiler(sh4ctx, codeBuffer), regalloc(this) { }
+	BlockCompiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(sh4ctx, codeBuffer, code_ptr), regalloc(this) { }
 
 	void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise)
 	{
@@ -136,7 +136,7 @@ public:
 		if (mmu_enabled() && block->has_fpu_op)
 		{
 			Xbyak::Label fpu_enabled;
-			mov(rax, (uintptr_t)&p_sh4rcb->cntx.sr.status);
+			mov(rax, (uintptr_t)&sh4ctx.sr.status);
 			test(dword[rax], 0x8000);	// test SR.FD bit
 			jz(fpu_enabled);
 			mov(call_regs[0], block->vaddr);	// pc
@@ -145,7 +145,7 @@ public:
 			jmp(exit_block, T_NEAR);
 			L(fpu_enabled);
 		}
-		mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
+		mov(rax, (uintptr_t)&sh4ctx.cycle_counter);
 		sub(dword[rax], block->guest_cycles);
 
 		regalloc.DoAlloc(block);
@@ -167,12 +167,12 @@ public:
 
 				if (op.rs1._imm)
 				{
-					mov(rax, (size_t)&p_sh4rcb->cntx.pc);
+					mov(rax, (size_t)&sh4ctx.pc);
 					mov(dword[rax], op.rs2._imm);
 				}
 
 				mov(call_regs[1], op.rs3._imm);
-				mov(call_regs64[0], (uintptr_t)&p_sh4rcb->cntx);
+				mov(call_regs64[0], (uintptr_t)&sh4ctx);
 
 				if (!mmu_enabled())
 					GenCall(OpDesc[op.rs3._imm]->oph);
@@ -379,7 +379,7 @@ public:
 			}
 			else
 			{
-				mov(call_regs64[1], (uintptr_t)&p_sh4rcb->cntx);
+				mov(call_regs64[1], (uintptr_t)&sh4ctx);
 
 				mov(rax, (size_t)&do_sqw_nommu);
 				saveXmmRegisters();
 				call(qword[rax]);
@@ -472,7 +472,7 @@ public:
 		regalloc.Cleanup();
 		current_opid = -1;
 
-		mov(rax, (size_t)&p_sh4rcb->cntx.pc);
+		mov(rax, (size_t)&sh4ctx.pc);
 
 		switch (block->BlockType)
 		{
@@ -492,9 +492,9 @@ public:
 			mov(dword[rax], block->NextBlock);
 
 			if (block->has_jcond)
-				mov(rdx, (size_t)&Sh4cntx.jdyn);
+				mov(rdx, (size_t)&sh4ctx.jdyn);
 			else
-				mov(rdx, (size_t)&Sh4cntx.sr.T);
+				mov(rdx, (size_t)&sh4ctx.sr.T);
 
 			cmp(dword[rdx], block->BlockType & 1);
 			Xbyak::Label branch_not_taken;
@@ -509,7 +509,7 @@ public:
 		case BET_DynamicCall:
 		case BET_DynamicRet:
 			//next_pc = *jdyn;
-			mov(rdx, (size_t)&Sh4cntx.jdyn);
+			mov(rdx, (size_t)&sh4ctx.jdyn);
 			mov(edx, dword[rdx]);
 			mov(dword[rax], edx);
 			break;
@@ -518,7 +518,7 @@ public:
 		case BET_StaticIntr:
 			if (block->BlockType == BET_DynamicIntr) {
 				//next_pc = *jdyn;
-				mov(rdx, (size_t)&Sh4cntx.jdyn);
+				mov(rdx, (size_t)&sh4ctx.jdyn);
 				mov(edx, dword[rdx]);
 				mov(dword[rax], edx);
 			}
@@ -684,7 +684,7 @@ public:
 		Xbyak::Label run_loop;
 		L(run_loop);
 		Xbyak::Label end_run_loop;
-		mov(rax, (size_t)&p_sh4rcb->cntx.CpuRunning);
+		mov(rax, (size_t)&sh4ctx.CpuRunning);
 		mov(edx, dword[rax]);
 
 		test(edx, edx);
@@ -693,11 +693,11 @@ public:
 		//slice_loop:
 		Xbyak::Label slice_loop;
 		L(slice_loop);
-		mov(rax, (size_t)&p_sh4rcb->cntx.pc);
+		mov(rax, (size_t)&sh4ctx.pc);
 		mov(call_regs[0], dword[rax]);
 		call(bm_GetCodeByVAddr);
 		call(rax);
-		mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
+		mov(rax, (uintptr_t)&sh4ctx.cycle_counter);
 		mov(ecx, dword[rax]);
 		test(ecx, ecx);
 		jg(slice_loop);
@@ -1058,7 +1058,7 @@ private:
 		// same at compile and run times.
 		if (mmu_enabled())
 		{
-			mov(rax, (uintptr_t)&p_sh4rcb->cntx.pc);
+			mov(rax, (uintptr_t)&sh4ctx.pc);
 			cmp(dword[rax], block->vaddr);
 			jne(reinterpret_cast<const void *>(&ngen_blockcheckfail));
 		}
@@ -1161,7 +1161,7 @@ private:
 		shr(r9d, 26);
 		cmp(r9d, 0x38);
 		jne(no_sqw);
-		mov(rax, (uintptr_t)p_sh4rcb->cntx.sq_buffer);
+		mov(rax, (uintptr_t)sh4ctx.sq_buffer);
 		and_(call_regs[0], 0x3F);
 
 		if (size == MemSize::S32)
@@ -1318,7 +1318,7 @@ public:
 		size_t protSize = codeBuffer->getFreeSpace();
 		virtmem::jit_set_exec(protStart, protSize, false);
 
-		ccCompiler = new BlockCompiler(*codeBuffer);
+		ccCompiler = new BlockCompiler(*sh4ctx, *codeBuffer);
 		try {
 			ccCompiler->compile(block, smc_checks, optimise);
 		} catch (const Xbyak::Error& e) {
@@ -1329,8 +1329,9 @@ public:
 		virtmem::jit_set_exec(protStart, protSize, true);
 	}
 
-	void init(Sh4CodeBuffer& codeBuffer) override
+	void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
 	{
+		this->sh4ctx = &sh4ctx;
 		this->codeBuffer = &codeBuffer;
 	}
@@ -1370,7 +1371,7 @@ public:
 		virtmem::jit_set_exec(protStart, protSize, false);
 
 		u8 *retAddr = *(u8 **)context.rsp - 5;
-		BlockCompiler compiler(*codeBuffer, retAddr);
+		BlockCompiler compiler(*sh4ctx, *codeBuffer, retAddr);
 		bool rc = false;
 		try {
 			rc = compiler.rewriteMemAccess(context);
@@ -1397,7 +1398,7 @@ public:
 		size_t protSize = codeBuffer->getFreeSpace();
 		virtmem::jit_set_exec(protStart, protSize, false);
 
-		BlockCompiler compiler(*codeBuffer);
+		BlockCompiler compiler(*sh4ctx, *codeBuffer);
 		try {
 			compiler.genMainloop();
 		} catch (const Xbyak::Error& e) {
@@ -1407,6 +1408,7 @@ public:
 	}
 
 private:
+	Sh4Context *sh4ctx = nullptr;
 	Sh4CodeBuffer *codeBuffer = nullptr;
 	BlockCompiler *ccCompiler = nullptr;
 };
diff --git a/core/rec-x64/xbyak_base.h b/core/rec-x64/xbyak_base.h
index 044795f74..1fd69bf4a 100644
--- a/core/rec-x64/xbyak_base.h
+++ b/core/rec-x64/xbyak_base.h
@@ -28,8 +28,9 @@ template<typename T> class BaseXbyakRec : public Xbyak::CodeGenerator
 {
 protected:
-	BaseXbyakRec(Sh4CodeBuffer& codeBuffer) : BaseXbyakRec(codeBuffer, (u8 *)codeBuffer.get()) { }
-	BaseXbyakRec(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : Xbyak::CodeGenerator(codeBuffer.getFreeSpace(), code_ptr), codeBuffer(codeBuffer) { }
+	BaseXbyakRec(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseXbyakRec(sh4ctx, codeBuffer, (u8 *)codeBuffer.get()) { }
+	BaseXbyakRec(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr)
+		: Xbyak::CodeGenerator(codeBuffer.getFreeSpace(), code_ptr), sh4ctx(sh4ctx), codeBuffer(codeBuffer) { }
 
 	using BinaryOp = void (BaseXbyakRec::*)(const Xbyak::Operand&, const Xbyak::Operand&);
 	using BinaryFOp = void (BaseXbyakRec::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
@@ -773,6 +774,7 @@ protected:
 		}
 	}
 
+	Sh4Context& sh4ctx;
 	Sh4CodeBuffer& codeBuffer;
 
 private:
diff --git a/core/rec-x86/rec_x86.cpp b/core/rec-x86/rec_x86.cpp
index ac353df0f..d029e8464 100644
--- a/core/rec-x86/rec_x86.cpp
+++ b/core/rec-x86/rec_x86.cpp
@@ -69,11 +69,13 @@ void X86RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
 
 struct DynaRBI : RuntimeBlockInfo
 {
-	DynaRBI(Sh4CodeBuffer *codeBuffer) : codeBuffer(codeBuffer) {}
+	DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
+		: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
 
 	u32 Relink() override;
 
 private:
-	Sh4CodeBuffer *codeBuffer;
+	Sh4Context& sh4ctx;
+	Sh4CodeBuffer& codeBuffer;
 };
 
@@ -108,26 +110,26 @@ void X86Compiler::compile(RuntimeBlockInfo* block, bool force_checks, bool optim
 	if (mmu_enabled() && block->has_fpu_op)
 	{
 		Xbyak::Label fpu_enabled;
-		mov(eax, dword[&Sh4cntx.sr.status]);
+		mov(eax, dword[&sh4ctx.sr.status]);
 		test(eax, 0x8000);	// test SR.FD bit
 		jz(fpu_enabled);
 		push(Sh4Ex_FpuDisabled);	// exception code
 		push(block->vaddr);			// pc
 		call((void (*)())Do_Exception);
 		add(esp, 8);
-		mov(ecx, dword[&Sh4cntx.pc]);
+		mov(ecx, dword[&sh4ctx.pc]);
 		jmp((const void *)no_update);
 		L(fpu_enabled);
 	}
 
-	mov(eax, dword[&Sh4cntx.cycle_counter]);
+	mov(eax, dword[&sh4ctx.cycle_counter]);
 	test(eax, eax);
 	Xbyak::Label no_up;
 	jg(no_up);
 	mov(ecx, block->vaddr);
 	call((const void *)intc_sched);
 	L(no_up);
-	sub(dword[&Sh4cntx.cycle_counter], block->guest_cycles);
+	sub(dword[&sh4ctx.cycle_counter], block->guest_cycles);
 
 	regalloc.doAlloc(block);
@@ -297,16 +299,16 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block)
 	case BET_DynamicIntr:
 		if (block->BlockType == BET_StaticIntr)
 		{
-			mov(dword[&Sh4cntx.pc], block->NextBlock);
+			mov(dword[&sh4ctx.pc], block->NextBlock);
 		}
 		else
 		{
 			mov(eax, dword[GetRegPtr(reg_pc_dyn)]);
-			mov(dword[&Sh4cntx.pc], eax);
+			mov(dword[&sh4ctx.pc], eax);
 		}
 		call(UpdateINTC);
 
-		mov(ecx, dword[&Sh4cntx.pc]);
+		mov(ecx, dword[&sh4ctx.pc]);
 		jmp((const void *)no_update);
 		break;
@@ -323,7 +325,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block)
 
 u32 DynaRBI::Relink()
 {
-	X86Compiler *compiler = new X86Compiler(*codeBuffer, (u8*)code + relink_offset);
+	X86Compiler *compiler = new X86Compiler(sh4ctx, codeBuffer, (u8*)code + relink_offset);
 	u32 codeSize = compiler->relinkBlock(this);
 	delete compiler;
@@ -439,14 +441,14 @@ void X86Compiler::genMainloop()
 	Xbyak::Label longjmpLabel;
 	L(longjmpLabel);
 
-	mov(ecx, dword[&Sh4cntx.pc]);
+	mov(ecx, dword[&sh4ctx.pc]);
 
 	//next_pc _MUST_ be on ecx
 	Xbyak::Label cleanup;
 	//no_update:
 	Xbyak::Label no_updateLabel;
 	L(no_updateLabel);
-	mov(edx, dword[&Sh4cntx.CpuRunning]);
+	mov(edx, dword[&sh4ctx.CpuRunning]);
 	cmp(edx, 0);
 	jz(cleanup);
 	if (!mmu_enabled())
@@ -458,14 +460,14 @@ void X86Compiler::genMainloop()
 	}
 	else
 	{
-		mov(dword[&Sh4cntx.pc], ecx);
+		mov(dword[&sh4ctx.pc], ecx);
 		call((void *)bm_GetCodeByVAddr);
 		jmp(eax);
 	}
 
 	//cleanup:
 	L(cleanup);
-	mov(dword[&Sh4cntx.pc], ecx);
+	mov(dword[&sh4ctx.pc], ecx);
 #ifndef _WIN32
 	// 16-byte alignment
 	add(esp, 12);
@@ -481,7 +483,7 @@ void X86Compiler::genMainloop()
 	Xbyak::Label do_iter;
 	L(do_iter);
 	add(esp, 4);	// pop intc_sched() return address
-	mov(ecx, dword[&Sh4cntx.pc]);
+	mov(ecx, dword[&sh4ctx.pc]);
 	jmp(no_updateLabel);
 
 	//ngen_LinkBlock_Shared_stub:
@@ -503,8 +505,8 @@ void X86Compiler::genMainloop()
 	unwinder.endProlog(0);
 	Xbyak::Label intc_schedLabel;
 	L(intc_schedLabel);
-	add(dword[&Sh4cntx.cycle_counter], SH4_TIMESLICE);
-	mov(dword[&Sh4cntx.pc], ecx);
+	add(dword[&sh4ctx.cycle_counter], SH4_TIMESLICE);
+	mov(dword[&sh4ctx.pc], ecx);
 	call((void *)UpdateSystem_INTC);
 	cmp(eax, 0);
 	jnz(do_iter);
@@ -525,7 +527,7 @@ void X86Compiler::genMainloop()
 	//ngen_LinkBlock_Generic_stub:
 	Xbyak::Label ngen_LinkBlock_Generic_label;
 	L(ngen_LinkBlock_Generic_label);
-	mov(edx, dword[&Sh4cntx.jdyn]);
+	mov(edx, dword[&sh4ctx.jdyn]);
 	jmp(ngen_LinkBlock_Shared_stub);
 
 	genMemHandlers();
@@ -568,7 +570,7 @@ void X86Compiler::genMainloop()
 		Xbyak::Label jumpblockLabel;
 		cmp(eax, 0);
 		jne(jumpblockLabel);
-		mov(ecx, dword[&Sh4cntx.pc]);
+		mov(ecx, dword[&sh4ctx.pc]);
 		jmp(no_updateLabel);
 		L(jumpblockLabel);
 	}
@@ -809,7 +811,7 @@ void X86Compiler::checkBlock(bool smc_checks, RuntimeBlockInfo* block)
 
 	if (mmu_enabled())
 	{
-		mov(eax, dword[&Sh4cntx.pc]);
+		mov(eax, dword[&sh4ctx.pc]);
 		cmp(eax, block->vaddr);
 		jne(reinterpret_cast<const void *>(ngen_blockcheckfail));
 	}
@@ -842,8 +844,9 @@ public:
 		sh4Dynarec = this;
 	}
 
-	void init(Sh4CodeBuffer& codeBuffer) override
+	void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
 	{
+		this->sh4ctx = &sh4ctx;
 		this->codeBuffer = &codeBuffer;
 	}
@@ -857,7 +860,7 @@ public:
 		if (::mainloop != nullptr)
 			return;
 
-		compiler = new X86Compiler(*codeBuffer);
+		compiler = new X86Compiler(*sh4ctx, *codeBuffer);
 		try {
 			compiler->genMainloop();
@@ -876,10 +879,10 @@ public:
 		::mainloop = nullptr;
 		unwinder.clear();
 
-		if (p_sh4rcb->cntx.CpuRunning)
+		if (sh4ctx->CpuRunning)
 		{
 			// Force the dynarec out of mainloop() to regenerate it
-			p_sh4rcb->cntx.CpuRunning = 0;
+			sh4ctx->CpuRunning = 0;
 			restarting = true;
 		}
 		else
@@ -889,7 +892,7 @@ public:
 	RuntimeBlockInfo* allocateBlock() override
 	{
 		generate_mainloop();
-		return new DynaRBI(codeBuffer);
+		return new DynaRBI(*sh4ctx, *codeBuffer);
 	}
 
 	void mainloop(void* v_cntx) override
@@ -911,7 +914,7 @@ public:
 
 	void compile(RuntimeBlockInfo* block, bool smc_checks, bool optimise) override
 	{
-		compiler = new X86Compiler(*codeBuffer);
+		compiler = new X86Compiler(*sh4ctx, *codeBuffer);
 		try {
 			compiler->compile(block, smc_checks, optimise);
@@ -928,7 +931,7 @@ public:
 			// init() not called yet
 			return false;
 		u8 *rewriteAddr = *(u8 **)context.esp - 5;
-		X86Compiler *compiler = new X86Compiler(*codeBuffer, rewriteAddr);
+		X86Compiler *compiler = new X86Compiler(*sh4ctx, *codeBuffer, rewriteAddr);
 		bool rv = compiler->rewriteMemAccess(context);
 		delete compiler;
@@ -956,6 +959,7 @@ public:
 	}
 
 private:
+	Sh4Context *sh4ctx = nullptr;
 	Sh4CodeBuffer *codeBuffer = nullptr;
 	X86Compiler *compiler = nullptr;
 	bool restarting = false;
diff --git a/core/rec-x86/rec_x86.h b/core/rec-x86/rec_x86.h
index 8eecf77fc..ff9133f99 100644
--- a/core/rec-x86/rec_x86.h
+++ b/core/rec-x86/rec_x86.h
@@ -31,8 +31,8 @@ class X86Compiler : public BaseXbyakRec<X86Compiler>
 public:
 	using BaseCompiler = BaseXbyakRec<X86Compiler>;
 
-	X86Compiler(Sh4CodeBuffer& codeBuffer) : BaseCompiler(codeBuffer), regalloc(this) { }
-	X86Compiler(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(codeBuffer, code_ptr), regalloc(this) { }
+	X86Compiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseCompiler(sh4ctx, codeBuffer), regalloc(this) { }
+	X86Compiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(sh4ctx, codeBuffer, code_ptr), regalloc(this) { }
 
 	void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise);
 
diff --git a/core/rec-x86/x86_ops.cpp b/core/rec-x86/x86_ops.cpp
index f423c39fb..c355965a9 100644
--- a/core/rec-x86/x86_ops.cpp
+++ b/core/rec-x86/x86_ops.cpp
@@ -139,12 +139,12 @@ void X86Compiler::genMemHandlers()
 			and_(ecx, 0x3F);
 
 			if (size == MemSize::S32)
-				mov(dword[(size_t)p_sh4rcb->cntx.sq_buffer + ecx], edx);
+				mov(dword[(size_t)sh4ctx.sq_buffer + ecx], edx);
 			else if (size >= MemSize::F32)
 			{
-				movss(dword[(size_t)p_sh4rcb->cntx.sq_buffer + ecx], xmm0);
+				movss(dword[(size_t)sh4ctx.sq_buffer + ecx], xmm0);
 				if (size == MemSize::F64)
-					movss(dword[((size_t)p_sh4rcb->cntx.sq_buffer + 4) + ecx], xmm1);
+					movss(dword[((size_t)sh4ctx.sq_buffer + 4) + ecx], xmm1);
 			}
 			ret();
 			L(no_sqw);
@@ -327,8 +327,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 			push(block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
 		}
 		if (op.rs1.is_imm() && op.rs1.imm_value())
-			mov(dword[&Sh4cntx.pc], op.rs2.imm_value());
-		mov(ecx, (uintptr_t)&Sh4cntx);
+			mov(dword[&sh4ctx.pc], op.rs2.imm_value());
+		mov(ecx, (uintptr_t)&sh4ctx);
 		mov(edx, op.rs3.imm_value());
 		if (!mmu_enabled())
 			genCall(OpDesc[op.rs3.imm_value()]->oph);
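
The pattern the patch applies is uniform across all these files: every component that used to reach through the global Sh4cntx / p_sh4rcb->cntx (the dynarec backends, Sh4Cycles, and both caches) now receives the context once, via init() or a constructor, and keeps a Sh4Context pointer or reference. A minimal, self-contained sketch of that wiring, using simplified stand-in types rather than the real Flycast declarations:

#include <cassert>
#include <cstdint>

// Stand-in for the real (much larger) SH4 context block.
struct Sh4Context {
	uint32_t pc = 0;
	int cycle_counter = 0;
};

// Mirrors the shape of the patched Sh4Cycles: the context is injected
// once through init() instead of being named through a global.
class Sh4Cycles {
public:
	void init(Sh4Context *ctx) { this->ctx = ctx; }
	void addCycles(int cycles) const { ctx->cycle_counter -= cycles; }
private:
	Sh4Context *ctx = nullptr;	// was: the global Sh4cntx
};

int main() {
	Sh4Context ctx;
	ctx.cycle_counter = 448;	// illustrative per-slice cycle budget

	Sh4Cycles cycles;
	cycles.init(&ctx);		// wired once, as Sh4Interpreter::Init() now does
	cycles.addCycles(3);
	assert(ctx.cycle_counter == 445);
}

Besides making the dependency explicit, injection lets a backend address context fields relative to the instance it was handed rather than a fixed global: the ARM64 dynarec's sh4_context_mem_operand() computes offsets from &sh4ctx and emits x28-relative MemOperands, while the x86/x64 backends now bake the injected context's addresses into the generated code.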