sh4: pass sh4 context to dynarecs, sh4cycles and caches

Reorder RuntimeBlockInfo members to save space
This commit is contained in:
Flyinghead 2024-11-09 13:25:55 +01:00
parent 1cadeb276e
commit e394840344
13 changed files with 163 additions and 123 deletions

View File

@ -16,8 +16,8 @@ struct RuntimeBlockInfo
bool Setup(u32 pc,fpscr_t fpu_cfg);
u32 addr;
DynarecCodeEntryPtr code;
u32 vaddr;
DynarecCodeEntryPtr code;
u32 host_code_size; //in bytes
u32 sh4_code_size; //in bytes
@ -27,8 +27,8 @@ struct RuntimeBlockInfo
u32 guest_opcodes;
u32 host_opcodes; // set by host code generator, optional
bool has_fpu_op;
u32 blockcheck_failures;
bool temp_block;
u32 blockcheck_failures;
u32 BranchBlock; //if not 0xFFFFFFFF then jump target
u32 NextBlock; //if not 0xFFFFFFFF then next block (by position)
@ -42,8 +42,11 @@ struct RuntimeBlockInfo
BlockEndType BlockType;
bool has_jcond;
bool read_only;
std::vector<shil_opcode> oplist;
//predecessors references
std::vector<RuntimeBlockInfoPtr> pre_refs;
bool containsCode(const void *ptr)
{
@ -56,16 +59,11 @@ struct RuntimeBlockInfo
return 0;
}
//predecessors references
std::vector<RuntimeBlockInfoPtr> pre_refs;
void AddRef(const RuntimeBlockInfoPtr& other);
void RemRef(const RuntimeBlockInfoPtr& other);
void Discard();
void SetProtectedFlags();
bool read_only;
};
void bm_WriteBlockMap(const std::string& file);

View File

@ -92,7 +92,7 @@ void Sh4Recompiler::clear_temp_cache(bool full)
void Sh4Recompiler::ResetCache()
{
INFO_LOG(DYNAREC, "recSh4:Dynarec Cache clear at %08X free space %d", Sh4cntx.pc, codeBuffer.getFreeSpace());
INFO_LOG(DYNAREC, "recSh4:Dynarec Cache clear at %08X free space %d", getContext()->pc, codeBuffer.getFreeSpace());
codeBuffer.reset(false);
bm_ResetCache();
smc_hotspots.clear();
@ -103,12 +103,13 @@ void Sh4Recompiler::Run()
{
RestoreHostRoundingMode();
u8 *sh4_dyna_rcb = (u8 *)&Sh4cntx + sizeof(Sh4cntx);
INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb, (u8*)&sh4rcb.cntx.pc - sh4_dyna_rcb, sh4rcb.cntx.pc);
u8 *sh4_dyna_rcb = (u8 *)getContext() + sizeof(Sh4cntx);
INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb,
(u8*)&getContext()->pc - sh4_dyna_rcb, getContext()->pc);
sh4Dynarec->mainloop(sh4_dyna_rcb);
ctx->CpuRunning = false;
getContext()->CpuRunning = false;
}
void AnalyseBlock(RuntimeBlockInfo* blk);
@ -355,7 +356,7 @@ void Sh4Recompiler::Init()
bm_Init();
if (addrspace::virtmemEnabled())
verify(&mem_b[0] == ((u8*)p_sh4rcb->cntx.sq_buffer + 512 + 0x0C000000));
verify(&mem_b[0] == ((u8*)getContext()->sq_buffer + sizeof(Sh4Context) + 0x0C000000));
// Call the platform-specific magic to make the pages RWX
CodeCache = nullptr;
@ -369,7 +370,7 @@ void Sh4Recompiler::Init()
verify(CodeCache != nullptr);
TempCodeCache = CodeCache + CODE_SIZE;
sh4Dynarec->init(codeBuffer);
sh4Dynarec->init(*getContext(), codeBuffer);
bm_ResetCache();
}

View File

@ -85,7 +85,7 @@ class Sh4Dynarec
{
public:
// Initialize the dynarec, which should keep a reference to the passed code buffer to generate code later.
virtual void init(Sh4CodeBuffer& codeBuffer) = 0;
virtual void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) = 0;
// Compile the given block.
// If smc_checks is true, add self-modifying code detection.
// If optimize is true, use fast memory accesses if possible, that will be rewritten if they fail.

View File

@ -187,6 +187,9 @@ void Sh4Interpreter::Init()
{
ctx = &p_sh4rcb->cntx;
memset(ctx, 0, sizeof(*ctx));
sh4cycles.init(ctx);
icache.init(ctx);
ocache.init(ctx);
}
void Sh4Interpreter::Term()

View File

@ -55,6 +55,11 @@ static bool translatedArea(u32 area)
class Sh4ICache
{
public:
void init(Sh4Context *ctx) {
this->ctx = ctx;
sh4cycles.init(ctx);
}
u16 ReadMem(u32 address)
{
bool cacheOn = false;
@ -177,7 +182,7 @@ private:
return MmuError::BADADDR;
const u32 area = address >> 29;
const bool userMode = p_sh4rcb->cntx.sr.MD == 0;
const bool userMode = ctx->sr.MD == 0;
if (userMode)
{
@ -222,6 +227,7 @@ private:
std::array<cache_line, 256> lines;
Sh4Cycles sh4cycles;
Sh4Context *ctx = nullptr;
};
extern Sh4ICache icache;
@ -232,6 +238,11 @@ extern Sh4ICache icache;
class Sh4OCache
{
public:
void init(Sh4Context *ctx) {
this->ctx = ctx;
sh4cycles.init(ctx);
}
template<class T>
T ReadMem(u32 address)
{
@ -515,7 +526,7 @@ private:
return lookup;
}
const u32 area = address >> 29;
const bool userMode = p_sh4rcb->cntx.sr.MD == 0;
const bool userMode = ctx->sr.MD == 0;
// kernel mem protected in user mode
if (userMode && (address & 0x80000000))
@ -591,6 +602,7 @@ private:
u64 writeBackBufferCycles = 0;
u64 writeThroughBufferCycles = 0;
Sh4Cycles sh4cycles;
Sh4Context *ctx = nullptr;
};
extern Sh4OCache ocache;

View File

@ -27,24 +27,28 @@ class Sh4Cycles
public:
Sh4Cycles(int cpuRatio = 1) : cpuRatio(cpuRatio) {}
void init(Sh4Context *ctx) {
this->ctx = ctx;
}
void executeCycles(u16 op)
{
Sh4cntx.cycle_counter -= countCycles(op);
ctx->cycle_counter -= countCycles(op);
}
void addCycles(int cycles) const
{
Sh4cntx.cycle_counter -= cycles;
ctx->cycle_counter -= cycles;
}
void addReadAccessCycles(u32 addr, u32 size) const
{
Sh4cntx.cycle_counter -= readAccessCycles(addr, size);
ctx->cycle_counter -= readAccessCycles(addr, size);
}
void addWriteAccessCycles(u32 addr, u32 size) const
{
Sh4cntx.cycle_counter -= writeAccessCycles(addr, size);
ctx->cycle_counter -= writeAccessCycles(addr, size);
}
int countCycles(u16 op);
@ -55,8 +59,8 @@ public:
memOps = 0;
}
static u64 now() {
return sh4_sched_now64() + SH4_TIMESLICE - Sh4cntx.cycle_counter;
u64 now() {
return sh4_sched_now64() + SH4_TIMESLICE - ctx->cycle_counter;
}
int readAccessCycles(u32 addr, u32 size) const {
@ -76,4 +80,5 @@ private:
sh4_eu lastUnit = CO;
const int cpuRatio;
int memOps = 0;
Sh4Context *ctx = nullptr;
};

View File

@ -94,10 +94,12 @@ extern "C" char *stpcpy(char *dst, char const *src)
struct DynaRBI : RuntimeBlockInfo
{
DynaRBI(Sh4CodeBuffer& codeBuffer) : codeBuffer(codeBuffer) {}
DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
u32 Relink() override;
Register T_reg;
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
};
@ -157,8 +159,10 @@ class Arm32Assembler : public MacroAssembler
using BinaryOP = void (MacroAssembler::*)(Register, Register, const Operand&);
public:
Arm32Assembler(Sh4CodeBuffer& codeBuffer) : MacroAssembler((u8 *)codeBuffer.get(), codeBuffer.getFreeSpace(), A32), codeBuffer(codeBuffer), reg(*this) {}
Arm32Assembler(Sh4CodeBuffer& codeBuffer, u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32), codeBuffer(codeBuffer), reg(*this) {}
Arm32Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
: MacroAssembler((u8 *)codeBuffer.get(), codeBuffer.getFreeSpace(), A32), sh4ctx(sh4ctx), codeBuffer(codeBuffer), reg(*this) {}
Arm32Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *buffer, size_t size)
: MacroAssembler(buffer, size, A32), sh4ctx(sh4ctx), codeBuffer(codeBuffer), reg(*this) {}
void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise);
void rewrite(Register raddr, Register rt, SRegister ft, DRegister fd, bool write, bool is_sq, mem_op_type optp);
@ -367,6 +371,7 @@ private:
void genMmuLookup(RuntimeBlockInfo* block, const shil_opcode& op, u32 write, Register& raddr);
void compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool optimise);
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
arm_reg_alloc reg;
struct CC_PS
@ -416,7 +421,7 @@ public:
sh4Dynarec = this;
}
void init(Sh4CodeBuffer& codeBuffer) override;
void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override;
void reset() override;
RuntimeBlockInfo *allocateBlock() override;
void handleException(host_context_t &context) override;
@ -435,7 +440,7 @@ public:
}
void compile(RuntimeBlockInfo* block, bool smc_check, bool optimise) override {
ass = new Arm32Assembler(*codeBuffer);
ass = new Arm32Assembler(*sh4ctx, *codeBuffer);
ass->compile(block, smc_check, optimise);
delete ass;
ass = nullptr;
@ -457,6 +462,7 @@ public:
private:
void generate_mainloop();
Sh4Context *sh4ctx = nullptr;
Sh4CodeBuffer *codeBuffer = nullptr;
bool restarting = false;
Arm32Assembler *ass = nullptr;
@ -465,7 +471,7 @@ static Arm32Dynarec instance;
u32 DynaRBI::Relink()
{
Arm32Assembler ass(codeBuffer, (u8 *)code + relink_offset, host_code_size - relink_offset);
Arm32Assembler ass(sh4ctx, codeBuffer, (u8 *)code + relink_offset, host_code_size - relink_offset);
u32 size = ass.relinkBlock(this);
@ -846,7 +852,7 @@ bool Arm32Dynarec::rewrite(host_context_t& context, void *faultAddress)
// ignore last 2 bits zeroed to avoid sigbus errors
verify(fault_offs == 0 || (fault_offs & ~3) == (sh4_addr & 0x1FFFFFFC));
ass = new Arm32Assembler(*codeBuffer, (u8 *)ptr, 12);
ass = new Arm32Assembler(*sh4ctx, *codeBuffer, (u8 *)ptr, 12);
ass->rewrite(raddr, rt, ft, fd, !read, is_sq, optp);
delete ass;
ass = nullptr;
@ -2251,10 +2257,10 @@ void Arm32Dynarec::reset()
::mainloop = nullptr;
unwinder.clear();
if (p_sh4rcb->cntx.CpuRunning)
if (sh4ctx->CpuRunning)
{
// Force the dynarec out of mainloop() to regenerate it
p_sh4rcb->cntx.CpuRunning = 0;
sh4ctx->CpuRunning = 0;
restarting = true;
}
else
@ -2267,7 +2273,7 @@ void Arm32Dynarec::generate_mainloop()
return;
INFO_LOG(DYNAREC, "Generating main loop");
Arm32Assembler ass(*codeBuffer);
Arm32Assembler ass(*sh4ctx, *codeBuffer);
ass.genMainLoop();
}
@ -2541,7 +2547,7 @@ void Arm32Assembler::genMainLoop()
INFO_LOG(DYNAREC, "readm helpers: up to %p", GetCursorAddress<void *>());
}
void Arm32Dynarec::init(Sh4CodeBuffer& codeBuffer)
void Arm32Dynarec::init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
{
INFO_LOG(DYNAREC, "Initializing the ARM32 dynarec");
@ -2563,6 +2569,7 @@ void Arm32Dynarec::init(Sh4CodeBuffer& codeBuffer)
ccmap[shop_setab] = hi;
ccnmap[shop_setab] = ls;
this->sh4ctx = &sh4ctx;
this->codeBuffer = &codeBuffer;
}
@ -2574,6 +2581,6 @@ void Arm32Dynarec::handleException(host_context_t &context)
RuntimeBlockInfo* Arm32Dynarec::allocateBlock()
{
generate_mainloop(); // FIXME why is this needed?
return new DynaRBI(*codeBuffer);
return new DynaRBI(*sh4ctx, *codeBuffer);
};
#endif

View File

@ -49,10 +49,12 @@ using namespace vixl::aarch64;
struct DynaRBI : RuntimeBlockInfo
{
DynaRBI(Sh4CodeBuffer& codeBuffer) : codeBuffer(codeBuffer) {}
DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
u32 Relink() override;
private:
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
};
@ -126,10 +128,11 @@ class Arm64Assembler : public MacroAssembler
typedef void (MacroAssembler::*Arm64Fop_RRR)(const VRegister&, const VRegister&, const VRegister&);
public:
Arm64Assembler(Sh4CodeBuffer& codeBuffer) : Arm64Assembler(codeBuffer, codeBuffer.get()) {
}
Arm64Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
: Arm64Assembler(sh4ctx, codeBuffer, codeBuffer.get()) { }
Arm64Assembler(Sh4CodeBuffer& codeBuffer, void *buffer) : MacroAssembler((u8 *)buffer, codeBuffer.getFreeSpace()), regalloc(this), codeBuffer(codeBuffer)
Arm64Assembler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, void *buffer)
: MacroAssembler((u8 *)buffer, codeBuffer.getFreeSpace()), regalloc(this), sh4ctx(sh4ctx), codeBuffer(codeBuffer)
{
call_regs.push_back((const WRegister*)&w0);
call_regs.push_back((const WRegister*)&w1);
@ -264,7 +267,7 @@ public:
regalloc.DoAlloc(block);
// scheduler
Ldr(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
Ldr(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
Cmp(w1, 0);
Label cycles_remaining;
B(&cycles_remaining, pl);
@ -274,7 +277,7 @@ public:
Bind(&cycles_remaining);
Sub(w1, w1, block->guest_cycles);
Str(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
Str(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
for (size_t i = 0; i < block->oplist.size(); i++)
{
@ -287,7 +290,7 @@ public:
if (op.rs1._imm) // if NeedPC()
{
Mov(w10, op.rs2._imm);
Str(w10, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w10, sh4_context_mem_operand(&sh4ctx.pc));
}
Mov(x0, x28);
@ -1069,7 +1072,7 @@ public:
MemOperand sh4_context_mem_operand(void *p)
{
u32 offset = (u8*)p - (u8*)&p_sh4rcb->cntx;
u32 offset = (u8*)p - (u8*)&sh4ctx;
verify((offset & 3) == 0 && offset <= 16380); // FIXME 64-bit regs need multiple of 8 up to 32760
return MemOperand(x28, offset);
}
@ -1163,7 +1166,7 @@ public:
#endif
{
Mov(w29, block->BranchBlock);
Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
GenBranch(arm64_no_update);
}
}
@ -1177,9 +1180,9 @@ public:
// next_pc = branch_pc_value;
if (block->has_jcond)
Ldr(w11, sh4_context_mem_operand(&Sh4cntx.jdyn));
Ldr(w11, sh4_context_mem_operand(&sh4ctx.jdyn));
else
Ldr(w11, sh4_context_mem_operand(&Sh4cntx.sr.T));
Ldr(w11, sh4_context_mem_operand(&sh4ctx.sr.T));
Cmp(w11, block->BlockType & 1);
@ -1207,7 +1210,7 @@ public:
#endif
{
Mov(w29, block->BranchBlock);
Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
GenBranch(arm64_no_update);
}
}
@ -1235,7 +1238,7 @@ public:
#endif
{
Mov(w29, block->NextBlock);
Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
GenBranch(arm64_no_update);
}
}
@ -1247,7 +1250,7 @@ public:
case BET_DynamicRet:
// next_pc = *jdyn;
Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
if (!mmu_enabled())
{
// TODO Call no_update instead (and check CpuRunning less frequently?)
@ -1276,11 +1279,11 @@ public:
Mov(w29, block->NextBlock);
// else next_pc = *jdyn (already in w29)
Str(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w29, sh4_context_mem_operand(&sh4ctx.pc));
GenCallRuntime(UpdateINTC);
Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
GenBranch(arm64_no_update);
break;
@ -1450,21 +1453,21 @@ public:
Bind(&intc_sched); // w0 is pc, w1 is cycle_counter
Str(w0, sh4_context_mem_operand(&Sh4cntx.pc));
Str(w0, sh4_context_mem_operand(&sh4ctx.pc));
// Add timeslice to cycle counter
Add(w1, w1, SH4_TIMESLICE);
Str(w1, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
Ldr(w0, sh4_context_mem_operand(&Sh4cntx.CpuRunning));
Str(w1, sh4_context_mem_operand(&sh4ctx.cycle_counter));
Ldr(w0, sh4_context_mem_operand(&sh4ctx.CpuRunning));
Cbz(w0, &end_mainloop);
Mov(x29, lr); // Save link register in case we return
GenCallRuntime(UpdateSystem_INTC);
Cbnz(w0, &do_interrupts);
Mov(lr, x29);
Ldr(w0, sh4_context_mem_operand(&Sh4cntx.cycle_counter));
Ldr(w0, sh4_context_mem_operand(&sh4ctx.cycle_counter));
Ret();
Bind(&do_interrupts);
Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
B(&no_update);
Bind(&end_mainloop);
@ -1499,12 +1502,12 @@ public:
// w0: vaddr, w1: addr
checkBlockFpu = GetCursorAddress<DynaCode *>();
Label fpu_enabled;
Ldr(w10, sh4_context_mem_operand(&Sh4cntx.sr.status));
Ldr(w10, sh4_context_mem_operand(&sh4ctx.sr.status));
Tbz(w10, 15, &fpu_enabled); // test SR.FD bit
Mov(w1, Sh4Ex_FpuDisabled); // exception code
GenCallRuntime(Do_Exception);
Ldr(w29, sh4_context_mem_operand(&Sh4cntx.pc));
Ldr(w29, sh4_context_mem_operand(&sh4ctx.pc));
B(&no_update);
Bind(&fpu_enabled);
// fallthrough
@ -1513,7 +1516,7 @@ public:
// MMU Block check (no fpu)
// w0: vaddr, w1: addr
checkBlockNoFpu = GetCursorAddress<DynaCode *>();
Ldr(w2, sh4_context_mem_operand(&Sh4cntx.pc));
Ldr(w2, sh4_context_mem_operand(&sh4ctx.pc));
Cmp(w2, w0);
Mov(w0, w1);
B(&blockCheckFailLabel, ne);
@ -2174,6 +2177,7 @@ private:
RuntimeBlockInfo* block = NULL;
const int read_memory_rewrite_size = 5; // ubfx, add, ldr for fast access. calling a handler can use more than 3 depending on offset
const int write_memory_rewrite_size = 5; // ubfx, add, str
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
};
@ -2184,9 +2188,10 @@ public:
sh4Dynarec = this;
}
void init(Sh4CodeBuffer& codeBuffer) override
void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
{
INFO_LOG(DYNAREC, "Initializing the ARM64 dynarec");
this->sh4ctx = &sh4ctx;
this->codeBuffer = &codeBuffer;
}
@ -2195,10 +2200,10 @@ public:
unwinder.clear();
::mainloop = nullptr;
if (p_sh4rcb->cntx.CpuRunning)
if (sh4ctx->CpuRunning)
{
// Force the dynarec out of mainloop() to regenerate it
p_sh4rcb->cntx.CpuRunning = 0;
sh4ctx->CpuRunning = 0;
restarting = true;
}
else
@ -2226,7 +2231,7 @@ public:
{
verify(codeBuffer->getFreeSpace() >= 16 * 1024);
compiler = new Arm64Assembler(*codeBuffer);
compiler = new Arm64Assembler(*sh4ctx, *codeBuffer);
compiler->compileBlock(block, smc_checks, optimise);
@ -2257,7 +2262,7 @@ public:
if (::mainloop != nullptr)
return;
jitWriteProtect(*codeBuffer, false);
compiler = new Arm64Assembler(*codeBuffer);
compiler = new Arm64Assembler(*sh4ctx, *codeBuffer);
compiler->GenMainloop();
@ -2269,7 +2274,7 @@ public:
RuntimeBlockInfo* allocateBlock() override
{
generate_mainloop();
return new DynaRBI(*codeBuffer);
return new DynaRBI(*sh4ctx, *codeBuffer);
}
void handleException(host_context_t &context) override
@ -2340,7 +2345,7 @@ public:
// Skip the preceding ops (add, ubfx)
u32 *code_rewrite = code_ptr - 2;
Arm64Assembler *assembler = new Arm64Assembler(*codeBuffer, code_rewrite);
Arm64Assembler *assembler = new Arm64Assembler(*sh4ctx, *codeBuffer, code_rewrite);
if (is_read)
assembler->GenReadMemorySlow(size);
else if (!is_read && size >= 4 && (context.x0 >> 26) == 0x38)
@ -2358,6 +2363,7 @@ public:
private:
Arm64Assembler* compiler = nullptr;
bool restarting = false;
Sh4Context *sh4ctx = nullptr;
Sh4CodeBuffer *codeBuffer = nullptr;
};
@ -2368,7 +2374,7 @@ u32 DynaRBI::Relink()
#ifndef NO_BLOCK_LINKING
//printf("DynaRBI::Relink %08x\n", this->addr);
jitWriteProtect(codeBuffer, false);
Arm64Assembler *compiler = new Arm64Assembler(codeBuffer, (u8 *)this->code + this->relink_offset);
Arm64Assembler *compiler = new Arm64Assembler(sh4ctx, codeBuffer, (u8 *)this->code + this->relink_offset);
u32 code_size = compiler->RelinkBlock(this);
compiler->Finalize(true);

View File

@ -121,8 +121,8 @@ public:
using BaseCompiler = BaseXbyakRec<BlockCompiler, true>;
friend class BaseXbyakRec<BlockCompiler, true>;
BlockCompiler(Sh4CodeBuffer& codeBuffer) : BaseCompiler(codeBuffer), regalloc(this) { }
BlockCompiler(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(codeBuffer, code_ptr), regalloc(this) { }
BlockCompiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseCompiler(sh4ctx, codeBuffer), regalloc(this) { }
BlockCompiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(sh4ctx, codeBuffer, code_ptr), regalloc(this) { }
void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise)
{
@ -136,7 +136,7 @@ public:
if (mmu_enabled() && block->has_fpu_op)
{
Xbyak::Label fpu_enabled;
mov(rax, (uintptr_t)&p_sh4rcb->cntx.sr.status);
mov(rax, (uintptr_t)&sh4ctx.sr.status);
test(dword[rax], 0x8000); // test SR.FD bit
jz(fpu_enabled);
mov(call_regs[0], block->vaddr); // pc
@ -145,7 +145,7 @@ public:
jmp(exit_block, T_NEAR);
L(fpu_enabled);
}
mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
mov(rax, (uintptr_t)&sh4ctx.cycle_counter);
sub(dword[rax], block->guest_cycles);
regalloc.DoAlloc(block);
@ -167,12 +167,12 @@ public:
if (op.rs1._imm)
{
mov(rax, (size_t)&p_sh4rcb->cntx.pc);
mov(rax, (size_t)&sh4ctx.pc);
mov(dword[rax], op.rs2._imm);
}
mov(call_regs[1], op.rs3._imm);
mov(call_regs64[0], (uintptr_t)&p_sh4rcb->cntx);
mov(call_regs64[0], (uintptr_t)&sh4ctx);
if (!mmu_enabled())
GenCall(OpDesc[op.rs3._imm]->oph);
@ -379,7 +379,7 @@ public:
}
else
{
mov(call_regs64[1], (uintptr_t)&p_sh4rcb->cntx);
mov(call_regs64[1], (uintptr_t)&sh4ctx);
mov(rax, (size_t)&do_sqw_nommu);
saveXmmRegisters();
call(qword[rax]);
@ -472,7 +472,7 @@ public:
regalloc.Cleanup();
current_opid = -1;
mov(rax, (size_t)&p_sh4rcb->cntx.pc);
mov(rax, (size_t)&sh4ctx.pc);
switch (block->BlockType) {
@ -492,9 +492,9 @@ public:
mov(dword[rax], block->NextBlock);
if (block->has_jcond)
mov(rdx, (size_t)&Sh4cntx.jdyn);
mov(rdx, (size_t)&sh4ctx.jdyn);
else
mov(rdx, (size_t)&Sh4cntx.sr.T);
mov(rdx, (size_t)&sh4ctx.sr.T);
cmp(dword[rdx], block->BlockType & 1);
Xbyak::Label branch_not_taken;
@ -509,7 +509,7 @@ public:
case BET_DynamicCall:
case BET_DynamicRet:
//next_pc = *jdyn;
mov(rdx, (size_t)&Sh4cntx.jdyn);
mov(rdx, (size_t)&sh4ctx.jdyn);
mov(edx, dword[rdx]);
mov(dword[rax], edx);
break;
@ -518,7 +518,7 @@ public:
case BET_StaticIntr:
if (block->BlockType == BET_DynamicIntr) {
//next_pc = *jdyn;
mov(rdx, (size_t)&Sh4cntx.jdyn);
mov(rdx, (size_t)&sh4ctx.jdyn);
mov(edx, dword[rdx]);
mov(dword[rax], edx);
}
@ -684,7 +684,7 @@ public:
Xbyak::Label run_loop;
L(run_loop);
Xbyak::Label end_run_loop;
mov(rax, (size_t)&p_sh4rcb->cntx.CpuRunning);
mov(rax, (size_t)&sh4ctx.CpuRunning);
mov(edx, dword[rax]);
test(edx, edx);
@ -693,11 +693,11 @@ public:
//slice_loop:
Xbyak::Label slice_loop;
L(slice_loop);
mov(rax, (size_t)&p_sh4rcb->cntx.pc);
mov(rax, (size_t)&sh4ctx.pc);
mov(call_regs[0], dword[rax]);
call(bm_GetCodeByVAddr);
call(rax);
mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
mov(rax, (uintptr_t)&sh4ctx.cycle_counter);
mov(ecx, dword[rax]);
test(ecx, ecx);
jg(slice_loop);
@ -1058,7 +1058,7 @@ private:
// same at compile and run times.
if (mmu_enabled())
{
mov(rax, (uintptr_t)&p_sh4rcb->cntx.pc);
mov(rax, (uintptr_t)&sh4ctx.pc);
cmp(dword[rax], block->vaddr);
jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
}
@ -1161,7 +1161,7 @@ private:
shr(r9d, 26);
cmp(r9d, 0x38);
jne(no_sqw);
mov(rax, (uintptr_t)p_sh4rcb->cntx.sq_buffer);
mov(rax, (uintptr_t)sh4ctx.sq_buffer);
and_(call_regs[0], 0x3F);
if (size == MemSize::S32)
@ -1318,7 +1318,7 @@ public:
size_t protSize = codeBuffer->getFreeSpace();
virtmem::jit_set_exec(protStart, protSize, false);
ccCompiler = new BlockCompiler(*codeBuffer);
ccCompiler = new BlockCompiler(*sh4ctx, *codeBuffer);
try {
ccCompiler->compile(block, smc_checks, optimise);
} catch (const Xbyak::Error& e) {
@ -1329,8 +1329,9 @@ public:
virtmem::jit_set_exec(protStart, protSize, true);
}
void init(Sh4CodeBuffer& codeBuffer) override
void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
{
this->sh4ctx = &sh4ctx;
this->codeBuffer = &codeBuffer;
}
@ -1370,7 +1371,7 @@ public:
virtmem::jit_set_exec(protStart, protSize, false);
u8 *retAddr = *(u8 **)context.rsp - 5;
BlockCompiler compiler(*codeBuffer, retAddr);
BlockCompiler compiler(*sh4ctx, *codeBuffer, retAddr);
bool rc = false;
try {
rc = compiler.rewriteMemAccess(context);
@ -1397,7 +1398,7 @@ public:
size_t protSize = codeBuffer->getFreeSpace();
virtmem::jit_set_exec(protStart, protSize, false);
BlockCompiler compiler(*codeBuffer);
BlockCompiler compiler(*sh4ctx, *codeBuffer);
try {
compiler.genMainloop();
} catch (const Xbyak::Error& e) {
@ -1407,6 +1408,7 @@ public:
}
private:
Sh4Context *sh4ctx = nullptr;
Sh4CodeBuffer *codeBuffer = nullptr;
BlockCompiler *ccCompiler = nullptr;
};

View File

@ -28,8 +28,9 @@ template<typename T, bool ArchX64>
class BaseXbyakRec : public Xbyak::CodeGenerator
{
protected:
BaseXbyakRec(Sh4CodeBuffer& codeBuffer) : BaseXbyakRec(codeBuffer, (u8 *)codeBuffer.get()) { }
BaseXbyakRec(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : Xbyak::CodeGenerator(codeBuffer.getFreeSpace(), code_ptr), codeBuffer(codeBuffer) { }
BaseXbyakRec(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseXbyakRec(sh4ctx, codeBuffer, (u8 *)codeBuffer.get()) { }
BaseXbyakRec(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr)
: Xbyak::CodeGenerator(codeBuffer.getFreeSpace(), code_ptr), sh4ctx(sh4ctx), codeBuffer(codeBuffer) { }
using BinaryOp = void (BaseXbyakRec::*)(const Xbyak::Operand&, const Xbyak::Operand&);
using BinaryFOp = void (BaseXbyakRec::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
@ -773,6 +774,7 @@ protected:
}
}
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
private:

View File

@ -69,11 +69,13 @@ void X86RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
struct DynaRBI : RuntimeBlockInfo
{
DynaRBI(Sh4CodeBuffer *codeBuffer) : codeBuffer(codeBuffer) {}
DynaRBI(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer)
: sh4ctx(sh4ctx), codeBuffer(codeBuffer) {}
u32 Relink() override;
private:
Sh4CodeBuffer *codeBuffer;
Sh4Context& sh4ctx;
Sh4CodeBuffer& codeBuffer;
};
@ -108,26 +110,26 @@ void X86Compiler::compile(RuntimeBlockInfo* block, bool force_checks, bool optim
if (mmu_enabled() && block->has_fpu_op)
{
Xbyak::Label fpu_enabled;
mov(eax, dword[&Sh4cntx.sr.status]);
mov(eax, dword[&sh4ctx.sr.status]);
test(eax, 0x8000); // test SR.FD bit
jz(fpu_enabled);
push(Sh4Ex_FpuDisabled); // exception code
push(block->vaddr); // pc
call((void (*)())Do_Exception);
add(esp, 8);
mov(ecx, dword[&Sh4cntx.pc]);
mov(ecx, dword[&sh4ctx.pc]);
jmp((const void *)no_update);
L(fpu_enabled);
}
mov(eax, dword[&Sh4cntx.cycle_counter]);
mov(eax, dword[&sh4ctx.cycle_counter]);
test(eax, eax);
Xbyak::Label no_up;
jg(no_up);
mov(ecx, block->vaddr);
call((const void *)intc_sched);
L(no_up);
sub(dword[&Sh4cntx.cycle_counter], block->guest_cycles);
sub(dword[&sh4ctx.cycle_counter], block->guest_cycles);
regalloc.doAlloc(block);
@ -297,16 +299,16 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block)
case BET_DynamicIntr:
if (block->BlockType == BET_StaticIntr)
{
mov(dword[&Sh4cntx.pc], block->NextBlock);
mov(dword[&sh4ctx.pc], block->NextBlock);
}
else
{
mov(eax, dword[GetRegPtr(reg_pc_dyn)]);
mov(dword[&Sh4cntx.pc], eax);
mov(dword[&sh4ctx.pc], eax);
}
call(UpdateINTC);
mov(ecx, dword[&Sh4cntx.pc]);
mov(ecx, dword[&sh4ctx.pc]);
jmp((const void *)no_update);
break;
@ -323,7 +325,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block)
u32 DynaRBI::Relink()
{
X86Compiler *compiler = new X86Compiler(*codeBuffer, (u8*)code + relink_offset);
X86Compiler *compiler = new X86Compiler(sh4ctx, codeBuffer, (u8*)code + relink_offset);
u32 codeSize = compiler->relinkBlock(this);
delete compiler;
@ -439,14 +441,14 @@ void X86Compiler::genMainloop()
Xbyak::Label longjmpLabel;
L(longjmpLabel);
mov(ecx, dword[&Sh4cntx.pc]);
mov(ecx, dword[&sh4ctx.pc]);
//next_pc _MUST_ be on ecx
Xbyak::Label cleanup;
//no_update:
Xbyak::Label no_updateLabel;
L(no_updateLabel);
mov(edx, dword[&Sh4cntx.CpuRunning]);
mov(edx, dword[&sh4ctx.CpuRunning]);
cmp(edx, 0);
jz(cleanup);
if (!mmu_enabled())
@ -458,14 +460,14 @@ void X86Compiler::genMainloop()
}
else
{
mov(dword[&Sh4cntx.pc], ecx);
mov(dword[&sh4ctx.pc], ecx);
call((void *)bm_GetCodeByVAddr);
jmp(eax);
}
//cleanup:
L(cleanup);
mov(dword[&Sh4cntx.pc], ecx);
mov(dword[&sh4ctx.pc], ecx);
#ifndef _WIN32
// 16-byte alignment
add(esp, 12);
@ -481,7 +483,7 @@ void X86Compiler::genMainloop()
Xbyak::Label do_iter;
L(do_iter);
add(esp, 4); // pop intc_sched() return address
mov(ecx, dword[&Sh4cntx.pc]);
mov(ecx, dword[&sh4ctx.pc]);
jmp(no_updateLabel);
//ngen_LinkBlock_Shared_stub:
@ -503,8 +505,8 @@ void X86Compiler::genMainloop()
unwinder.endProlog(0);
Xbyak::Label intc_schedLabel;
L(intc_schedLabel);
add(dword[&Sh4cntx.cycle_counter], SH4_TIMESLICE);
mov(dword[&Sh4cntx.pc], ecx);
add(dword[&sh4ctx.cycle_counter], SH4_TIMESLICE);
mov(dword[&sh4ctx.pc], ecx);
call((void *)UpdateSystem_INTC);
cmp(eax, 0);
jnz(do_iter);
@ -525,7 +527,7 @@ void X86Compiler::genMainloop()
//ngen_LinkBlock_Generic_stub:
Xbyak::Label ngen_LinkBlock_Generic_label;
L(ngen_LinkBlock_Generic_label);
mov(edx, dword[&Sh4cntx.jdyn]);
mov(edx, dword[&sh4ctx.jdyn]);
jmp(ngen_LinkBlock_Shared_stub);
genMemHandlers();
@ -568,7 +570,7 @@ void X86Compiler::genMainloop()
Xbyak::Label jumpblockLabel;
cmp(eax, 0);
jne(jumpblockLabel);
mov(ecx, dword[&Sh4cntx.pc]);
mov(ecx, dword[&sh4ctx.pc]);
jmp(no_updateLabel);
L(jumpblockLabel);
}
@ -809,7 +811,7 @@ void X86Compiler::checkBlock(bool smc_checks, RuntimeBlockInfo* block)
if (mmu_enabled())
{
mov(eax, dword[&Sh4cntx.pc]);
mov(eax, dword[&sh4ctx.pc]);
cmp(eax, block->vaddr);
jne(reinterpret_cast<const void*>(ngen_blockcheckfail));
}
@ -842,8 +844,9 @@ public:
sh4Dynarec = this;
}
void init(Sh4CodeBuffer& codeBuffer) override
void init(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) override
{
this->sh4ctx = &sh4ctx;
this->codeBuffer = &codeBuffer;
}
@ -857,7 +860,7 @@ public:
if (::mainloop != nullptr)
return;
compiler = new X86Compiler(*codeBuffer);
compiler = new X86Compiler(*sh4ctx, *codeBuffer);
try {
compiler->genMainloop();
@ -876,10 +879,10 @@ public:
::mainloop = nullptr;
unwinder.clear();
if (p_sh4rcb->cntx.CpuRunning)
if (sh4ctx->CpuRunning)
{
// Force the dynarec out of mainloop() to regenerate it
p_sh4rcb->cntx.CpuRunning = 0;
sh4ctx->CpuRunning = 0;
restarting = true;
}
else
@ -889,7 +892,7 @@ public:
RuntimeBlockInfo* allocateBlock() override
{
generate_mainloop();
return new DynaRBI(codeBuffer);
return new DynaRBI(*sh4ctx, *codeBuffer);
}
void mainloop(void* v_cntx) override
@ -911,7 +914,7 @@ public:
void compile(RuntimeBlockInfo* block, bool smc_checks, bool optimise) override
{
compiler = new X86Compiler(*codeBuffer);
compiler = new X86Compiler(*sh4ctx, *codeBuffer);
try {
compiler->compile(block, smc_checks, optimise);
@ -928,7 +931,7 @@ public:
// init() not called yet
return false;
u8 *rewriteAddr = *(u8 **)context.esp - 5;
X86Compiler *compiler = new X86Compiler(*codeBuffer, rewriteAddr);
X86Compiler *compiler = new X86Compiler(*sh4ctx, *codeBuffer, rewriteAddr);
bool rv = compiler->rewriteMemAccess(context);
delete compiler;
@ -956,6 +959,7 @@ public:
}
private:
Sh4Context *sh4ctx = nullptr;
Sh4CodeBuffer *codeBuffer = nullptr;
X86Compiler *compiler = nullptr;
bool restarting = false;

View File

@ -31,8 +31,8 @@ class X86Compiler : public BaseXbyakRec<X86Compiler, false>
public:
using BaseCompiler = BaseXbyakRec<X86Compiler, false>;
X86Compiler(Sh4CodeBuffer& codeBuffer) : BaseCompiler(codeBuffer), regalloc(this) { }
X86Compiler(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(codeBuffer, code_ptr), regalloc(this) { }
X86Compiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer) : BaseCompiler(sh4ctx, codeBuffer), regalloc(this) { }
X86Compiler(Sh4Context& sh4ctx, Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(sh4ctx, codeBuffer, code_ptr), regalloc(this) { }
void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise);

View File

@ -139,12 +139,12 @@ void X86Compiler::genMemHandlers()
and_(ecx, 0x3F);
if (size == MemSize::S32)
mov(dword[(size_t)p_sh4rcb->cntx.sq_buffer + ecx], edx);
mov(dword[(size_t)sh4ctx.sq_buffer + ecx], edx);
else if (size >= MemSize::F32)
{
movss(dword[(size_t)p_sh4rcb->cntx.sq_buffer + ecx], xmm0);
movss(dword[(size_t)sh4ctx.sq_buffer + ecx], xmm0);
if (size == MemSize::F64)
movss(dword[((size_t)p_sh4rcb->cntx.sq_buffer + 4) + ecx], xmm1);
movss(dword[((size_t)sh4ctx.sq_buffer + 4) + ecx], xmm1);
}
ret();
L(no_sqw);
@ -327,8 +327,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
push(block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
}
if (op.rs1.is_imm() && op.rs1.imm_value())
mov(dword[&Sh4cntx.pc], op.rs2.imm_value());
mov(ecx, (uintptr_t)&Sh4cntx);
mov(dword[&sh4ctx.pc], op.rs2.imm_value());
mov(ecx, (uintptr_t)&sh4ctx);
mov(edx, op.rs3.imm_value());
if (!mmu_enabled())
genCall(OpDesc[op.rs3.imm_value()]->oph);