diff --git a/core/deps/vixl/aarch64/assembler-aarch64.cc b/core/deps/vixl/aarch64/assembler-aarch64.cc index 1c3ea65b7..d4af145bc 100644 --- a/core/deps/vixl/aarch64/assembler-aarch64.cc +++ b/core/deps/vixl/aarch64/assembler-aarch64.cc @@ -58,7 +58,7 @@ void Assembler::bind(Label* label) { void Assembler::BindToOffset(Label* label, ptrdiff_t offset) { - VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset())); +// VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset())); // hack to have negative offsets VIXL_ASSERT(offset % kInstructionSize == 0); label->Bind(offset); diff --git a/core/deps/vixl/aarch64/assembler-aarch64.h b/core/deps/vixl/aarch64/assembler-aarch64.h index ecdba12c3..b3cdb7749 100644 --- a/core/deps/vixl/aarch64/assembler-aarch64.h +++ b/core/deps/vixl/aarch64/assembler-aarch64.h @@ -49,7 +49,7 @@ class Label { VIXL_ASSERT(!IsLinked()); } - bool IsBound() const { return location_ >= 0; } + bool IsBound() const { return location_ != kLocationUnbound; } bool IsLinked() const { return !links_.empty(); } ptrdiff_t GetLocation() const { return location_; } diff --git a/core/deps/vixl/code-buffer-vixl.h b/core/deps/vixl/code-buffer-vixl.h index d0d815e44..fa7963479 100644 --- a/core/deps/vixl/code-buffer-vixl.h +++ b/core/deps/vixl/code-buffer-vixl.h @@ -78,7 +78,7 @@ class CodeBuffer { template T GetOffsetAddress(ptrdiff_t offset) const { VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); - VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_))); + //VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_))); // hack to have negative offsets return reinterpret_cast(buffer_ + offset); } diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index d86288d8b..b69d79090 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -30,9 +30,6 @@ op_agent_t oprofHandle; typedef vector bm_List; -#define BLOCKS_IN_PAGE_LIST_COUNT (RAM_SIZE/4096) -bm_List blocks_page[BLOCKS_IN_PAGE_LIST_COUNT]; - bm_List all_blocks; bm_List del_blocks; #include @@ -140,12 +137,6 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code) void bm_AddBlock(RuntimeBlockInfo* blk) { - /* - if (IsOnRam(blk->addr) && PageIsConst(blk->addr)) - { - blocks_page[(blk->addr&RAM_MASK)/PAGE_SIZE].push_back(blk); - } - */ all_blocks.push_back(blk); if (blkmap.find(blk)!=blkmap.end()) { @@ -373,11 +364,6 @@ void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ) void bm_Reset() { ngen_ResetBlocks(); - for (u32 i=0; i #include "types.h" #include "decoder.h" #pragma once @@ -71,6 +72,7 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core u32 memops; u32 linkedmemops; + std::map memory_accesses; // key is host pc when access is made, value is opcode id }; struct CachedBlockInfo: RuntimeBlockInfo_Core diff --git a/core/hw/sh4/dyna/regalloc.h b/core/hw/sh4/dyna/regalloc.h index cbf142971..eee756571 100644 --- a/core/hw/sh4/dyna/regalloc.h +++ b/core/hw/sh4/dyna/regalloc.h @@ -465,10 +465,6 @@ struct RegAlloc for (int i=reg_gbr;i<=reg_fpul;i++) flush_span(i); - for (int i=reg_gbr;i<=reg_fpul;i++) - flush_span(i); - - switch(OpDesc[op->rs3._imm]->mask) { case Mask_imm8: diff --git a/core/linux/common.cpp b/core/linux/common.cpp index 3d2efb684..43c367b09 100644 --- a/core/linux/common.cpp +++ b/core/linux/common.cpp @@ -50,7 +50,7 @@ void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) { unat pc = (unat)ctx.pc; bool dyna_cde = (pc>(unat)CodeCache) && (pc<(unat)(CodeCache + CODE_SIZE)); - printf("SIGILL @ %08X, fault_handler+0x%08X ... %08X -> was not in vram, %d\n", pc, pc - (unat)sigill_handler, (unat)si->si_addr, dyna_cde); + printf("SIGILL @ %lx -> %p was not in vram, dynacode:%d\n", pc, si->si_addr, dyna_cde); //printf("PC is used here %08X\n", pc); kill(getpid(), SIGABRT); @@ -93,14 +93,17 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx) #elif HOST_CPU == CPU_X64 //x64 has no rewrite support #elif HOST_CPU == CPU_ARM64 - // arm64 has no rewrite support + else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0)) + { + context_to_segfault(&ctx, segfault_ctx); + } #else #error JIT: Not supported arch #endif #endif else { - printf("SIGSEGV @ %u (fault_handler+0x%u) ... %p -> was not in vram\n", ctx.pc, ctx.pc - (unat)fault_handler, si->si_addr); + printf("SIGSEGV @ %lx -> %p was not in vram, dynacode:%d\n", ctx.pc, si->si_addr, dyna_cde); die("segfault"); signal(SIGSEGV, SIG_DFL); } diff --git a/core/linux/context.cpp b/core/linux/context.cpp index 2171e8dff..7c317c401 100644 --- a/core/linux/context.cpp +++ b/core/linux/context.cpp @@ -53,10 +53,6 @@ void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_se #endif #elif HOST_CPU == CPU_ARM64 bicopy(reictx->pc, MCTX(.pc), to_segfault); - u64* r =(u64*) &MCTX(.regs[0]); - - for (int i = 0; i < 31; i++) - bicopy(reictx->r[i], r[i], to_segfault); #elif HOST_CPU == CPU_X86 #if defined(__FreeBSD__) bicopy(reictx->pc, MCTX(.mc_eip), to_segfault); diff --git a/core/linux/context.h b/core/linux/context.h index 361fc3904..667be6a7d 100644 --- a/core/linux/context.h +++ b/core/linux/context.h @@ -12,8 +12,6 @@ struct rei_host_context_t { u32 esp; #elif HOST_CPU == CPU_ARM u32 r[15]; -#elif HOST_CPU == CPU_ARM64 - u64 r[31]; #endif }; diff --git a/core/rec-ARM64/arm64_regalloc.h b/core/rec-ARM64/arm64_regalloc.h index 63a23545f..fb1458e23 100644 --- a/core/rec-ARM64/arm64_regalloc.h +++ b/core/rec-ARM64/arm64_regalloc.h @@ -34,7 +34,7 @@ enum eFReg { S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 }; -static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, (eReg)-1 }; +static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, W29, (eReg)-1 }; static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 }; class Arm64Assembler; diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp index 107288ba8..9b7874efb 100644 --- a/core/rec-ARM64/rec_arm64.cpp +++ b/core/rec-ARM64/rec_arm64.cpp @@ -116,8 +116,7 @@ void ngen_mainloop(void* v_cntx) "run_loop: \n\t" "ldr w0, [x28, %[CpuRunning]] \n\t" - "cmp w0, #0 \n\t" - "b.eq end_run_loop \n\t" + "cbz w0, end_run_loop \n\t" "slice_loop: \n\t" "ldr w0, [x28, %[pc]] \n\t" @@ -179,7 +178,10 @@ class Arm64Assembler : public MacroAssembler typedef void (MacroAssembler::*Arm64Op_RROF)(const Register&, const Register&, const Operand&, enum FlagsUpdate); public: - Arm64Assembler() : MacroAssembler((u8 *)emit_GetCCPtr(), 64 * 1024), regalloc(this) + Arm64Assembler() : Arm64Assembler(emit_GetCCPtr()) + { + } + Arm64Assembler(void *buffer) : MacroAssembler((u8 *)buffer, 64 * 1024), regalloc(this) { call_regs.push_back(&w0); call_regs.push_back(&w1); @@ -226,24 +228,6 @@ public: ((*this).*arm_op2)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), op3, LeaveFlags); } - template - void ngen_CallRuntime(R (*function)(P...)) - { - if (!frame_reg_saved) - { - Str(x30, MemOperand(sp, -16, PreIndex)); - frame_reg_saved = true; - } - Literal *function_address = function_literals[(void*)function]; - if (function_address == NULL) - { - function_address = new Literal(reinterpret_cast(function), GetLiteralPool(), RawLiteral::kDeletedOnPoolDestruction); - function_literals[(void*)function] = function_address; - } - Ldr(x9, function_address); - Blr(x9); - } - const Register& GenMemAddr(const shil_opcode& op, const Register* raddr = NULL) { const Register* ret_reg = raddr == NULL ? &w0 : raddr; @@ -274,6 +258,7 @@ public: void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) { //printf("REC-ARM64 compiling %08x\n", block->addr); + this->block = block; if (force_checks) CheckBlock(block); @@ -298,7 +283,7 @@ public: } Mov(*call_regs[0], op.rs3._imm); - ngen_CallRuntime(OpDesc[op.rs3._imm]->oph); + GenCallRuntime(OpDesc[op.rs3._imm]->oph); break; case shop_jcond: @@ -350,203 +335,18 @@ public: break; case shop_readm: - { - u32 size = op.flags & 0x7f; - bool is_float = op.rs2.is_r32f() || op.rd.is_r32f(); - - if (op.rs1.is_imm()) - { - bool isram = false; - void* ptr = _vmem_read_const(op.rs1._imm, isram, size); - - if (isram) - { - Ldr(x1, reinterpret_cast(ptr)); - switch (size) - { - case 2: - Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW)); - break; - - case 4: - if (is_float) - Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1)); - else - Ldr(regalloc.MapRegister(op.rd), MemOperand(x1)); - break; - - default: - die("Invalid size"); - break; - } - } - else - { - // Not RAM - Mov(w0, op.rs1._imm); - - switch(size) - { - case 1: - ngen_CallRuntime((void (*)())ptr); - Sxtb(w0, w0); - break; - - case 2: - ngen_CallRuntime((void (*)())ptr); - Sxth(w0, w0); - break; - - case 4: - ngen_CallRuntime((void (*)())ptr); - break; - - case 8: - die("SZ_64F not supported"); - break; - } - - if (regalloc.IsAllocg(op.rd)) - Mov(regalloc.MapRegister(op.rd), w0); - else - Fmov(regalloc.MapVRegister(op.rd), w0); - } - } - else - { -#if 0 // Direct memory access. Need to handle SIGSEGV and rewrite block as needed (?) - const Register& raddr = GenMemAddr(&op); - - if (_nvmem_enabled()) - { - Add(w1, raddr, sizeof(Sh4Context)); - Bfc(w1, 29, 3); // addr &= ~0xE0000000 - - switch(size) - { - case 1: - Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW)); - break; - - case 2: - Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW)); - break; - - case 4: - if (!is_float) - Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1)); - else - Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1)); - break; - - case 8: - // TODO use regalloc - Ldr(x0, MemOperand(x28, x1)); - Str(x0, sh4_context_mem_operand(op.rd.reg_ptr())); - break; - } - } - else - { - // TODO - die("Not implemented") - } -#endif - - GenMemAddr(op, call_regs[0]); - - switch (size) - { - case 1: - ngen_CallRuntime(ReadMem8); - Sxtb(w0, w0); - break; - - case 2: - ngen_CallRuntime(ReadMem16); - Sxth(w0, w0); - break; - - case 4: - ngen_CallRuntime(ReadMem32); - break; - - case 8: - ngen_CallRuntime(ReadMem64); - break; - - default: - die("1..8 bytes"); - break; - } - - if (size != 8) - host_reg_to_shil_param(op.rd, w0); - else - { -#ifdef EXPLODE_SPANS - verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1)); - Fmov(regalloc.MapVRegister(op.rd, 0), w0); - Lsr(x0, x0, 32); - Fmov(regalloc.MapVRegister(op.rd, 1), w0); -#else - host_reg_to_shil_param(op.rd, x0); -#endif - } - } - } - break; + GenReadMemory(op, i); + break; case shop_writem: - { - GenMemAddr(op, call_regs[0]); - - u32 size = op.flags & 0x7f; - if (size != 8) - shil_param_to_host_reg(op.rs2, *call_regs[1]); - else - { -#ifdef EXPLODE_SPANS - verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1)); - Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1)); - Lsl(*call_regs64[1], *call_regs64[1], 32); - Fmov(w2, regalloc.MapVRegister(op.rs2, 0)); - Orr(*call_regs64[1], *call_regs64[1], x2); -#else - shil_param_to_host_reg(op.rs2, *call_regs64[1]); -#endif - } - - switch (size) - { - case 1: - ngen_CallRuntime(WriteMem8); - break; - - case 2: - ngen_CallRuntime(WriteMem16); - break; - - case 4: - ngen_CallRuntime(WriteMem32); - break; - - case 8: - ngen_CallRuntime(WriteMem64); - break; - - default: - die("1..8 bytes"); - break; - } - } - break; + GenWriteMemory(op, i); + break; case shop_sync_sr: - ngen_CallRuntime(UpdateSR); + GenCallRuntime(UpdateSR); break; case shop_sync_fpscr: - ngen_CallRuntime(UpdateFPSCR); + GenCallRuntime(UpdateFPSCR); break; case shop_swaplb: @@ -731,11 +531,7 @@ public: Ldr(x9, MemOperand(x9)); Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer)); } - if (!frame_reg_saved) - { - Str(x30, MemOperand(sp, -16, PreIndex)); - frame_reg_saved = true; - } + SaveFramePointer(); if (op.flags == 0x1337) Blr(x9); else @@ -921,7 +717,7 @@ public: } Str(w10, sh4_context_mem_operand(&next_pc)); - ngen_CallRuntime(UpdateINTC); + GenCallRuntime(UpdateINTC); break; default: @@ -932,31 +728,7 @@ public: Ldr(x30, MemOperand(sp, 16, PostIndex)); Ret(); - Label code_end; - Bind(&code_end); - - FinalizeCode(); - - block->code = GetBuffer()->GetStartAddress(); - block->host_code_size = GetBuffer()->GetSizeInBytes(); - block->host_opcodes = GetLabelAddress(&code_end) - GetBuffer()->GetStartAddress(); - - emit_Skip(block->host_code_size); - CacheFlush((void*)block->code, GetBuffer()->GetEndAddress()); -#if 0 - Instruction* instr_start = GetBuffer()->GetStartAddress(); - Instruction* instr_end = GetLabelAddress(&code_end); - Decoder decoder; - Disassembler disasm; - decoder.AppendVisitor(&disasm); - Instruction* instr; - for (instr = instr_start; instr < instr_end; instr += kInstructionSize) { - decoder.Decode(instr); - printf("VIXL\t %p:\t%s\n", - reinterpret_cast(instr), - disasm.GetOutput()); - } -#endif + Finalize(); } void ngen_CC_Start(shil_opcode* op) @@ -1037,7 +809,7 @@ public: break; } } - ngen_CallRuntime((void (*)())function); + GenCallRuntime((void (*)())function); } MemOperand sh4_context_mem_operand(void *p) @@ -1047,7 +819,346 @@ public: return MemOperand(x28, offset); } + void GenReadMemorySlow(const shil_opcode& op) + { + Instruction *start_instruction = GetCursorAddress(); + u32 size = op.flags & 0x7f; + + switch (size) + { + case 1: + GenCallRuntime(ReadMem8); + Sxtb(w0, w0); + break; + + case 2: + GenCallRuntime(ReadMem16); + Sxth(w0, w0); + break; + + case 4: + GenCallRuntime(ReadMem32); + break; + + case 8: + GenCallRuntime(ReadMem64); + break; + + default: + die("1..8 bytes"); + break; + } + + if (size != 8) + host_reg_to_shil_param(op.rd, w0); + else + { +#ifdef EXPLODE_SPANS + verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1)); + Fmov(regalloc.MapVRegister(op.rd, 0), w0); + Lsr(x0, x0, 32); + Fmov(regalloc.MapVRegister(op.rd, 1), w0); +#else + host_reg_to_shil_param(op.rd, x0); +#endif + } + EnsureCodeSize(start_instruction, read_memory_rewrite_size); + } + + void GenWriteMemorySlow(const shil_opcode& op) + { + Instruction *start_instruction = GetCursorAddress(); + u32 size = op.flags & 0x7f; + switch (size) + { + case 1: + GenCallRuntime(WriteMem8); + break; + + case 2: + GenCallRuntime(WriteMem16); + break; + + case 4: + GenCallRuntime(WriteMem32); + break; + + case 8: + GenCallRuntime(WriteMem64); + break; + + default: + die("1..8 bytes"); + break; + } + EnsureCodeSize(start_instruction, write_memory_rewrite_size); + } + + void InitializeRewrite(RuntimeBlockInfo *block, size_t opid) + { + regalloc.DoAlloc(block); + regalloc.current_opid = opid; + frame_reg_saved = true; + } + + void Finalize(bool rewrite = false) + { + Label code_end; + Bind(&code_end); + + FinalizeCode(); + + if (!rewrite) + { + block->code = GetBuffer()->GetStartAddress(); + block->host_code_size = GetBuffer()->GetSizeInBytes(); + block->host_opcodes = GetLabelAddress(&code_end) - GetBuffer()->GetStartAddress(); + + emit_Skip(block->host_code_size); + } + CacheFlush(GetBuffer()->GetStartAddress(), GetBuffer()->GetEndAddress()); +#if 0 + if (rewrite) + { + Instruction* instr_start = GetBuffer()->GetStartAddress(); + Instruction* instr_end = GetLabelAddress(&code_end); +// Instruction* instr_end = (Instruction*)((u8 *)block->code + block->host_code_size); + Decoder decoder; + Disassembler disasm; + decoder.AppendVisitor(&disasm); + Instruction* instr; + for (instr = instr_start; instr < instr_end; instr += kInstructionSize) { + decoder.Decode(instr); + printf("VIXL\t %p:\t%s\n", + reinterpret_cast(instr), + disasm.GetOutput()); + } + } +#endif + } + private: + template + void GenCallRuntime(R (*function)(P...)) + { + SaveFramePointer(); + uintptr_t offset = reinterpret_cast(function) - GetBuffer()->GetStartAddress(); + Label function_label; + BindToOffset(&function_label, offset); + Bl(&function_label); + } + + void GenReadMemory(const shil_opcode& op, size_t opid) + { + u32 size = op.flags & 0x7f; + + if (GenReadMemoryImmediate(op)) + return; + + SaveFramePointer(); // needed if rewritten + + GenMemAddr(op, call_regs[0]); + + if (GenReadMemoryFast(op, opid)) + return; + + GenReadMemorySlow(op); + } + + bool GenReadMemoryImmediate(const shil_opcode& op) + { + if (!op.rs1.is_imm()) + return false; + + u32 size = op.flags & 0x7f; + bool isram = false; + void* ptr = _vmem_read_const(op.rs1._imm, isram, size); + + if (isram) + { + Ldr(x1, reinterpret_cast(ptr)); + switch (size) + { + case 2: + Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW)); + break; + + case 4: + if (op.rd.is_r32f()) + Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1)); + else + Ldr(regalloc.MapRegister(op.rd), MemOperand(x1)); + break; + + default: + die("Invalid size"); + break; + } + } + else + { + // Not RAM + Mov(w0, op.rs1._imm); + + switch(size) + { + case 1: + GenCallRuntime((void (*)())ptr); + Sxtb(w0, w0); + break; + + case 2: + GenCallRuntime((void (*)())ptr); + Sxth(w0, w0); + break; + + case 4: + GenCallRuntime((void (*)())ptr); + break; + + case 8: + die("SZ_64F not supported"); + break; + } + + if (regalloc.IsAllocg(op.rd)) + Mov(regalloc.MapRegister(op.rd), w0); + else + Fmov(regalloc.MapVRegister(op.rd), w0); + } + + return true; + } + + bool GenReadMemoryFast(const shil_opcode& op, size_t opid) + { + // Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite() + if (!_nvmem_enabled()) + return false; + + Instruction *start_instruction = GetCursorAddress(); + + // WARNING: the rewrite code relies on having two ops before the memory access + // Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code + Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags); + Bfc(w1, 29, 3); // addr &= ~0xE0000000 + + //printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress(), this->block->addr); + this->block->memory_accesses[GetCursorAddress()] = (u32)opid; + + u32 size = op.flags & 0x7f; + switch(size) + { + case 1: + Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW)); + break; + + case 2: + Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW)); + break; + + case 4: + if (!op.rd.is_r32f()) + Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1)); + else + Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1)); + break; + + case 8: + Ldr(x1, MemOperand(x28, x1)); + break; + } + + if (size == 8) + { +#ifdef EXPLODE_SPANS + verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1)); + Fmov(regalloc.MapVRegister(op.rd, 0), w1); + Lsr(x1, x1, 32); + Fmov(regalloc.MapVRegister(op.rd, 1), w1); +#else + Str(x1, sh4_context_mem_operand(op.rd.reg_ptr())); +#endif + } + EnsureCodeSize(start_instruction, read_memory_rewrite_size); + + return true; + } + + void GenWriteMemory(const shil_opcode& op, size_t opid) + { + SaveFramePointer(); // needed if rewritten + + GenMemAddr(op, call_regs[0]); + + u32 size = op.flags & 0x7f; + if (size != 8) + shil_param_to_host_reg(op.rs2, *call_regs[1]); + else + { +#ifdef EXPLODE_SPANS + verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1)); + Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1)); + Lsl(*call_regs64[1], *call_regs64[1], 32); + Fmov(w2, regalloc.MapVRegister(op.rs2, 0)); + Orr(*call_regs64[1], *call_regs64[1], x2); +#else + shil_param_to_host_reg(op.rs2, *call_regs64[1]); +#endif + } + if (GenWriteMemoryFast(op, opid)) + return; + + GenWriteMemorySlow(op); + } + + bool GenWriteMemoryFast(const shil_opcode& op, size_t opid) + { + // Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite() + if (!_nvmem_enabled()) + return false; + + Instruction *start_instruction = GetCursorAddress(); + + // WARNING: the rewrite code relies on having two ops before the memory access + // Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code + Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags); + Bfc(w7, 29, 3); // addr &= ~0xE0000000 + + //printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress(), this->block->addr); + this->block->memory_accesses[GetCursorAddress()] = (u32)opid; + + u32 size = op.flags & 0x7f; + switch(size) + { + case 1: + Strb(w1, MemOperand(x28, x7, SXTW)); + break; + + case 2: + Strh(w1, MemOperand(x28, x7, SXTW)); + break; + + case 4: + Str(w1, MemOperand(x28, x7)); + break; + + case 8: + Str(x1, MemOperand(x28, x7)); + break; + } + EnsureCodeSize(start_instruction, write_memory_rewrite_size); + + return true; + } + + void EnsureCodeSize(Instruction *start_instruction, int code_size) + { + while (GetCursorAddress() - start_instruction < code_size * kInstructionSize) + Nop(); + verify (GetCursorAddress() - start_instruction == code_size * kInstructionSize); + } + void CheckBlock(RuntimeBlockInfo* block) { s32 sz = block->sh4_code_size; @@ -1108,10 +1219,7 @@ private: else if (param.is_reg()) { if (param.is_r64f()) - { - // TODO use regalloc Ldr(reg, sh4_context_mem_operand(param.reg_ptr())); - } else if (param.is_r32f()) Fmov(reg, regalloc.MapVRegister(param)); else @@ -1127,7 +1235,6 @@ private: { if (reg.Is64Bits()) { - // TODO use regalloc Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr())); } else if (regalloc.IsAllocg(param)) @@ -1146,6 +1253,15 @@ private: } } + void SaveFramePointer() + { + if (!frame_reg_saved) + { + Str(x30, MemOperand(sp, -16, PreIndex)); + frame_reg_saved = true; + } + } + struct CC_PS { CanonicalParamType type; @@ -1157,7 +1273,10 @@ private: std::vector call_fregs; Arm64RegAlloc regalloc; bool frame_reg_saved = false; - std::map*> function_literals; + RuntimeBlockInfo* block; + const int read_memory_rewrite_size = 6; // worst case for u64: add, bfc, ldr, fmov, lsr, fmov + // FIXME rewrite size per read/write size? + const int write_memory_rewrite_size = 3; }; static Arm64Assembler* compiler; @@ -1194,6 +1313,39 @@ void ngen_CC_Finish(shil_opcode* op) } +bool ngen_Rewrite(unat& host_pc, unat, unat) +{ + u32 guest_pc = p_sh4rcb->cntx.pc; + //printf("ngen_Rewrite pc %p code addr %08x\n", host_pc, guest_pc); + RuntimeBlockInfo *block = bm_GetBlock(guest_pc); + if (block == NULL) + { + printf("ngen_Rewrite: Block at %08x not found\n", guest_pc); + return false; + } + u32 *code_ptr = (u32*)host_pc; + auto it = block->memory_accesses.find(code_ptr); + if (it == block->memory_accesses.end()) + { + printf("ngen_Rewrite: memory access at %p not found (%lu entries)\n", code_ptr, block->memory_accesses.size()); + return false; + } + u32 opid = it->second; + verify(opid < block->oplist.size()); + const shil_opcode& op = block->oplist[opid]; + Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2); // Skip the 2 preceding ops (bic, add) + assembler->InitializeRewrite(block, opid); + if (op.op == shop_readm) + assembler->GenReadMemorySlow(op); + else + assembler->GenWriteMemorySlow(op); + assembler->Finalize(true); + delete assembler; + host_pc = (unat)(code_ptr - 2); + + return true; +} + void Arm64RegAlloc::Preload(u32 reg, eReg nreg) { assembler->Ldr(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));