arm64: direct memory access and jit rewrite

generate direct vmem read & write accesses
trap SIGSEGV and rewrite the access using the slow path
add w29 to the allocatable registers
get rid of literals and use pc-relative branching
minor optimizations and cleanup
Flyinghead 2019-01-16 13:04:16 +01:00
parent c2a048e8d8
commit cb8e81d473
11 changed files with 414 additions and 281 deletions
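
The core of the change, as a minimal C++ sketch (hypothetical names, not the actual code from this commit): guest addresses are masked to their low 29 bits and used as an offset from a host base register (x28 in the emitted code) that maps the guest address space, so most memory ops become a single load or store. If that access faults because the region is not backed by the fast mapping, the SIGSEGV handler calls ngen_Rewrite(), which re-emits the site as a call to the generic ReadMem/WriteMem slow path.

    #include <cstdint>

    static uint8_t* vmem_base;  // host mapping of the guest address space (x28 in the JIT)

    // Fast path: mirrors the Bfc(w1, 29, 3) + Ldr pair emitted by GenReadMemoryFast().
    // The Add of sizeof(Sh4Context) in the real code only adjusts for where x28 points
    // and is omitted here.
    inline uint32_t fast_read32(uint32_t guest_addr)
    {
        uint32_t offset = guest_addr & ~0xE0000000u;  // clear address bits 29..31
        return *reinterpret_cast<const uint32_t*>(vmem_base + offset);
    }

    // Slow path: full address decoding / MMIO dispatch; this is what the access is
    // rewritten to call when the fast path takes a SIGSEGV.
    uint32_t ReadMem32(uint32_t guest_addr);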


@@ -58,7 +58,7 @@ void Assembler::bind(Label* label) {
void Assembler::BindToOffset(Label* label, ptrdiff_t offset) {
VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset()));
// VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset())); // hack to have negative offsets
VIXL_ASSERT(offset % kInstructionSize == 0);
label->Bind(offset);


@@ -49,7 +49,7 @@ class Label {
VIXL_ASSERT(!IsLinked());
}
bool IsBound() const { return location_ >= 0; }
bool IsBound() const { return location_ != kLocationUnbound; }
bool IsLinked() const { return !links_.empty(); }
ptrdiff_t GetLocation() const { return location_; }


@@ -78,7 +78,7 @@ class CodeBuffer {
template <typename T>
T GetOffsetAddress(ptrdiff_t offset) const {
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
//VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_))); // hack to have negative offsets
return reinterpret_cast<T>(buffer_ + offset);
}


@@ -30,9 +30,6 @@ op_agent_t oprofHandle;
typedef vector<RuntimeBlockInfo*> bm_List;
#define BLOCKS_IN_PAGE_LIST_COUNT (RAM_SIZE/4096)
bm_List blocks_page[BLOCKS_IN_PAGE_LIST_COUNT];
bm_List all_blocks;
bm_List del_blocks;
#include <set>
@@ -140,12 +137,6 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code)
void bm_AddBlock(RuntimeBlockInfo* blk)
{
/*
if (IsOnRam(blk->addr) && PageIsConst(blk->addr))
{
blocks_page[(blk->addr&RAM_MASK)/PAGE_SIZE].push_back(blk);
}
*/
all_blocks.push_back(blk);
if (blkmap.find(blk)!=blkmap.end())
{
@@ -373,11 +364,6 @@ void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ)
void bm_Reset()
{
ngen_ResetBlocks();
for (u32 i=0; i<BLOCKS_IN_PAGE_LIST_COUNT; i++)
{
blocks_page[i].clear();
}
_vmem_bm_reset();
for (size_t i=0; i<all_blocks.size(); i++)


@@ -1,6 +1,7 @@
/*
In case you wonder, the extern "C" stuff are for the assembly code on beagleboard/pandora
*/
#include <map>
#include "types.h"
#include "decoder.h"
#pragma once
@@ -71,6 +72,7 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core
u32 memops;
u32 linkedmemops;
std::map<void*, u32> memory_accesses; // key is host pc when access is made, value is opcode id
};
struct CachedBlockInfo: RuntimeBlockInfo_Core


@@ -465,10 +465,6 @@ struct RegAlloc
for (int i=reg_gbr;i<=reg_fpul;i++)
flush_span(i);
for (int i=reg_gbr;i<=reg_fpul;i++)
flush_span(i);
switch(OpDesc[op->rs3._imm]->mask)
{
case Mask_imm8:


@@ -50,7 +50,7 @@ void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) {
unat pc = (unat)ctx.pc;
bool dyna_cde = (pc>(unat)CodeCache) && (pc<(unat)(CodeCache + CODE_SIZE));
printf("SIGILL @ %08X, fault_handler+0x%08X ... %08X -> was not in vram, %d\n", pc, pc - (unat)sigill_handler, (unat)si->si_addr, dyna_cde);
printf("SIGILL @ %lx -> %p was not in vram, dynacode:%d\n", pc, si->si_addr, dyna_cde);
//printf("PC is used here %08X\n", pc);
kill(getpid(), SIGABRT);
@@ -93,14 +93,17 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
#elif HOST_CPU == CPU_X64
//x64 has no rewrite support
#elif HOST_CPU == CPU_ARM64
// arm64 has no rewrite support
else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0))
{
context_to_segfault(&ctx, segfault_ctx);
}
#else
#error JIT: Not supported arch
#endif
#endif
else
{
printf("SIGSEGV @ %u (fault_handler+0x%u) ... %p -> was not in vram\n", ctx.pc, ctx.pc - (unat)fault_handler, si->si_addr);
printf("SIGSEGV @ %lx -> %p was not in vram, dynacode:%d\n", ctx.pc, si->si_addr, dyna_cde);
die("segfault");
signal(SIGSEGV, SIG_DFL);
}


@@ -53,10 +53,6 @@ void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_se
#endif
#elif HOST_CPU == CPU_ARM64
bicopy(reictx->pc, MCTX(.pc), to_segfault);
u64* r =(u64*) &MCTX(.regs[0]);
for (int i = 0; i < 31; i++)
bicopy(reictx->r[i], r[i], to_segfault);
#elif HOST_CPU == CPU_X86
#if defined(__FreeBSD__)
bicopy(reictx->pc, MCTX(.mc_eip), to_segfault);


@@ -12,8 +12,6 @@ struct rei_host_context_t {
u32 esp;
#elif HOST_CPU == CPU_ARM
u32 r[15];
#elif HOST_CPU == CPU_ARM64
u64 r[31];
#endif
};


@@ -34,7 +34,7 @@ enum eFReg {
S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
};
static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, (eReg)-1 };
static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, W29, (eReg)-1 };
static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 };
class Arm64Assembler;


@@ -116,8 +116,7 @@ void ngen_mainloop(void* v_cntx)
"run_loop: \n\t"
"ldr w0, [x28, %[CpuRunning]] \n\t"
"cmp w0, #0 \n\t"
"b.eq end_run_loop \n\t"
"cbz w0, end_run_loop \n\t"
"slice_loop: \n\t"
"ldr w0, [x28, %[pc]] \n\t"
@@ -179,7 +178,10 @@ class Arm64Assembler : public MacroAssembler
typedef void (MacroAssembler::*Arm64Op_RROF)(const Register&, const Register&, const Operand&, enum FlagsUpdate);
public:
Arm64Assembler() : MacroAssembler((u8 *)emit_GetCCPtr(), 64 * 1024), regalloc(this)
Arm64Assembler() : Arm64Assembler(emit_GetCCPtr())
{
}
Arm64Assembler(void *buffer) : MacroAssembler((u8 *)buffer, 64 * 1024), regalloc(this)
{
call_regs.push_back(&w0);
call_regs.push_back(&w1);
@@ -226,24 +228,6 @@ public:
((*this).*arm_op2)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), op3, LeaveFlags);
}
template <typename R, typename... P>
void ngen_CallRuntime(R (*function)(P...))
{
if (!frame_reg_saved)
{
Str(x30, MemOperand(sp, -16, PreIndex));
frame_reg_saved = true;
}
Literal<uintptr_t> *function_address = function_literals[(void*)function];
if (function_address == NULL)
{
function_address = new Literal<uintptr_t>(reinterpret_cast<uintptr_t>(function), GetLiteralPool(), RawLiteral::kDeletedOnPoolDestruction);
function_literals[(void*)function] = function_address;
}
Ldr(x9, function_address);
Blr(x9);
}
const Register& GenMemAddr(const shil_opcode& op, const Register* raddr = NULL)
{
const Register* ret_reg = raddr == NULL ? &w0 : raddr;
@@ -274,6 +258,7 @@ public:
void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise)
{
//printf("REC-ARM64 compiling %08x\n", block->addr);
this->block = block;
if (force_checks)
CheckBlock(block);
@@ -298,7 +283,7 @@ public:
}
Mov(*call_regs[0], op.rs3._imm);
ngen_CallRuntime(OpDesc[op.rs3._imm]->oph);
GenCallRuntime(OpDesc[op.rs3._imm]->oph);
break;
case shop_jcond:
@@ -350,203 +335,18 @@ public:
break;
case shop_readm:
{
u32 size = op.flags & 0x7f;
bool is_float = op.rs2.is_r32f() || op.rd.is_r32f();
if (op.rs1.is_imm())
{
bool isram = false;
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
if (isram)
{
Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
switch (size)
{
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
break;
case 4:
if (is_float)
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
else
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
default:
die("Invalid size");
break;
}
}
else
{
// Not RAM
Mov(w0, op.rs1._imm);
switch(size)
{
case 1:
ngen_CallRuntime((void (*)())ptr);
Sxtb(w0, w0);
break;
case 2:
ngen_CallRuntime((void (*)())ptr);
Sxth(w0, w0);
break;
case 4:
ngen_CallRuntime((void (*)())ptr);
break;
case 8:
die("SZ_64F not supported");
break;
}
if (regalloc.IsAllocg(op.rd))
Mov(regalloc.MapRegister(op.rd), w0);
else
Fmov(regalloc.MapVRegister(op.rd), w0);
}
}
else
{
#if 0 // Direct memory access. Need to handle SIGSEGV and rewrite block as needed (?)
const Register& raddr = GenMemAddr(&op);
if (_nvmem_enabled())
{
Add(w1, raddr, sizeof(Sh4Context));
Bfc(w1, 29, 3); // addr &= ~0xE0000000
switch(size)
{
case 1:
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
break;
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
break;
case 4:
if (!is_float)
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
else
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
break;
case 8:
// TODO use regalloc
Ldr(x0, MemOperand(x28, x1));
Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
break;
}
}
else
{
// TODO
die("Not implemented")
}
#endif
GenMemAddr(op, call_regs[0]);
switch (size)
{
case 1:
ngen_CallRuntime(ReadMem8);
Sxtb(w0, w0);
break;
case 2:
ngen_CallRuntime(ReadMem16);
Sxth(w0, w0);
break;
case 4:
ngen_CallRuntime(ReadMem32);
break;
case 8:
ngen_CallRuntime(ReadMem64);
break;
default:
die("1..8 bytes");
break;
}
if (size != 8)
host_reg_to_shil_param(op.rd, w0);
else
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
Lsr(x0, x0, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
#else
host_reg_to_shil_param(op.rd, x0);
#endif
}
}
}
break;
GenReadMemory(op, i);
break;
case shop_writem:
{
GenMemAddr(op, call_regs[0]);
u32 size = op.flags & 0x7f;
if (size != 8)
shil_param_to_host_reg(op.rs2, *call_regs[1]);
else
{
#ifdef EXPLODE_SPANS
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
Lsl(*call_regs64[1], *call_regs64[1], 32);
Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
Orr(*call_regs64[1], *call_regs64[1], x2);
#else
shil_param_to_host_reg(op.rs2, *call_regs64[1]);
#endif
}
switch (size)
{
case 1:
ngen_CallRuntime(WriteMem8);
break;
case 2:
ngen_CallRuntime(WriteMem16);
break;
case 4:
ngen_CallRuntime(WriteMem32);
break;
case 8:
ngen_CallRuntime(WriteMem64);
break;
default:
die("1..8 bytes");
break;
}
}
break;
GenWriteMemory(op, i);
break;
case shop_sync_sr:
ngen_CallRuntime(UpdateSR);
GenCallRuntime(UpdateSR);
break;
case shop_sync_fpscr:
ngen_CallRuntime(UpdateFPSCR);
GenCallRuntime(UpdateFPSCR);
break;
case shop_swaplb:
@@ -731,11 +531,7 @@ public:
Ldr(x9, MemOperand(x9));
Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
}
if (!frame_reg_saved)
{
Str(x30, MemOperand(sp, -16, PreIndex));
frame_reg_saved = true;
}
SaveFramePointer();
if (op.flags == 0x1337)
Blr(x9);
else
@@ -921,7 +717,7 @@ public:
}
Str(w10, sh4_context_mem_operand(&next_pc));
ngen_CallRuntime(UpdateINTC);
GenCallRuntime(UpdateINTC);
break;
default:
@@ -932,31 +728,7 @@ public:
Ldr(x30, MemOperand(sp, 16, PostIndex));
Ret();
Label code_end;
Bind(&code_end);
FinalizeCode();
block->code = GetBuffer()->GetStartAddress<DynarecCodeEntryPtr>();
block->host_code_size = GetBuffer()->GetSizeInBytes();
block->host_opcodes = GetLabelAddress<u32*>(&code_end) - GetBuffer()->GetStartAddress<u32*>();
emit_Skip(block->host_code_size);
CacheFlush((void*)block->code, GetBuffer()->GetEndAddress<void*>());
#if 0
Instruction* instr_start = GetBuffer()->GetStartAddress<Instruction*>();
Instruction* instr_end = GetLabelAddress<Instruction*>(&code_end);
Decoder decoder;
Disassembler disasm;
decoder.AppendVisitor(&disasm);
Instruction* instr;
for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
decoder.Decode(instr);
printf("VIXL\t %p:\t%s\n",
reinterpret_cast<void*>(instr),
disasm.GetOutput());
}
#endif
Finalize();
}
void ngen_CC_Start(shil_opcode* op)
@@ -1037,7 +809,7 @@ public:
break;
}
}
ngen_CallRuntime((void (*)())function);
GenCallRuntime((void (*)())function);
}
MemOperand sh4_context_mem_operand(void *p)
@@ -1047,7 +819,346 @@ public:
return MemOperand(x28, offset);
}
void GenReadMemorySlow(const shil_opcode& op)
{
Instruction *start_instruction = GetCursorAddress<Instruction *>();
u32 size = op.flags & 0x7f;
switch (size)
{
case 1:
GenCallRuntime(ReadMem8);
Sxtb(w0, w0);
break;
case 2:
GenCallRuntime(ReadMem16);
Sxth(w0, w0);
break;
case 4:
GenCallRuntime(ReadMem32);
break;
case 8:
GenCallRuntime(ReadMem64);
break;
default:
die("1..8 bytes");
break;
}
if (size != 8)
host_reg_to_shil_param(op.rd, w0);
else
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
Lsr(x0, x0, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
#else
host_reg_to_shil_param(op.rd, x0);
#endif
}
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
}
void GenWriteMemorySlow(const shil_opcode& op)
{
Instruction *start_instruction = GetCursorAddress<Instruction *>();
u32 size = op.flags & 0x7f;
switch (size)
{
case 1:
GenCallRuntime(WriteMem8);
break;
case 2:
GenCallRuntime(WriteMem16);
break;
case 4:
GenCallRuntime(WriteMem32);
break;
case 8:
GenCallRuntime(WriteMem64);
break;
default:
die("1..8 bytes");
break;
}
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
}
void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
{
regalloc.DoAlloc(block);
regalloc.current_opid = opid;
frame_reg_saved = true;
}
void Finalize(bool rewrite = false)
{
Label code_end;
Bind(&code_end);
FinalizeCode();
if (!rewrite)
{
block->code = GetBuffer()->GetStartAddress<DynarecCodeEntryPtr>();
block->host_code_size = GetBuffer()->GetSizeInBytes();
block->host_opcodes = GetLabelAddress<u32*>(&code_end) - GetBuffer()->GetStartAddress<u32*>();
emit_Skip(block->host_code_size);
}
CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#if 0
if (rewrite)
{
Instruction* instr_start = GetBuffer()->GetStartAddress<Instruction*>();
Instruction* instr_end = GetLabelAddress<Instruction*>(&code_end);
// Instruction* instr_end = (Instruction*)((u8 *)block->code + block->host_code_size);
Decoder decoder;
Disassembler disasm;
decoder.AppendVisitor(&disasm);
Instruction* instr;
for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
decoder.Decode(instr);
printf("VIXL\t %p:\t%s\n",
reinterpret_cast<void*>(instr),
disasm.GetOutput());
}
}
#endif
}
private:
template <typename R, typename... P>
void GenCallRuntime(R (*function)(P...))
{
SaveFramePointer();
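// PC-relative call: bind a label at the callee's offset from the start of the
// code buffer and emit a BL to it, instead of loading the target address from a
// literal pool and using Blr. This relies on the callee being within BL range
// (+/-128 MB) of the code cache.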
uintptr_t offset = reinterpret_cast<uintptr_t>(function) - GetBuffer()->GetStartAddress<uintptr_t>();
Label function_label;
BindToOffset(&function_label, offset);
Bl(&function_label);
}
void GenReadMemory(const shil_opcode& op, size_t opid)
{
u32 size = op.flags & 0x7f;
if (GenReadMemoryImmediate(op))
return;
SaveFramePointer(); // needed if rewritten
GenMemAddr(op, call_regs[0]);
if (GenReadMemoryFast(op, opid))
return;
GenReadMemorySlow(op);
}
bool GenReadMemoryImmediate(const shil_opcode& op)
{
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
bool isram = false;
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
if (isram)
{
Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
switch (size)
{
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
break;
case 4:
if (op.rd.is_r32f())
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
else
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
default:
die("Invalid size");
break;
}
}
else
{
// Not RAM
Mov(w0, op.rs1._imm);
switch(size)
{
case 1:
GenCallRuntime((void (*)())ptr);
Sxtb(w0, w0);
break;
case 2:
GenCallRuntime((void (*)())ptr);
Sxth(w0, w0);
break;
case 4:
GenCallRuntime((void (*)())ptr);
break;
case 8:
die("SZ_64F not supported");
break;
}
if (regalloc.IsAllocg(op.rd))
Mov(regalloc.MapRegister(op.rd), w0);
else
Fmov(regalloc.MapVRegister(op.rd), w0);
}
return true;
}
bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
{
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
if (!_nvmem_enabled())
return false;
Instruction *start_instruction = GetCursorAddress<Instruction *>();
// WARNING: the rewrite code relies on having two ops before the memory access
// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
Bfc(w1, 29, 3); // addr &= ~0xE0000000
//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
u32 size = op.flags & 0x7f;
switch(size)
{
case 1:
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
break;
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
break;
case 4:
if (!op.rd.is_r32f())
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
else
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
break;
case 8:
Ldr(x1, MemOperand(x28, x1));
break;
}
if (size == 8)
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w1);
Lsr(x1, x1, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w1);
#else
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
#endif
}
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
return true;
}
void GenWriteMemory(const shil_opcode& op, size_t opid)
{
SaveFramePointer(); // needed if rewritten
GenMemAddr(op, call_regs[0]);
u32 size = op.flags & 0x7f;
if (size != 8)
shil_param_to_host_reg(op.rs2, *call_regs[1]);
else
{
#ifdef EXPLODE_SPANS
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
Lsl(*call_regs64[1], *call_regs64[1], 32);
Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
Orr(*call_regs64[1], *call_regs64[1], x2);
#else
shil_param_to_host_reg(op.rs2, *call_regs64[1]);
#endif
}
if (GenWriteMemoryFast(op, opid))
return;
GenWriteMemorySlow(op);
}
bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
{
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
if (!_nvmem_enabled())
return false;
Instruction *start_instruction = GetCursorAddress<Instruction *>();
// WARNING: the rewrite code relies on having two ops before the memory access
// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
Bfc(w7, 29, 3); // addr &= ~0xE0000000
//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
u32 size = op.flags & 0x7f;
switch(size)
{
case 1:
Strb(w1, MemOperand(x28, x7, SXTW));
break;
case 2:
Strh(w1, MemOperand(x28, x7, SXTW));
break;
case 4:
Str(w1, MemOperand(x28, x7));
break;
case 8:
Str(x1, MemOperand(x28, x7));
break;
}
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
return true;
}
void EnsureCodeSize(Instruction *start_instruction, int code_size)
{
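// Pad with NOPs up to a fixed instruction count so the fast and slow variants
// of a memory access occupy the same space, letting ngen_Rewrite() re-emit one
// over the other in place.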
while (GetCursorAddress<Instruction *>() - start_instruction < code_size * kInstructionSize)
Nop();
verify (GetCursorAddress<Instruction *>() - start_instruction == code_size * kInstructionSize);
}
void CheckBlock(RuntimeBlockInfo* block)
{
s32 sz = block->sh4_code_size;
@@ -1108,10 +1219,7 @@ private:
else if (param.is_reg())
{
if (param.is_r64f())
{
// TODO use regalloc
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
}
else if (param.is_r32f())
Fmov(reg, regalloc.MapVRegister(param));
else
@@ -1127,7 +1235,6 @@ private:
{
if (reg.Is64Bits())
{
// TODO use regalloc
Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
}
else if (regalloc.IsAllocg(param))
@@ -1146,6 +1253,15 @@ private:
}
}
void SaveFramePointer()
{
if (!frame_reg_saved)
{
Str(x30, MemOperand(sp, -16, PreIndex));
frame_reg_saved = true;
}
}
struct CC_PS
{
CanonicalParamType type;
@@ -1157,7 +1273,10 @@ private:
std::vector<const VRegister*> call_fregs;
Arm64RegAlloc regalloc;
bool frame_reg_saved = false;
std::map<void*, Literal<uintptr_t>*> function_literals;
RuntimeBlockInfo* block;
const int read_memory_rewrite_size = 6; // worst case for u64: add, bfc, ldr, fmov, lsr, fmov
// FIXME rewrite size per read/write size?
const int write_memory_rewrite_size = 3;
};
static Arm64Assembler* compiler;
@@ -1194,6 +1313,39 @@ void ngen_CC_Finish(shil_opcode* op)
}
bool ngen_Rewrite(unat& host_pc, unat, unat)
{
u32 guest_pc = p_sh4rcb->cntx.pc;
//printf("ngen_Rewrite pc %p code addr %08x\n", host_pc, guest_pc);
RuntimeBlockInfo *block = bm_GetBlock(guest_pc);
if (block == NULL)
{
printf("ngen_Rewrite: Block at %08x not found\n", guest_pc);
return false;
}
u32 *code_ptr = (u32*)host_pc;
auto it = block->memory_accesses.find(code_ptr);
if (it == block->memory_accesses.end())
{
printf("ngen_Rewrite: memory access at %p not found (%lu entries)\n", code_ptr, block->memory_accesses.size());
return false;
}
u32 opid = it->second;
verify(opid < block->oplist.size());
const shil_opcode& op = block->oplist[opid];
Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2); // Skip the 2 preceding ops (bic, add)
assembler->InitializeRewrite(block, opid);
if (op.op == shop_readm)
assembler->GenReadMemorySlow(op);
else
assembler->GenWriteMemorySlow(op);
assembler->Finalize(true);
delete assembler;
host_pc = (unat)(code_ptr - 2);
return true;
}
void Arm64RegAlloc::Preload(u32 reg, eReg nreg)
{
assembler->Ldr(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));