arm64: direct memory access and jit rewrite
Generate direct vmem read & write accesses; trap SIGSEGV and rewrite the faulting access using the slow path. Add w29 to the allocatable registers. Get rid of literals and use PC-relative branching. Minor optimizations and cleanup.
This commit is contained in:
parent
c2a048e8d8
commit
cb8e81d473
|
@ -58,7 +58,7 @@ void Assembler::bind(Label* label) {
|
|||
|
||||
|
||||
void Assembler::BindToOffset(Label* label, ptrdiff_t offset) {
|
||||
VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset()));
|
||||
// VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset())); // hack to have negative offsets
|
||||
VIXL_ASSERT(offset % kInstructionSize == 0);
|
||||
|
||||
label->Bind(offset);
|
||||
|
|
|
@ -49,7 +49,7 @@ class Label {
|
|||
VIXL_ASSERT(!IsLinked());
|
||||
}
|
||||
|
||||
bool IsBound() const { return location_ >= 0; }
|
||||
bool IsBound() const { return location_ != kLocationUnbound; }
|
||||
bool IsLinked() const { return !links_.empty(); }
|
||||
|
||||
ptrdiff_t GetLocation() const { return location_; }
|
||||
|
|
|
@ -78,7 +78,7 @@ class CodeBuffer {
|
|||
template <typename T>
|
||||
T GetOffsetAddress(ptrdiff_t offset) const {
|
||||
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
|
||||
VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
|
||||
//VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_))); // hack to have negative offsets
|
||||
return reinterpret_cast<T>(buffer_ + offset);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,9 +30,6 @@ op_agent_t oprofHandle;
|
|||
|
||||
typedef vector<RuntimeBlockInfo*> bm_List;
|
||||
|
||||
#define BLOCKS_IN_PAGE_LIST_COUNT (RAM_SIZE/4096)
|
||||
bm_List blocks_page[BLOCKS_IN_PAGE_LIST_COUNT];
|
||||
|
||||
bm_List all_blocks;
|
||||
bm_List del_blocks;
|
||||
#include <set>
|
||||
|
@ -140,12 +137,6 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code)
|
|||
|
||||
void bm_AddBlock(RuntimeBlockInfo* blk)
|
||||
{
|
||||
/*
|
||||
if (IsOnRam(blk->addr) && PageIsConst(blk->addr))
|
||||
{
|
||||
blocks_page[(blk->addr&RAM_MASK)/PAGE_SIZE].push_back(blk);
|
||||
}
|
||||
*/
|
||||
all_blocks.push_back(blk);
|
||||
if (blkmap.find(blk)!=blkmap.end())
|
||||
{
|
||||
|
@ -373,11 +364,6 @@ void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ)
|
|||
void bm_Reset()
|
||||
{
|
||||
ngen_ResetBlocks();
|
||||
for (u32 i=0; i<BLOCKS_IN_PAGE_LIST_COUNT; i++)
|
||||
{
|
||||
blocks_page[i].clear();
|
||||
}
|
||||
|
||||
_vmem_bm_reset();
|
||||
|
||||
for (size_t i=0; i<all_blocks.size(); i++)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
/*
|
||||
In case you wonder, the extern "C" stuff are for the assembly code on beagleboard/pandora
|
||||
*/
|
||||
#include <map>
|
||||
#include "types.h"
|
||||
#include "decoder.h"
|
||||
#pragma once
|
||||
|
@ -71,6 +72,7 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core
|
|||
|
||||
u32 memops;
|
||||
u32 linkedmemops;
|
||||
std::map<void*, u32> memory_accesses; // key is host pc when access is made, value is opcode id
|
||||
};
|
||||
|
||||
struct CachedBlockInfo: RuntimeBlockInfo_Core
|
||||
|
|
|
@ -465,10 +465,6 @@ struct RegAlloc
|
|||
for (int i=reg_gbr;i<=reg_fpul;i++)
|
||||
flush_span(i);
|
||||
|
||||
for (int i=reg_gbr;i<=reg_fpul;i++)
|
||||
flush_span(i);
|
||||
|
||||
|
||||
switch(OpDesc[op->rs3._imm]->mask)
|
||||
{
|
||||
case Mask_imm8:
|
||||
|
|
|
@ -50,7 +50,7 @@ void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) {
|
|||
unat pc = (unat)ctx.pc;
|
||||
bool dyna_cde = (pc>(unat)CodeCache) && (pc<(unat)(CodeCache + CODE_SIZE));
|
||||
|
||||
printf("SIGILL @ %08X, fault_handler+0x%08X ... %08X -> was not in vram, %d\n", pc, pc - (unat)sigill_handler, (unat)si->si_addr, dyna_cde);
|
||||
printf("SIGILL @ %lx -> %p was not in vram, dynacode:%d\n", pc, si->si_addr, dyna_cde);
|
||||
|
||||
//printf("PC is used here %08X\n", pc);
|
||||
kill(getpid(), SIGABRT);
|
||||
|
@ -93,14 +93,17 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
|
|||
#elif HOST_CPU == CPU_X64
|
||||
//x64 has no rewrite support
|
||||
#elif HOST_CPU == CPU_ARM64
|
||||
// arm64 has no rewrite support
|
||||
else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0))
|
||||
{
|
||||
context_to_segfault(&ctx, segfault_ctx);
|
||||
}
|
||||
#else
|
||||
#error JIT: Not supported arch
|
||||
#endif
|
||||
#endif
|
||||
else
|
||||
{
|
||||
printf("SIGSEGV @ %u (fault_handler+0x%u) ... %p -> was not in vram\n", ctx.pc, ctx.pc - (unat)fault_handler, si->si_addr);
|
||||
printf("SIGSEGV @ %lx -> %p was not in vram, dynacode:%d\n", ctx.pc, si->si_addr, dyna_cde);
|
||||
die("segfault");
|
||||
signal(SIGSEGV, SIG_DFL);
|
||||
}
|
||||
|
|
|
@ -53,10 +53,6 @@ void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_se
|
|||
#endif
|
||||
#elif HOST_CPU == CPU_ARM64
|
||||
bicopy(reictx->pc, MCTX(.pc), to_segfault);
|
||||
u64* r =(u64*) &MCTX(.regs[0]);
|
||||
|
||||
for (int i = 0; i < 31; i++)
|
||||
bicopy(reictx->r[i], r[i], to_segfault);
|
||||
#elif HOST_CPU == CPU_X86
|
||||
#if defined(__FreeBSD__)
|
||||
bicopy(reictx->pc, MCTX(.mc_eip), to_segfault);
|
||||
|
|
|
@ -12,8 +12,6 @@ struct rei_host_context_t {
|
|||
u32 esp;
|
||||
#elif HOST_CPU == CPU_ARM
|
||||
u32 r[15];
|
||||
#elif HOST_CPU == CPU_ARM64
|
||||
u64 r[31];
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ enum eFReg {
|
|||
S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
|
||||
};
|
||||
|
||||
static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, (eReg)-1 };
|
||||
static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, W29, (eReg)-1 };
|
||||
static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 };
|
||||
|
||||
class Arm64Assembler;
|
||||
|
|
|
@ -116,8 +116,7 @@ void ngen_mainloop(void* v_cntx)
|
|||
|
||||
"run_loop: \n\t"
|
||||
"ldr w0, [x28, %[CpuRunning]] \n\t"
|
||||
"cmp w0, #0 \n\t"
|
||||
"b.eq end_run_loop \n\t"
|
||||
"cbz w0, end_run_loop \n\t"
|
||||
|
||||
"slice_loop: \n\t"
|
||||
"ldr w0, [x28, %[pc]] \n\t"
|
||||
|
@ -179,7 +178,10 @@ class Arm64Assembler : public MacroAssembler
|
|||
typedef void (MacroAssembler::*Arm64Op_RROF)(const Register&, const Register&, const Operand&, enum FlagsUpdate);
|
||||
|
||||
public:
|
||||
Arm64Assembler() : MacroAssembler((u8 *)emit_GetCCPtr(), 64 * 1024), regalloc(this)
|
||||
Arm64Assembler() : Arm64Assembler(emit_GetCCPtr())
|
||||
{
|
||||
}
|
||||
Arm64Assembler(void *buffer) : MacroAssembler((u8 *)buffer, 64 * 1024), regalloc(this)
|
||||
{
|
||||
call_regs.push_back(&w0);
|
||||
call_regs.push_back(&w1);
|
||||
|
@ -226,24 +228,6 @@ public:
|
|||
((*this).*arm_op2)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), op3, LeaveFlags);
|
||||
}
|
||||
|
||||
template <typename R, typename... P>
|
||||
void ngen_CallRuntime(R (*function)(P...))
|
||||
{
|
||||
if (!frame_reg_saved)
|
||||
{
|
||||
Str(x30, MemOperand(sp, -16, PreIndex));
|
||||
frame_reg_saved = true;
|
||||
}
|
||||
Literal<uintptr_t> *function_address = function_literals[(void*)function];
|
||||
if (function_address == NULL)
|
||||
{
|
||||
function_address = new Literal<uintptr_t>(reinterpret_cast<uintptr_t>(function), GetLiteralPool(), RawLiteral::kDeletedOnPoolDestruction);
|
||||
function_literals[(void*)function] = function_address;
|
||||
}
|
||||
Ldr(x9, function_address);
|
||||
Blr(x9);
|
||||
}
|
||||
|
||||
const Register& GenMemAddr(const shil_opcode& op, const Register* raddr = NULL)
|
||||
{
|
||||
const Register* ret_reg = raddr == NULL ? &w0 : raddr;
|
||||
|
@ -274,6 +258,7 @@ public:
|
|||
void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise)
|
||||
{
|
||||
//printf("REC-ARM64 compiling %08x\n", block->addr);
|
||||
this->block = block;
|
||||
if (force_checks)
|
||||
CheckBlock(block);
|
||||
|
||||
|
@ -298,7 +283,7 @@ public:
|
|||
}
|
||||
Mov(*call_regs[0], op.rs3._imm);
|
||||
|
||||
ngen_CallRuntime(OpDesc[op.rs3._imm]->oph);
|
||||
GenCallRuntime(OpDesc[op.rs3._imm]->oph);
|
||||
break;
|
||||
|
||||
case shop_jcond:
|
||||
|
@ -350,203 +335,18 @@ public:
|
|||
break;
|
||||
|
||||
case shop_readm:
|
||||
{
|
||||
u32 size = op.flags & 0x7f;
|
||||
bool is_float = op.rs2.is_r32f() || op.rd.is_r32f();
|
||||
|
||||
if (op.rs1.is_imm())
|
||||
{
|
||||
bool isram = false;
|
||||
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
|
||||
|
||||
if (isram)
|
||||
{
|
||||
Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
|
||||
switch (size)
|
||||
{
|
||||
case 2:
|
||||
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
if (is_float)
|
||||
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
|
||||
else
|
||||
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
|
||||
break;
|
||||
|
||||
default:
|
||||
die("Invalid size");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not RAM
|
||||
Mov(w0, op.rs1._imm);
|
||||
|
||||
switch(size)
|
||||
{
|
||||
case 1:
|
||||
ngen_CallRuntime((void (*)())ptr);
|
||||
Sxtb(w0, w0);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
ngen_CallRuntime((void (*)())ptr);
|
||||
Sxth(w0, w0);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
ngen_CallRuntime((void (*)())ptr);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
die("SZ_64F not supported");
|
||||
break;
|
||||
}
|
||||
|
||||
if (regalloc.IsAllocg(op.rd))
|
||||
Mov(regalloc.MapRegister(op.rd), w0);
|
||||
else
|
||||
Fmov(regalloc.MapVRegister(op.rd), w0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if 0 // Direct memory access. Need to handle SIGSEGV and rewrite block as needed (?)
|
||||
const Register& raddr = GenMemAddr(&op);
|
||||
|
||||
if (_nvmem_enabled())
|
||||
{
|
||||
Add(w1, raddr, sizeof(Sh4Context));
|
||||
Bfc(w1, 29, 3); // addr &= ~0xE0000000
|
||||
|
||||
switch(size)
|
||||
{
|
||||
case 1:
|
||||
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
|
||||
break;
|
||||
|
||||
case 2:
|
||||
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
if (!is_float)
|
||||
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
|
||||
else
|
||||
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
|
||||
break;
|
||||
|
||||
case 8:
|
||||
// TODO use regalloc
|
||||
Ldr(x0, MemOperand(x28, x1));
|
||||
Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO
|
||||
die("Not implemented")
|
||||
}
|
||||
#endif
|
||||
|
||||
GenMemAddr(op, call_regs[0]);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
ngen_CallRuntime(ReadMem8);
|
||||
Sxtb(w0, w0);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
ngen_CallRuntime(ReadMem16);
|
||||
Sxth(w0, w0);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
ngen_CallRuntime(ReadMem32);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
ngen_CallRuntime(ReadMem64);
|
||||
break;
|
||||
|
||||
default:
|
||||
die("1..8 bytes");
|
||||
break;
|
||||
}
|
||||
|
||||
if (size != 8)
|
||||
host_reg_to_shil_param(op.rd, w0);
|
||||
else
|
||||
{
|
||||
#ifdef EXPLODE_SPANS
|
||||
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
|
||||
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
|
||||
Lsr(x0, x0, 32);
|
||||
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
|
||||
#else
|
||||
host_reg_to_shil_param(op.rd, x0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
GenReadMemory(op, i);
|
||||
break;
|
||||
|
||||
case shop_writem:
|
||||
{
|
||||
GenMemAddr(op, call_regs[0]);
|
||||
|
||||
u32 size = op.flags & 0x7f;
|
||||
if (size != 8)
|
||||
shil_param_to_host_reg(op.rs2, *call_regs[1]);
|
||||
else
|
||||
{
|
||||
#ifdef EXPLODE_SPANS
|
||||
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
|
||||
Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
|
||||
Lsl(*call_regs64[1], *call_regs64[1], 32);
|
||||
Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
|
||||
Orr(*call_regs64[1], *call_regs64[1], x2);
|
||||
#else
|
||||
shil_param_to_host_reg(op.rs2, *call_regs64[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
ngen_CallRuntime(WriteMem8);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
ngen_CallRuntime(WriteMem16);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
ngen_CallRuntime(WriteMem32);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
ngen_CallRuntime(WriteMem64);
|
||||
break;
|
||||
|
||||
default:
|
||||
die("1..8 bytes");
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
GenWriteMemory(op, i);
|
||||
break;
|
||||
|
||||
case shop_sync_sr:
|
||||
ngen_CallRuntime(UpdateSR);
|
||||
GenCallRuntime(UpdateSR);
|
||||
break;
|
||||
case shop_sync_fpscr:
|
||||
ngen_CallRuntime(UpdateFPSCR);
|
||||
GenCallRuntime(UpdateFPSCR);
|
||||
break;
|
||||
|
||||
case shop_swaplb:
|
||||
|
@ -731,11 +531,7 @@ public:
|
|||
Ldr(x9, MemOperand(x9));
|
||||
Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
|
||||
}
|
||||
if (!frame_reg_saved)
|
||||
{
|
||||
Str(x30, MemOperand(sp, -16, PreIndex));
|
||||
frame_reg_saved = true;
|
||||
}
|
||||
SaveFramePointer();
|
||||
if (op.flags == 0x1337)
|
||||
Blr(x9);
|
||||
else
|
||||
|
@ -921,7 +717,7 @@ public:
|
|||
}
|
||||
Str(w10, sh4_context_mem_operand(&next_pc));
|
||||
|
||||
ngen_CallRuntime(UpdateINTC);
|
||||
GenCallRuntime(UpdateINTC);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -932,31 +728,7 @@ public:
|
|||
Ldr(x30, MemOperand(sp, 16, PostIndex));
|
||||
Ret();
|
||||
|
||||
Label code_end;
|
||||
Bind(&code_end);
|
||||
|
||||
FinalizeCode();
|
||||
|
||||
block->code = GetBuffer()->GetStartAddress<DynarecCodeEntryPtr>();
|
||||
block->host_code_size = GetBuffer()->GetSizeInBytes();
|
||||
block->host_opcodes = GetLabelAddress<u32*>(&code_end) - GetBuffer()->GetStartAddress<u32*>();
|
||||
|
||||
emit_Skip(block->host_code_size);
|
||||
CacheFlush((void*)block->code, GetBuffer()->GetEndAddress<void*>());
|
||||
#if 0
|
||||
Instruction* instr_start = GetBuffer()->GetStartAddress<Instruction*>();
|
||||
Instruction* instr_end = GetLabelAddress<Instruction*>(&code_end);
|
||||
Decoder decoder;
|
||||
Disassembler disasm;
|
||||
decoder.AppendVisitor(&disasm);
|
||||
Instruction* instr;
|
||||
for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
|
||||
decoder.Decode(instr);
|
||||
printf("VIXL\t %p:\t%s\n",
|
||||
reinterpret_cast<void*>(instr),
|
||||
disasm.GetOutput());
|
||||
}
|
||||
#endif
|
||||
Finalize();
|
||||
}
|
||||
|
||||
void ngen_CC_Start(shil_opcode* op)
|
||||
|
@ -1037,7 +809,7 @@ public:
|
|||
break;
|
||||
}
|
||||
}
|
||||
ngen_CallRuntime((void (*)())function);
|
||||
GenCallRuntime((void (*)())function);
|
||||
}
|
||||
|
||||
MemOperand sh4_context_mem_operand(void *p)
|
||||
|
@ -1047,7 +819,346 @@ public:
|
|||
return MemOperand(x28, offset);
|
||||
}
|
||||
|
||||
void GenReadMemorySlow(const shil_opcode& op)
|
||||
{
|
||||
Instruction *start_instruction = GetCursorAddress<Instruction *>();
|
||||
u32 size = op.flags & 0x7f;
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
GenCallRuntime(ReadMem8);
|
||||
Sxtb(w0, w0);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
GenCallRuntime(ReadMem16);
|
||||
Sxth(w0, w0);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
GenCallRuntime(ReadMem32);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
GenCallRuntime(ReadMem64);
|
||||
break;
|
||||
|
||||
default:
|
||||
die("1..8 bytes");
|
||||
break;
|
||||
}
|
||||
|
||||
if (size != 8)
|
||||
host_reg_to_shil_param(op.rd, w0);
|
||||
else
|
||||
{
|
||||
#ifdef EXPLODE_SPANS
|
||||
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
|
||||
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
|
||||
Lsr(x0, x0, 32);
|
||||
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
|
||||
#else
|
||||
host_reg_to_shil_param(op.rd, x0);
|
||||
#endif
|
||||
}
|
||||
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
|
||||
}
|
||||
|
||||
void GenWriteMemorySlow(const shil_opcode& op)
|
||||
{
|
||||
Instruction *start_instruction = GetCursorAddress<Instruction *>();
|
||||
u32 size = op.flags & 0x7f;
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
GenCallRuntime(WriteMem8);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
GenCallRuntime(WriteMem16);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
GenCallRuntime(WriteMem32);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
GenCallRuntime(WriteMem64);
|
||||
break;
|
||||
|
||||
default:
|
||||
die("1..8 bytes");
|
||||
break;
|
||||
}
|
||||
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
|
||||
}
|
||||
|
||||
// Prepares this assembler for re-emitting a single memory access of an
// already compiled block (see ngen_Rewrite).
//
// block: the block that contains the access being rewritten.
// opid:  index of the shil opcode being rewritten; becomes the register
//        allocator's current op.
//
// frame_reg_saved is forced to true so that SaveFramePointer() emits nothing
// for the rewritten sequence.
void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
{
	// The parameter shadows the member of the same name; record it so the
	// member is never left indeterminate on the rewrite path.
	this->block = block;

	regalloc.DoAlloc(block);
	regalloc.current_opid = opid;
	frame_reg_saved = true;
}
|
||||
|
||||
void Finalize(bool rewrite = false)
|
||||
{
|
||||
Label code_end;
|
||||
Bind(&code_end);
|
||||
|
||||
FinalizeCode();
|
||||
|
||||
if (!rewrite)
|
||||
{
|
||||
block->code = GetBuffer()->GetStartAddress<DynarecCodeEntryPtr>();
|
||||
block->host_code_size = GetBuffer()->GetSizeInBytes();
|
||||
block->host_opcodes = GetLabelAddress<u32*>(&code_end) - GetBuffer()->GetStartAddress<u32*>();
|
||||
|
||||
emit_Skip(block->host_code_size);
|
||||
}
|
||||
CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
|
||||
#if 0
|
||||
if (rewrite)
|
||||
{
|
||||
Instruction* instr_start = GetBuffer()->GetStartAddress<Instruction*>();
|
||||
Instruction* instr_end = GetLabelAddress<Instruction*>(&code_end);
|
||||
// Instruction* instr_end = (Instruction*)((u8 *)block->code + block->host_code_size);
|
||||
Decoder decoder;
|
||||
Disassembler disasm;
|
||||
decoder.AppendVisitor(&disasm);
|
||||
Instruction* instr;
|
||||
for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
|
||||
decoder.Decode(instr);
|
||||
printf("VIXL\t %p:\t%s\n",
|
||||
reinterpret_cast<void*>(instr),
|
||||
disasm.GetOutput());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename R, typename... P>
|
||||
void GenCallRuntime(R (*function)(P...))
|
||||
{
|
||||
SaveFramePointer();
|
||||
uintptr_t offset = reinterpret_cast<uintptr_t>(function) - GetBuffer()->GetStartAddress<uintptr_t>();
|
||||
Label function_label;
|
||||
BindToOffset(&function_label, offset);
|
||||
Bl(&function_label);
|
||||
}
|
||||
|
||||
void GenReadMemory(const shil_opcode& op, size_t opid)
|
||||
{
|
||||
u32 size = op.flags & 0x7f;
|
||||
|
||||
if (GenReadMemoryImmediate(op))
|
||||
return;
|
||||
|
||||
SaveFramePointer(); // needed if rewritten
|
||||
|
||||
GenMemAddr(op, call_regs[0]);
|
||||
|
||||
if (GenReadMemoryFast(op, opid))
|
||||
return;
|
||||
|
||||
GenReadMemorySlow(op);
|
||||
}
|
||||
|
||||
bool GenReadMemoryImmediate(const shil_opcode& op)
|
||||
{
|
||||
if (!op.rs1.is_imm())
|
||||
return false;
|
||||
|
||||
u32 size = op.flags & 0x7f;
|
||||
bool isram = false;
|
||||
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
|
||||
|
||||
if (isram)
|
||||
{
|
||||
Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
|
||||
switch (size)
|
||||
{
|
||||
case 2:
|
||||
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
if (op.rd.is_r32f())
|
||||
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
|
||||
else
|
||||
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
|
||||
break;
|
||||
|
||||
default:
|
||||
die("Invalid size");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not RAM
|
||||
Mov(w0, op.rs1._imm);
|
||||
|
||||
switch(size)
|
||||
{
|
||||
case 1:
|
||||
GenCallRuntime((void (*)())ptr);
|
||||
Sxtb(w0, w0);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
GenCallRuntime((void (*)())ptr);
|
||||
Sxth(w0, w0);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
GenCallRuntime((void (*)())ptr);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
die("SZ_64F not supported");
|
||||
break;
|
||||
}
|
||||
|
||||
if (regalloc.IsAllocg(op.rd))
|
||||
Mov(regalloc.MapRegister(op.rd), w0);
|
||||
else
|
||||
Fmov(regalloc.MapVRegister(op.rd), w0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
|
||||
{
|
||||
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
|
||||
if (!_nvmem_enabled())
|
||||
return false;
|
||||
|
||||
Instruction *start_instruction = GetCursorAddress<Instruction *>();
|
||||
|
||||
// WARNING: the rewrite code relies on having two ops before the memory access
|
||||
// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
|
||||
Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
|
||||
Bfc(w1, 29, 3); // addr &= ~0xE0000000
|
||||
|
||||
//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
|
||||
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
|
||||
|
||||
u32 size = op.flags & 0x7f;
|
||||
switch(size)
|
||||
{
|
||||
case 1:
|
||||
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
|
||||
break;
|
||||
|
||||
case 2:
|
||||
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
if (!op.rd.is_r32f())
|
||||
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
|
||||
else
|
||||
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
|
||||
break;
|
||||
|
||||
case 8:
|
||||
Ldr(x1, MemOperand(x28, x1));
|
||||
break;
|
||||
}
|
||||
|
||||
if (size == 8)
|
||||
{
|
||||
#ifdef EXPLODE_SPANS
|
||||
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
|
||||
Fmov(regalloc.MapVRegister(op.rd, 0), w1);
|
||||
Lsr(x1, x1, 32);
|
||||
Fmov(regalloc.MapVRegister(op.rd, 1), w1);
|
||||
#else
|
||||
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
|
||||
#endif
|
||||
}
|
||||
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GenWriteMemory(const shil_opcode& op, size_t opid)
|
||||
{
|
||||
SaveFramePointer(); // needed if rewritten
|
||||
|
||||
GenMemAddr(op, call_regs[0]);
|
||||
|
||||
u32 size = op.flags & 0x7f;
|
||||
if (size != 8)
|
||||
shil_param_to_host_reg(op.rs2, *call_regs[1]);
|
||||
else
|
||||
{
|
||||
#ifdef EXPLODE_SPANS
|
||||
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
|
||||
Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
|
||||
Lsl(*call_regs64[1], *call_regs64[1], 32);
|
||||
Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
|
||||
Orr(*call_regs64[1], *call_regs64[1], x2);
|
||||
#else
|
||||
shil_param_to_host_reg(op.rs2, *call_regs64[1]);
|
||||
#endif
|
||||
}
|
||||
if (GenWriteMemoryFast(op, opid))
|
||||
return;
|
||||
|
||||
GenWriteMemorySlow(op);
|
||||
}
|
||||
|
||||
bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
|
||||
{
|
||||
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
|
||||
if (!_nvmem_enabled())
|
||||
return false;
|
||||
|
||||
Instruction *start_instruction = GetCursorAddress<Instruction *>();
|
||||
|
||||
// WARNING: the rewrite code relies on having two ops before the memory access
|
||||
// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
|
||||
Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
|
||||
Bfc(w7, 29, 3); // addr &= ~0xE0000000
|
||||
|
||||
//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
|
||||
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
|
||||
|
||||
u32 size = op.flags & 0x7f;
|
||||
switch(size)
|
||||
{
|
||||
case 1:
|
||||
Strb(w1, MemOperand(x28, x7, SXTW));
|
||||
break;
|
||||
|
||||
case 2:
|
||||
Strh(w1, MemOperand(x28, x7, SXTW));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
Str(w1, MemOperand(x28, x7));
|
||||
break;
|
||||
|
||||
case 8:
|
||||
Str(x1, MemOperand(x28, x7));
|
||||
break;
|
||||
}
|
||||
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pads the code emitted since `start_instruction` with NOPs until it spans
// exactly `code_size` instructions, then verifies that size. The verify fails
// if more than `code_size` instructions had already been emitted.
//
// start_instruction: cursor address captured before the sequence was emitted.
// code_size:         required length of the sequence, in instructions.
void EnsureCodeSize(Instruction *start_instruction, int code_size)
{
	const auto required_bytes = code_size * kInstructionSize;

	for (;;)
	{
		if (!(GetCursorAddress<Instruction *>() - start_instruction < required_bytes))
			break;
		Nop();
	}

	verify (GetCursorAddress<Instruction *>() - start_instruction == required_bytes);
}
|
||||
|
||||
void CheckBlock(RuntimeBlockInfo* block)
|
||||
{
|
||||
s32 sz = block->sh4_code_size;
|
||||
|
@ -1108,10 +1219,7 @@ private:
|
|||
else if (param.is_reg())
|
||||
{
|
||||
if (param.is_r64f())
|
||||
{
|
||||
// TODO use regalloc
|
||||
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
|
||||
}
|
||||
else if (param.is_r32f())
|
||||
Fmov(reg, regalloc.MapVRegister(param));
|
||||
else
|
||||
|
@ -1127,7 +1235,6 @@ private:
|
|||
{
|
||||
if (reg.Is64Bits())
|
||||
{
|
||||
// TODO use regalloc
|
||||
Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
|
||||
}
|
||||
else if (regalloc.IsAllocg(param))
|
||||
|
@ -1146,6 +1253,15 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
void SaveFramePointer()
|
||||
{
|
||||
if (!frame_reg_saved)
|
||||
{
|
||||
Str(x30, MemOperand(sp, -16, PreIndex));
|
||||
frame_reg_saved = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct CC_PS
|
||||
{
|
||||
CanonicalParamType type;
|
||||
|
@ -1157,7 +1273,10 @@ private:
|
|||
std::vector<const VRegister*> call_fregs;
|
||||
Arm64RegAlloc regalloc;
|
||||
bool frame_reg_saved = false;
|
||||
std::map<void*, Literal<uintptr_t>*> function_literals;
|
||||
RuntimeBlockInfo* block;
|
||||
const int read_memory_rewrite_size = 6; // worst case for u64: add, bfc, ldr, fmov, lsr, fmov
|
||||
// FIXME rewrite size per read/write size?
|
||||
const int write_memory_rewrite_size = 3;
|
||||
};
|
||||
|
||||
static Arm64Assembler* compiler;
|
||||
|
@ -1194,6 +1313,39 @@ void ngen_CC_Finish(shil_opcode* op)
|
|||
|
||||
}
|
||||
|
||||
// Replaces a faulting direct memory access with its slow-path equivalent.
// Called from the fault handler when the faulting pc lies in generated code.
//
// host_pc: in: host address of the faulting instruction.
//          out: on success, the start of the rewritten sequence, where
//          execution is to resume.
// The two unnamed parameters are unused.
//
// Returns true when the access was rewritten. Returns false when the current
// guest block cannot be found or host_pc is not a recorded memory access of
// that block.
bool ngen_Rewrite(unat& host_pc, unat, unat)
{
	// Number of instructions the fast path emits ahead of the load/store
	// itself (add, bfc). Must match GenReadMemoryFast / GenWriteMemoryFast.
	const int preceding_ops = 2;

	u32 guest_pc = p_sh4rcb->cntx.pc;
	//printf("ngen_Rewrite pc %p code addr %08x\n", host_pc, guest_pc);
	RuntimeBlockInfo *block = bm_GetBlock(guest_pc);
	if (block == NULL)
	{
		printf("ngen_Rewrite: Block at %08x not found\n", guest_pc);
		return false;
	}

	u32 *code_ptr = (u32*)host_pc;
	auto it = block->memory_accesses.find(code_ptr);
	if (it == block->memory_accesses.end())
	{
		// %p requires a void*, and size() returns a size_t
		printf("ngen_Rewrite: memory access at %p not found (%zu entries)\n", (void*)code_ptr, block->memory_accesses.size());
		return false;
	}

	u32 opid = it->second;
	verify(opid < block->oplist.size());
	const shil_opcode& op = block->oplist[opid];

	// The rewritten sequence starts at the first instruction of the fast
	// path, i.e. before the address computation that precedes the access.
	u32 *rewrite_start = code_ptr - preceding_ops;

	Arm64Assembler *assembler = new Arm64Assembler(rewrite_start);
	assembler->InitializeRewrite(block, opid);
	if (op.op == shop_readm)
		assembler->GenReadMemorySlow(op);
	else
		assembler->GenWriteMemorySlow(op);
	assembler->Finalize(true);
	delete assembler;

	// Resume at the start of the rewritten code
	host_pc = (unat)rewrite_start;

	return true;
}
|
||||
|
||||
void Arm64RegAlloc::Preload(u32 reg, eReg nreg)
|
||||
{
|
||||
assembler->Ldr(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
|
||||
|
|
Loading…
Reference in New Issue