#include "build.h"
|
|
|
|
#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
|
|
|
|
//#define CANONICAL_TEST
|
|
|
|
#include <xbyak/xbyak.h>
|
|
#include <xbyak/xbyak_util.h>
|
|
using namespace Xbyak::util;
|
|
|
|
#include "types.h"
|
|
#include "hw/sh4/sh4_opcode_list.h"
|
|
#include "hw/sh4/dyna/ngen.h"
|
|
#include "hw/sh4/modules/mmu.h"
|
|
#include "hw/sh4/sh4_interrupts.h"
|
|
|
|
#include "hw/sh4/sh4_core.h"
|
|
#include "hw/sh4/sh4_mem.h"
|
|
#include "x64_regalloc.h"
|
|
#include "xbyak_base.h"
|
|
#include "oslib/unwind_info.h"
|
|
#include "oslib/virtmem.h"
|
|
|
|
static void (*mainloop)();
|
|
static void (*handleException)();
|
|
|
|
static u64 jmp_rsp;
|
|
|
|
namespace MemSize {
|
|
enum {
|
|
S8,
|
|
S16,
|
|
S32,
|
|
S64,
|
|
Count
|
|
};
|
|
}
|
|
namespace MemOp {
|
|
enum {
|
|
R,
|
|
W,
|
|
Count
|
|
};
|
|
}
|
|
namespace MemType {
|
|
enum {
|
|
Fast,
|
|
StoreQueue,
|
|
Slow,
|
|
Count
|
|
};
|
|
}
|
|
|
|
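// Memory access handlers emitted by BlockCompiler::genMemHandlers(), indexed by
// [handler type][operand size][read/write].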
static const void *MemHandlers[MemType::Count][MemSize::Count][MemOp::Count];
static const u8 *MemHandlerStart, *MemHandlerEnd;
static UnwindInfo unwinder;
#ifndef _WIN32
static float xmmSave[4];
#endif

static void ngen_blockcheckfail(u32 pc) {
    //printf("X64 JIT: SMC invalidation at %08X\n", pc);
    rdv_BlockCheckFail(pc);
}

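// Dispatch an SH4 exception raised by a fallback handler. An odd pc marks an
// exception raised in a delay slot: adjust it and clear the flag before dispatching.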
static void handle_sh4_exception(SH4ThrownException& ex, u32 pc)
{
    if (pc & 1)
    {
        // Delay slot
        AdjustDelaySlotException(ex);
        pc--;
    }
    Do_Exception(pc, ex.expEvn);
    p_sh4rcb->cntx.cycle_counter += 4;  // probably more is needed
    handleException();
}

static void interpreter_fallback(u16 op, OpCallFP *oph, u32 pc)
{
    try {
        oph(op);
    } catch (SH4ThrownException& ex) {
        handle_sh4_exception(ex, pc);
    }
}

static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
{
    try {
        do_sqw_mmu(addr);
    } catch (SH4ThrownException& ex) {
        handle_sh4_exception(ex, pc);
    }
}

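// First four integer and float argument registers of the host calling convention
// (Microsoft x64 on Windows, System V AMD64 elsewhere).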
const std::array<Xbyak::Reg32, 4> call_regs
|
|
#ifdef _WIN32
|
|
{ ecx, edx, r8d, r9d };
|
|
#else
|
|
{ edi, esi, edx, ecx };
|
|
#endif
|
|
const std::array<Xbyak::Reg64, 4> call_regs64
|
|
#ifdef _WIN32
|
|
{ rcx, rdx, r8, r9 };
|
|
#else
|
|
{ rdi, rsi, rdx, rcx };
|
|
#endif
|
|
const std::array<Xbyak::Xmm, 4> call_regsxmm { xmm0, xmm1, xmm2, xmm3 };
|
|
|
|
#ifdef _WIN32
|
|
constexpr u32 STACK_ALIGN = 0x28; // 32-byte shadow space + 8 byte alignment
|
|
#else
|
|
constexpr u32 STACK_ALIGN = 8;
|
|
#endif
|
|
|
|
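// Translates one SH4 block (a list of SHIL opcodes) into x86-64 code emitted into
// the shared code buffer.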
class BlockCompiler : public BaseXbyakRec<BlockCompiler, true>
{
public:
    using BaseCompiler = BaseXbyakRec<BlockCompiler, true>;
    friend class BaseXbyakRec<BlockCompiler, true>;

    BlockCompiler(Sh4CodeBuffer& codeBuffer) : BaseCompiler(codeBuffer), regalloc(this) { }
    BlockCompiler(Sh4CodeBuffer& codeBuffer, u8 *code_ptr) : BaseCompiler(codeBuffer, code_ptr), regalloc(this) { }

    void compile(RuntimeBlockInfo* block, bool force_checks, bool optimise)
    {
        //printf("X86_64 compiling %08x to %p\n", block->addr, codeBuffer.get());
        current_opid = -1;

        CheckBlock(force_checks, block);

        sub(rsp, STACK_ALIGN);

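        // The block uses the FPU and the MMU is on: check SR.FD and raise an
        // FPU-disabled exception instead of executing the block when the bit is set.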
        if (mmu_enabled() && block->has_fpu_op)
        {
            Xbyak::Label fpu_enabled;
            mov(rax, (uintptr_t)&sr);
            test(dword[rax], 0x8000);  // test SR.FD bit
            jz(fpu_enabled);
            mov(call_regs[0], block->vaddr);  // pc
            mov(call_regs[1], Sh4Ex_FpuDisabled);  // exception code
            GenCall((void (*)())Do_Exception);
            jmp(exit_block, T_NEAR);
            L(fpu_enabled);
        }
        mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
        sub(dword[rax], block->guest_cycles);

        regalloc.DoAlloc(block);

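        // Emit host code for each SHIL opcode of the block.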
        for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
        {
            shil_opcode& op = block->oplist[current_opid];

            regalloc.OpBegin(&op, current_opid);

            switch (op.op)
            {
            case shop_ifb:
                if (mmu_enabled())
                {
                    mov(call_regs64[1], reinterpret_cast<uintptr_t>(*OpDesc[op.rs3._imm]->oph));  // op handler
                    mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc
                }

                if (op.rs1._imm)
                {
                    mov(rax, (size_t)&next_pc);
                    mov(dword[rax], op.rs2._imm);
                }

                mov(call_regs[0], op.rs3._imm);

                if (!mmu_enabled())
                    GenCall(OpDesc[op.rs3._imm]->oph);
                else
                    GenCall(interpreter_fallback);

                break;

            case shop_mov64:
                {
                    verify(op.rd.is_r64f());
                    verify(op.rs1.is_r64f());

#if ALLOC_F64 == false
                    mov(rax, (uintptr_t)op.rs1.reg_ptr());
                    mov(rax, qword[rax]);
                    mov(rcx, (uintptr_t)op.rd.reg_ptr());
                    mov(qword[rcx], rax);
#else
                    Xbyak::Xmm rd0 = regalloc.MapXRegister(op.rd, 0);
                    Xbyak::Xmm rs0 = regalloc.MapXRegister(op.rs1, 0);
                    Xbyak::Xmm rd1 = regalloc.MapXRegister(op.rd, 1);
                    Xbyak::Xmm rs1 = regalloc.MapXRegister(op.rs1, 1);
                    if (rd0 == rs1)
                    {
                        movss(xmm0, rd0);
                        movss(rd0, rs0);
                        movss(rd1, xmm0);
                    }
                    else
                    {
                        if (rd0 != rs0)
                            movss(rd0, rs0);
                        if (rd1 != rs1)
                            movss(rd1, rs1);
                    }
#endif
                }
                break;

            case shop_readm:
                if (!GenReadMemImmediate(op, block))
                {
                    // Not an immediate address
                    shil_param_to_host_reg(op.rs1, call_regs[0]);
                    if (!op.rs3.is_null())
                    {
                        if (op.rs3.is_imm())
                            add(call_regs[0], op.rs3._imm);
                        else if (regalloc.IsAllocg(op.rs3))
                            add(call_regs[0], regalloc.MapRegister(op.rs3));
                        else
                        {
                            mov(rax, (uintptr_t)op.rs3.reg_ptr());
                            add(call_regs[0], dword[rax]);
                        }
                    }
                    genMmuLookup(block, op, 0);

                    int size = op.size == 1 ? MemSize::S8 : op.size == 2 ? MemSize::S16 : op.size == 4 ? MemSize::S32 : MemSize::S64;
                    GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::R], mmu_enabled());

#if ALLOC_F64 == false
                    if (size == MemSize::S64)
                    {
                        mov(rcx, (uintptr_t)op.rd.reg_ptr());
                        mov(qword[rcx], rax);
                    }
                    else
#endif
                    {
                        mov(rcx, rax);
                        host_reg_to_shil_param(op.rd, rcx);
                    }
                }
                break;

            case shop_writem:
                {
                    if (!GenWriteMemImmediate(op, block))
                    {
                        shil_param_to_host_reg(op.rs1, call_regs[0]);
                        if (!op.rs3.is_null())
                        {
                            if (op.rs3.is_imm())
                                add(call_regs[0], op.rs3._imm);
                            else if (regalloc.IsAllocg(op.rs3))
                                add(call_regs[0], regalloc.MapRegister(op.rs3));
                            else
                            {
                                mov(rax, (uintptr_t)op.rs3.reg_ptr());
                                add(call_regs[0], dword[rax]);
                            }
                        }
                        genMmuLookup(block, op, 1);

#if ALLOC_F64 == false
                        if (op.size == 8)
                        {
                            mov(rax, (uintptr_t)op.rs2.reg_ptr());
                            mov(call_regs64[1], qword[rax]);
                        }
                        else
#endif
                            shil_param_to_host_reg(op.rs2, call_regs64[1]);

                        int size = op.size == 1 ? MemSize::S8 : op.size == 2 ? MemSize::S16 : op.size == 4 ? MemSize::S32 : MemSize::S64;
                        GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::W], mmu_enabled());
                    }
                }
                break;

            case shop_jcond:
            case shop_jdyn:
            case shop_mov32:
                genBaseOpcode(op);
                break;

#ifndef CANONICAL_TEST
            case shop_sync_sr:
                GenCall(UpdateSR);
                break;
            case shop_sync_fpscr:
                GenCall(UpdateFPSCR);
                break;

            case shop_negc:
                {
                    Xbyak::Reg32 rs2;
                    if (op.rs2.is_reg())
                    {
                        rs2 = regalloc.MapRegister(op.rs2);
                        if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
                        {
                            mov(ecx, rs2);
                            rs2 = ecx;
                        }
                    }
                    Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
                    if (op.rs1.is_imm())
                        mov(rd, op.rs1.imm_value());
                    else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
                        mov(rd, regalloc.MapRegister(op.rs1));
                    Xbyak::Reg64 rd64 = rd.cvt64();
                    neg(rd64);
                    if (op.rs2.is_imm())
                        sub(rd64, op.rs2.imm_value());
                    else
                        sub(rd64, rs2.cvt64());
                    Xbyak::Reg64 rd2_64 = regalloc.MapRegister(op.rd2).cvt64();
                    mov(rd2_64, rd64);
                    shr(rd2_64, 63);
                }
                break;

            case shop_mul_s64:
                movsxd(rax, regalloc.MapRegister(op.rs1));
                if (op.rs2.is_reg())
                    movsxd(rcx, regalloc.MapRegister(op.rs2));
                else
                    mov(rcx, (s64)(s32)op.rs2._imm);
                mul(rcx);
                mov(regalloc.MapRegister(op.rd), eax);
                shr(rax, 32);
                mov(regalloc.MapRegister(op.rd2), eax);
                break;

            case shop_pref:
                {
                    Xbyak::Label no_sqw;
                    if (op.rs1.is_imm())
                    {
                        // this test shouldn't be necessary
                        if ((op.rs1._imm & 0xFC000000) != 0xE0000000)
                            break;

                        mov(call_regs[0], op.rs1._imm);
                    }
                    else
                    {
                        Xbyak::Reg32 rn;
                        if (regalloc.IsAllocg(op.rs1))
                        {
                            rn = regalloc.MapRegister(op.rs1);
                        }
                        else
                        {
                            mov(rax, (uintptr_t)op.rs1.reg_ptr());
                            mov(eax, dword[rax]);
                            rn = eax;
                        }
                        mov(ecx, rn);
                        shr(ecx, 26);
                        cmp(ecx, 0x38);
                        jne(no_sqw);

                        mov(call_regs[0], rn);
                    }
                    if (mmu_enabled())
                    {
                        mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc

                        GenCall(do_sqw_mmu_no_ex);
                    }
                    else
                    {
                        mov(call_regs64[1], (uintptr_t)sq_both);
                        mov(rax, (size_t)&do_sqw_nommu);
                        saveXmmRegisters();
                        call(qword[rax]);
                        restoreXmmRegisters();
                    }
                    L(no_sqw);
                }
                break;

            case shop_frswap:
                mov(rax, (uintptr_t)op.rs1.reg_ptr());
                mov(rcx, (uintptr_t)op.rd.reg_ptr());
                if (cpu.has(Cpu::tAVX512F))
                {
                    vmovaps(zmm0, zword[rax]);
                    vmovaps(zmm1, zword[rcx]);
                    vmovaps(zword[rax], zmm1);
                    vmovaps(zword[rcx], zmm0);
                }
                else if (cpu.has(Cpu::tAVX))
                {
                    vmovaps(ymm0, yword[rax]);
                    vmovaps(ymm1, yword[rcx]);
                    vmovaps(yword[rax], ymm1);
                    vmovaps(yword[rcx], ymm0);

                    vmovaps(ymm0, yword[rax + 32]);
                    vmovaps(ymm1, yword[rcx + 32]);
                    vmovaps(yword[rax + 32], ymm1);
                    vmovaps(yword[rcx + 32], ymm0);
                }
                else
                {
                    for (int i = 0; i < 4; i++)
                    {
                        movaps(xmm0, xword[rax + (i * 16)]);
                        movaps(xmm1, xword[rcx + (i * 16)]);
                        movaps(xword[rax + (i * 16)], xmm1);
                        movaps(xword[rcx + (i * 16)], xmm0);
                    }
                }
                break;

            case shop_fmac:
                {
                    Xbyak::Xmm rs1 = regalloc.MapXRegister(op.rs1);
                    Xbyak::Xmm rs2 = regalloc.MapXRegister(op.rs2);
                    Xbyak::Xmm rs3 = regalloc.MapXRegister(op.rs3);
                    Xbyak::Xmm rd = regalloc.MapXRegister(op.rd);
                    if (rd == rs2)
                    {
                        movss(xmm1, rs2);
                        rs2 = xmm1;
                    }
                    if (rd == rs3)
                    {
                        movss(xmm2, rs3);
                        rs3 = xmm2;
                    }
                    if (op.rs1.is_imm())  // FIXME MapXRegister(op.rs1) would have failed
                    {
                        mov(eax, op.rs1._imm);
                        movd(rd, eax);
                    }
                    else if (rd != rs1)
                    {
                        movss(rd, rs1);
                    }
                    if (cpu.has(Cpu::tFMA) && !config::GGPOEnable)
                        vfmadd231ss(rd, rs2, rs3);
                    else
                    {
                        movss(xmm0, rs2);
                        mulss(xmm0, rs3);
                        addss(rd, xmm0);
                    }
                }
                break;
#endif

            default:
#ifndef CANONICAL_TEST
                if (!genBaseOpcode(op))
#endif
                    shil_chf[op.op](&op);
                break;
            }
            regalloc.OpEnd(&op);
        }
        regalloc.Cleanup();
        current_opid = -1;

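        // Block epilogue: store the next guest PC according to the block end type,
        // then fall through to the common exit.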
mov(rax, (size_t)&next_pc);
|
|
|
|
switch (block->BlockType) {
|
|
|
|
case BET_StaticJump:
|
|
case BET_StaticCall:
|
|
//next_pc = block->BranchBlock;
|
|
mov(dword[rax], block->BranchBlock);
|
|
break;
|
|
|
|
case BET_Cond_0:
|
|
case BET_Cond_1:
|
|
{
|
|
//next_pc = next_pc_value;
|
|
//if (*jdyn == 0)
|
|
//next_pc = branch_pc_value;
|
|
|
|
mov(dword[rax], block->NextBlock);
|
|
|
|
if (block->has_jcond)
|
|
mov(rdx, (size_t)&Sh4cntx.jdyn);
|
|
else
|
|
mov(rdx, (size_t)&sr.T);
|
|
|
|
cmp(dword[rdx], block->BlockType & 1);
|
|
Xbyak::Label branch_not_taken;
|
|
|
|
jne(branch_not_taken, T_SHORT);
|
|
mov(dword[rax], block->BranchBlock);
|
|
L(branch_not_taken);
|
|
}
|
|
break;
|
|
|
|
case BET_DynamicJump:
|
|
case BET_DynamicCall:
|
|
case BET_DynamicRet:
|
|
//next_pc = *jdyn;
|
|
mov(rdx, (size_t)&Sh4cntx.jdyn);
|
|
mov(edx, dword[rdx]);
|
|
mov(dword[rax], edx);
|
|
break;
|
|
|
|
case BET_DynamicIntr:
|
|
case BET_StaticIntr:
|
|
if (block->BlockType == BET_DynamicIntr) {
|
|
//next_pc = *jdyn;
|
|
mov(rdx, (size_t)&Sh4cntx.jdyn);
|
|
mov(edx, dword[rdx]);
|
|
mov(dword[rax], edx);
|
|
}
|
|
else {
|
|
//next_pc = next_pc_value;
|
|
mov(dword[rax], block->NextBlock);
|
|
}
|
|
|
|
GenCall(UpdateINTC);
|
|
break;
|
|
|
|
default:
|
|
die("Invalid block end type");
|
|
}
|
|
|
|
        L(exit_block);
        add(rsp, STACK_ALIGN);
        ret();

        ready();

        block->code = (DynarecCodeEntryPtr)getCode();
        block->host_code_size = getSize();

        codeBuffer.advance(getSize());
    }

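    // Canonical opcode interface: canonStart/canonParam collect the parameters of a
    // non-specialized opcode, then canonCall marshals them into argument registers
    // and calls the opcode's generic C implementation.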
    void canonStart(const shil_opcode& op)
    {
        CC_pars.clear();
    }

    void canonParam(const shil_opcode& op, const shil_param& prm, CanonicalParamType tp) {
        switch (tp)
        {

        case CPT_u32:
        case CPT_ptr:
        case CPT_f32:
            {
                CC_PS t = { tp, &prm };
                CC_pars.push_back(t);
            }
            break;

        // store from EAX
        case CPT_u64rvL:
        case CPT_u32rv:
            mov(rcx, rax);
            host_reg_to_shil_param(prm, ecx);
            break;

        case CPT_u64rvH:
            // assuming CPT_u64rvL has just been called
            shr(rcx, 32);
            host_reg_to_shil_param(prm, ecx);
            break;

        // store from xmm0
        case CPT_f32rv:
            host_reg_to_shil_param(prm, xmm0);
            break;
        }
    }

    void canonCall(const shil_opcode& op, void* function)
    {
        int regused = 0;
        int xmmused = 0;

        for (int i = CC_pars.size(); i-- > 0;)
        {
            verify(xmmused < 4 && regused < 4);
            const shil_param& prm = *CC_pars[i].prm;
            switch (CC_pars[i].type) {
            //push the contents

            case CPT_u32:
                shil_param_to_host_reg(prm, call_regs[regused++]);
                break;

            case CPT_f32:
                shil_param_to_host_reg(prm, call_regsxmm[xmmused++]);
                break;

            //push the ptr itself
            case CPT_ptr:
                verify(prm.is_reg());
                mov(call_regs64[regused++], (size_t)prm.reg_ptr());
                break;

            default:
                // Other cases handled in canonParam
                break;
            }
        }
        GenCall((void (*)())function);
#if ALLOC_F64 == true
        for (const CC_PS& ccParam : CC_pars)
        {
            const shil_param& prm = *ccParam.prm;
            if (ccParam.type == CPT_ptr && prm.count() == 2 && regalloc.IsAllocf(prm) && (op.rd._reg == prm._reg || op.rd2._reg == prm._reg)) {
                // fsca rd param is a pointer to a 64-bit reg so reload the regs if allocated
                mov(rax, (size_t)GetRegPtr(prm._reg));
                movss(regalloc.MapXRegister(prm, 0), dword[rax]);
                mov(rax, (size_t)GetRegPtr(prm._reg + 1));
                movss(regalloc.MapXRegister(prm, 1), dword[rax]);
            }
        }
#endif
    }

    void RegPreload(u32 reg, Xbyak::Operand::Code nreg)
    {
        mov(rax, (size_t)GetRegPtr(reg));
        mov(Xbyak::Reg32(nreg), dword[rax]);
    }
    void RegWriteback(u32 reg, Xbyak::Operand::Code nreg)
    {
        mov(rax, (size_t)GetRegPtr(reg));
        mov(dword[rax], Xbyak::Reg32(nreg));
    }
    void RegPreload_FPU(u32 reg, s8 nreg)
    {
        mov(rax, (size_t)GetRegPtr(reg));
        movss(Xbyak::Xmm(nreg), dword[rax]);
    }
    void RegWriteback_FPU(u32 reg, s8 nreg)
    {
        mov(rax, (size_t)GetRegPtr(reg));
        movss(dword[rax], Xbyak::Xmm(nreg));
    }

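    // Generates the static runtime code, emitted once per code buffer: the main
    // dispatch loop, the exception re-entry stub and the shared memory handlers.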
    void genMainloop()
    {
        unwinder.start((void *)getCurr());

        push(rbx);
        unwinder.pushReg(getSize(), Xbyak::Operand::RBX);
        push(rbp);
        unwinder.pushReg(getSize(), Xbyak::Operand::RBP);
#ifdef _WIN32
        push(rdi);
        unwinder.pushReg(getSize(), Xbyak::Operand::RDI);
        push(rsi);
        unwinder.pushReg(getSize(), Xbyak::Operand::RSI);
#endif
        push(r12);
        unwinder.pushReg(getSize(), Xbyak::Operand::R12);
        push(r13);
        unwinder.pushReg(getSize(), Xbyak::Operand::R13);
        push(r14);
        unwinder.pushReg(getSize(), Xbyak::Operand::R14);
        push(r15);
        unwinder.pushReg(getSize(), Xbyak::Operand::R15);
        sub(rsp, STACK_ALIGN);
        unwinder.allocStack(getSize(), STACK_ALIGN);
        unwinder.endProlog(getSize());

        mov(qword[rip + &jmp_rsp], rsp);

        //run_loop:
        Xbyak::Label run_loop;
        L(run_loop);
        Xbyak::Label end_run_loop;
        mov(rax, (size_t)&p_sh4rcb->cntx.CpuRunning);
        mov(edx, dword[rax]);

        test(edx, edx);
        je(end_run_loop);

        //slice_loop:
        Xbyak::Label slice_loop;
        L(slice_loop);
        mov(rax, (size_t)&p_sh4rcb->cntx.pc);
        mov(call_regs[0], dword[rax]);
        call(bm_GetCodeByVAddr);
        call(rax);
        mov(rax, (uintptr_t)&p_sh4rcb->cntx.cycle_counter);
        mov(ecx, dword[rax]);
        test(ecx, ecx);
        jg(slice_loop);

        add(ecx, SH4_TIMESLICE);
        mov(dword[rax], ecx);
        call(UpdateSystem_INTC);
        jmp(run_loop);

        //end_run_loop:
        L(end_run_loop);
        add(rsp, STACK_ALIGN);
        pop(r15);
        pop(r14);
        pop(r13);
        pop(r12);
#ifdef _WIN32
        pop(rsi);
        pop(rdi);
#endif
        pop(rbp);
        pop(rbx);
        ret();
        size_t unwindSize = unwinder.end(getSize());
        setSize(getSize() + unwindSize);

        unwinder.start((void *)getCurr());
        size_t startOffset = getSize();
#ifdef _WIN32
        // 32-byte shadow space + 8 for stack 16-byte alignment
        unwinder.allocStack(0, 40);
#else
        // stack 16-byte alignment
        unwinder.allocStack(0, 8);
#endif
        unwinder.endProlog(0);

        //handleException:
        Xbyak::Label handleExceptionLabel;
        L(handleExceptionLabel);
        mov(rsp, qword[rip + &jmp_rsp]);
        jmp(run_loop);

        genMemHandlers();

        size_t savedSize = getSize();
        setSize(codeBuffer.getFreeSpace() - 128 - startOffset);
        unwindSize = unwinder.end(getSize());
        verify(unwindSize <= 128);
        setSize(savedSize);

        ready();
        mainloop = (void (*)())getCode();
        handleException = (void(*)())handleExceptionLabel.getAddress();

        codeBuffer.advance(getSize());
    }

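    // Called from the host fault handler when a Fast memory handler faulted:
    // patches the faulting call site to use the Slow (or store queue) handler
    // instead and restores the guest address argument.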
    bool rewriteMemAccess(host_context_t &context)
    {
        if (!addrspace::virtmemEnabled())
            return false;

        //printf("rewriteMemAccess pc %p\n", context.pc);
        if (context.pc < (size_t)MemHandlerStart || context.pc >= (size_t)MemHandlerEnd)
            return false;

        u8 *retAddr = *(u8 **)context.rsp;
        void *ca = *(s32 *)(retAddr - 4) + retAddr;
        for (int size = 0; size < MemSize::Count; size++)
        {
            for (int op = 0; op < MemOp::Count; op++)
            {
                if ((void *)MemHandlers[MemType::Fast][size][op] != ca)
                    continue;

                //found !
                const u8 *start = getCurr();
                u32 memAddress = context.r9;
                if (op == MemOp::W && size >= MemSize::S32 && (memAddress >> 26) == 0x38)
                    call(MemHandlers[MemType::StoreQueue][size][MemOp::W]);
                else
                    call(MemHandlers[MemType::Slow][size][op]);
                verify(getCurr() - start == 5);

                ready();

                context.pc = (uintptr_t)(retAddr - 5);
                // remove the call from the stack
                context.rsp += 8;
                //restore the addr from r9 to arg0 (rcx or rdi) so it's valid again
#ifdef _WIN32
                context.rcx = memAddress;
#else
                context.rdi = memAddress;
#endif

                return true;
            }
        }
        ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: host pc %p", (void *)context.pc);
        die("Failed to match the code");

        return false;
    }

private:
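    // Emit the virtual-to-physical translation of the address in the first argument
    // register. With FAST_MMU the per-page cache (mmuAddressLUT) is probed first and
    // mmuDynarecLookup is only called on a miss.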
    void genMmuLookup(const RuntimeBlockInfo* block, const shil_opcode& op, u32 write)
    {
        if (mmu_enabled())
        {
#ifdef FAST_MMU
            Xbyak::Label inCache;
            Xbyak::Label done;

            mov(eax, call_regs[0]);
            shr(eax, 12);
            if ((uintptr_t)mmuAddressLUT >> 32 != 0)
            {
                mov(r9, (uintptr_t)mmuAddressLUT);
                mov(eax, dword[r9 + rax * 4]);
            }
            else
            {
                mov(eax, dword[(uintptr_t)mmuAddressLUT + rax * 4]);
            }
            test(eax, eax);
            jne(inCache);
#endif
            mov(call_regs[1], write);
            mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));  // pc
            GenCall(mmuDynarecLookup);
            mov(call_regs[0], eax);
#ifdef FAST_MMU
            jmp(done);
            L(inCache);
            and_(call_regs[0], 0xFFF);
            or_(call_regs[0], eax);
            L(done);
#endif
        }
    }
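    // Fast path for a read from a compile-time constant address: access RAM directly
    // through its host pointer, or call the resolved I/O handler.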
    bool GenReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
    {
        if (!op.rs1.is_imm())
            return false;
        void *ptr;
        bool isram;
        u32 addr;
        if (!rdv_readMemImmediate(op.rs1._imm, op.size, ptr, isram, addr, block))
            return false;

        if (isram)
        {
            // Immediate pointer to RAM: super-duper fast access
            mov(rax, reinterpret_cast<uintptr_t>(ptr));
            switch (op.size)
            {
            case 1:
                if (regalloc.IsAllocg(op.rd))
                    movsx(regalloc.MapRegister(op.rd), byte[rax]);
                else
                {
                    movsx(eax, byte[rax]);
                    mov(rcx, (uintptr_t)op.rd.reg_ptr());
                    mov(dword[rcx], eax);
                }
                break;

            case 2:
                if (regalloc.IsAllocg(op.rd))
                    movsx(regalloc.MapRegister(op.rd), word[rax]);
                else
                {
                    movsx(eax, word[rax]);
                    mov(rcx, (uintptr_t)op.rd.reg_ptr());
                    mov(dword[rcx], eax);
                }
                break;

            case 4:
                if (regalloc.IsAllocg(op.rd))
                    mov(regalloc.MapRegister(op.rd), dword[rax]);
                else if (regalloc.IsAllocf(op.rd))
                    movd(regalloc.MapXRegister(op.rd), dword[rax]);
                else
                {
                    mov(eax, dword[rax]);
                    mov(rcx, (uintptr_t)op.rd.reg_ptr());
                    mov(dword[rcx], eax);
                }
                break;

            case 8:
#if ALLOC_F64 == false
                mov(rcx, qword[rax]);
                mov(rax, (uintptr_t)op.rd.reg_ptr());
                mov(qword[rax], rcx);
#else
                movd(regalloc.MapXRegister(op.rd, 0), dword[rax]);
                movd(regalloc.MapXRegister(op.rd, 1), dword[rax + 4]);
#endif
                break;

            default:
                die("Invalid immediate size");
                break;
            }
        }
        else
        {
            // Not RAM: the returned pointer is a memory handler
            if (op.size == 8)
            {
                // Need to call the handler twice
                mov(call_regs[0], addr);
                GenCall((void (*)())ptr);
#if ALLOC_F64 == false
                mov(rcx, (size_t)op.rd.reg_ptr());
                mov(dword[rcx], eax);
#else
                movd(regalloc.MapXRegister(op.rd, 0), eax);
#endif

                mov(call_regs[0], addr + 4);
                GenCall((void (*)())ptr);
#if ALLOC_F64 == false
                mov(rcx, (size_t)op.rd.reg_ptr() + 4);
                mov(dword[rcx], eax);
#else
                movd(regalloc.MapXRegister(op.rd, 1), eax);
#endif
            }
            else
            {
                mov(call_regs[0], addr);

                switch(op.size)
                {
                case 1:
                    GenCall((void (*)())ptr);
                    movsx(eax, al);
                    break;

                case 2:
                    GenCall((void (*)())ptr);
                    movsx(eax, ax);
                    break;

                case 4:
                    GenCall((void (*)())ptr);
                    break;

                default:
                    die("Invalid immediate size");
                    break;
                }
                mov(ecx, eax);
                host_reg_to_shil_param(op.rd, ecx);
            }
        }

        return true;
    }

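    // Same as GenReadMemImmediate but for writes to a compile-time constant address.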
    bool GenWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
    {
        if (!op.rs1.is_imm())
            return false;
        void *ptr;
        bool isram;
        u32 addr;
        if (!rdv_writeMemImmediate(op.rs1._imm, op.size, ptr, isram, addr, block))
            return false;
        if (isram)
        {
            // Immediate pointer to RAM: super-duper fast access
            mov(rax, reinterpret_cast<uintptr_t>(ptr));
            switch (op.size)
            {
            case 1:
                if (regalloc.IsAllocg(op.rs2))
                    mov(byte[rax], regalloc.MapRegister(op.rs2).cvt8());
                else if (op.rs2.is_imm())
                    mov(byte[rax], (u8)op.rs2._imm);
                else
                {
                    mov(rcx, (uintptr_t)op.rs2.reg_ptr());
                    mov(cl, byte[rcx]);
                    mov(byte[rax], cl);
                }
                break;

            case 2:
                if (regalloc.IsAllocg(op.rs2))
                    mov(word[rax], regalloc.MapRegister(op.rs2).cvt16());
                else if (op.rs2.is_imm())
                    mov(word[rax], (u16)op.rs2._imm);
                else
                {
                    mov(rcx, (uintptr_t)op.rs2.reg_ptr());
                    mov(cx, word[rcx]);
                    mov(word[rax], cx);
                }
                break;

            case 4:
                if (regalloc.IsAllocg(op.rs2))
                    mov(dword[rax], regalloc.MapRegister(op.rs2));
                else if (regalloc.IsAllocf(op.rs2))
                    movd(dword[rax], regalloc.MapXRegister(op.rs2));
                else if (op.rs2.is_imm())
                    mov(dword[rax], op.rs2._imm);
                else
                {
                    mov(rcx, (uintptr_t)op.rs2.reg_ptr());
                    mov(ecx, dword[rcx]);
                    mov(dword[rax], ecx);
                }
                break;

            case 8:
#if ALLOC_F64 == false
                mov(rcx, (uintptr_t)op.rs2.reg_ptr());
                mov(rcx, qword[rcx]);
                mov(qword[rax], rcx);
#else
                movd(dword[rax], regalloc.MapXRegister(op.rs2, 0));
                movd(dword[rax + 4], regalloc.MapXRegister(op.rs2, 1));
#endif
                break;

            default:
                die("Invalid immediate size");
                break;
            }
        }
        else
        {
            // Not RAM: the returned pointer is a memory handler
            mov(call_regs[0], addr);
            shil_param_to_host_reg(op.rs2, call_regs[1]);

            GenCall((void (*)())ptr);
        }

        return true;
    }

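    // Emit the block entry checks: with the MMU on, verify that next_pc still matches
    // the block's virtual address; when force_checks is set, also compare the SH4 code
    // bytes against the copy seen at compile time (self-modifying code check).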
    void CheckBlock(bool force_checks, RuntimeBlockInfo* block)
    {
        if (mmu_enabled() || force_checks)
            mov(call_regs[0], block->addr);

        // FIXME This test shouldn't be necessary
        // However the decoder makes various assumptions about the current PC value, which are simply not
        // true in a virtualized memory model. So this can only work if virtual and phy addresses are the
        // same at compile and run times.
        if (mmu_enabled())
        {
            mov(rax, (uintptr_t)&next_pc);
            cmp(dword[rax], block->vaddr);
            jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
        }

        if (!force_checks)
            return;

        s32 sz = block->sh4_code_size;
        u32 sa = block->addr;

        void* ptr = (void*)GetMemPtr(sa, sz > 8 ? 8 : sz);
        if (ptr)
        {
            while (sz > 0)
            {
                uintptr_t uintptr = reinterpret_cast<uintptr_t>(ptr);
                mov(rax, uintptr);

                if (sz >= 8 && !(uintptr & 7)) {
                    mov(rdx, *(u64*)ptr);
                    cmp(qword[rax], rdx);
                    sz -= 8;
                    sa += 8;
                }
                else if (sz >= 4 && !(uintptr & 3)) {
                    mov(edx, *(u32*)ptr);
                    cmp(dword[rax], edx);
                    sz -= 4;
                    sa += 4;
                }
                else {
                    mov(edx, *(u16*)ptr);
                    cmp(word[rax], dx);
                    sz -= 2;
                    sa += 2;
                }
                jne(reinterpret_cast<const void*>(CC_RX2RW(&ngen_blockcheckfail)));
                ptr = (void*)GetMemPtr(sa, sz > 8 ? 8 : sz);
            }
        }
    }

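    // Generate the shared memory access thunks: Fast (direct access to guest RAM
    // through the host address space), StoreQueue (store queue writes with a fallback
    // to addrspace) and Slow (plain addrspace calls).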
    void genMemHandlers()
    {
        // make sure the memory handlers are set
        verify(ReadMem8 != nullptr);

        MemHandlerStart = getCurr();
        for (int type = 0; type < MemType::Count; type++)
        {
            for (int size = 0; size < MemSize::Count; size++)
            {
                for (int op = 0; op < MemOp::Count; op++)
                {
                    MemHandlers[type][size][op] = getCurr();
                    if (type == MemType::Fast && addrspace::virtmemEnabled())
                    {
                        mov(rax, (uintptr_t)addrspace::ram_base);
                        mov(r9, call_regs64[0]);
                        and_(call_regs[0], 0x1FFFFFFF);

                        switch (size)
                        {
                        case MemSize::S8:
                            if (op == MemOp::R)
                                movsx(eax, byte[rax + call_regs64[0]]);
                            else
                                mov(byte[rax + call_regs64[0]], call_regs[1].cvt8());
                            break;

                        case MemSize::S16:
                            if (op == MemOp::R)
                                movsx(eax, word[rax + call_regs64[0]]);
                            else
                                mov(word[rax + call_regs64[0]], call_regs[1].cvt16());
                            break;

                        case MemSize::S32:
                            if (op == MemOp::R)
                                mov(eax, dword[rax + call_regs64[0]]);
                            else
                                mov(dword[rax + call_regs64[0]], call_regs[1]);
                            break;

                        case MemSize::S64:
                            if (op == MemOp::R)
                                mov(rax, qword[rax + call_regs64[0]]);
                            else
                                mov(qword[rax + call_regs64[0]], call_regs64[1]);
                            break;
                        }
                    }
                    else if (type == MemType::StoreQueue)
                    {
                        if (op != MemOp::W || size < MemSize::S32)
                            continue;
                        Xbyak::Label no_sqw;

                        mov(r9d, call_regs[0]);
                        shr(r9d, 26);
                        cmp(r9d, 0x38);
                        jne(no_sqw);
                        mov(rax, (uintptr_t)p_sh4rcb->sq_buffer);
                        and_(call_regs[0], 0x3F);

                        if (size == MemSize::S32)
                            mov(dword[rax + call_regs64[0]], call_regs[1]);
                        else
                            mov(qword[rax + call_regs64[0]], call_regs64[1]);
                        ret();
                        L(no_sqw);
                        if (size == MemSize::S32)
                            jmp((const void *)addrspace::write32);  // tail call
                        else
                            jmp((const void *)addrspace::write64);  // tail call
                        continue;
                    }
                    else
                    {
                        // Slow path
                        if (op == MemOp::R)
                        {
                            switch (size) {
                            case MemSize::S8:
                                sub(rsp, STACK_ALIGN);
                                call((const void *)addrspace::read8);
                                movsx(eax, al);
                                add(rsp, STACK_ALIGN);
                                break;
                            case MemSize::S16:
                                sub(rsp, STACK_ALIGN);
                                call((const void *)addrspace::read16);
                                movsx(eax, ax);
                                add(rsp, STACK_ALIGN);
                                break;
                            case MemSize::S32:
                                jmp((const void *)addrspace::read32);  // tail call
                                continue;
                            case MemSize::S64:
                                jmp((const void *)addrspace::read64);  // tail call
                                continue;
                            default:
                                die("1..8 bytes");
                            }
                        }
                        else
                        {
                            switch (size) {
                            case MemSize::S8:
                                jmp((const void *)addrspace::write8);  // tail call
                                continue;
                            case MemSize::S16:
                                jmp((const void *)addrspace::write16);  // tail call
                                continue;
                            case MemSize::S32:
                                jmp((const void *)addrspace::write32);  // tail call
                                continue;
                            case MemSize::S64:
                                jmp((const void *)addrspace::write64);  // tail call
                                continue;
                            default:
                                die("1..8 bytes");
                            }
                        }
                    }
                    ret();
                }
            }
        }
        MemHandlerEnd = getCurr();
    }

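    // On the System V ABI all xmm registers are caller-saved, so SH4 float registers
    // kept in xmm8-xmm11 by the allocator must be spilled around C calls. On Windows
    // xmm6-xmm15 are callee-saved and no spilling is needed.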
    void saveXmmRegisters()
    {
#ifndef _WIN32
        if (current_opid == (size_t)-1)
            return;

        if (regalloc.IsMapped(xmm8, current_opid))
            movd(ptr[rip + &xmmSave[0]], xmm8);
        if (regalloc.IsMapped(xmm9, current_opid))
            movd(ptr[rip + &xmmSave[1]], xmm9);
        if (regalloc.IsMapped(xmm10, current_opid))
            movd(ptr[rip + &xmmSave[2]], xmm10);
        if (regalloc.IsMapped(xmm11, current_opid))
            movd(ptr[rip + &xmmSave[3]], xmm11);
#endif
    }

    void restoreXmmRegisters()
    {
#ifndef _WIN32
        if (current_opid == (size_t)-1)
            return;

        if (regalloc.IsMapped(xmm8, current_opid))
            movd(xmm8, ptr[rip + &xmmSave[0]]);
        if (regalloc.IsMapped(xmm9, current_opid))
            movd(xmm9, ptr[rip + &xmmSave[1]]);
        if (regalloc.IsMapped(xmm10, current_opid))
            movd(xmm10, ptr[rip + &xmmSave[2]]);
        if (regalloc.IsMapped(xmm11, current_opid))
            movd(xmm11, ptr[rip + &xmmSave[3]]);
#endif
    }

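    // Call a C function from generated code, spilling and reloading live xmm
    // registers around the call unless the caller asks to skip them.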
template<class Ret, class... Params>
|
|
void GenCall(Ret(*function)(Params...), bool skip_floats = false)
|
|
{
|
|
if (!skip_floats)
|
|
saveXmmRegisters();
|
|
call(CC_RX2RW(function));
|
|
if (!skip_floats)
|
|
restoreXmmRegisters();
|
|
}
|
|
|
|
struct CC_PS
|
|
{
|
|
CanonicalParamType type;
|
|
const shil_param* prm;
|
|
};
|
|
std::vector<CC_PS> CC_pars;
|
|
|
|
X64RegAlloc regalloc;
|
|
Xbyak::util::Cpu cpu;
|
|
size_t current_opid;
|
|
Xbyak::Label exit_block;
|
|
};
|
|
|
|
void X64RegAlloc::Preload(u32 reg, Xbyak::Operand::Code nreg)
{
    compiler->RegPreload(reg, nreg);
}
void X64RegAlloc::Writeback(u32 reg, Xbyak::Operand::Code nreg)
{
    compiler->RegWriteback(reg, nreg);
}
void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg)
{
    compiler->RegPreload_FPU(reg, nreg);
}
void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
{
    compiler->RegWriteback_FPU(reg, nreg);
}

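// Sh4Dynarec backend entry points: drives BlockCompiler, toggles write/execute
// protection on the code buffer, and hooks the fault-handling callbacks.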
class X64Dynarec : public Sh4Dynarec
{
public:
    X64Dynarec() {
        sh4Dynarec = this;
    }

    void compile(RuntimeBlockInfo* block, bool smc_checks, bool optimise) override
    {
        void* protStart = codeBuffer->get();
        size_t protSize = codeBuffer->getFreeSpace();
        virtmem::jit_set_exec(protStart, protSize, false);

        ccCompiler = new BlockCompiler(*codeBuffer);
        try {
            ccCompiler->compile(block, smc_checks, optimise);
        } catch (const Xbyak::Error& e) {
            ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what());
        }
        delete ccCompiler;
        ccCompiler = nullptr;
        virtmem::jit_set_exec(protStart, protSize, true);
    }

    void init(Sh4CodeBuffer& codeBuffer) override
    {
        this->codeBuffer = &codeBuffer;
    }

    void mainloop(void *) override
    {
        verify(::mainloop != nullptr);
        try {
            ::mainloop();
        } catch (const SH4ThrownException& ex) {
            ERROR_LOG(DYNAREC, "SH4ThrownException in mainloop code %x", ex.expEvn);
            throw FlycastException("Fatal: Unhandled SH4 exception");
        }
    }

    void canonStart(const shil_opcode* op) override {
        ccCompiler->canonStart(*op);
    }

    void canonParam(const shil_opcode* op, const shil_param* par, CanonicalParamType tp) override {
        ccCompiler->canonParam(*op, *par, tp);
    }

    void canonCall(const shil_opcode* op, void* function) override {
        ccCompiler->canonCall(*op, function);
    }

    void canonFinish(const shil_opcode* op) override {
    }

    bool rewrite(host_context_t &context, void *faultAddress) override
    {
        if (codeBuffer == nullptr)
            // init() not called yet
            return false;
        void* protStart = codeBuffer->get();
        size_t protSize = codeBuffer->getFreeSpace();
        virtmem::jit_set_exec(protStart, protSize, false);

        u8 *retAddr = *(u8 **)context.rsp - 5;
        BlockCompiler compiler(*codeBuffer, retAddr);
        bool rc = false;
        try {
            rc = compiler.rewriteMemAccess(context);
        } catch (const Xbyak::Error& e) {
            ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what());
        }
        virtmem::jit_set_exec(protStart, protSize, true);
        return rc;
    }

    void handleException(host_context_t &context) override
    {
        context.pc = (uintptr_t)::handleException;
    }

    void reset() override
    {
        unwinder.clear();
        // Avoid generating the main loop more than once
        if (::mainloop != nullptr && ::mainloop != codeBuffer->get())
            return;

        void* protStart = codeBuffer->get();
        size_t protSize = codeBuffer->getFreeSpace();
        virtmem::jit_set_exec(protStart, protSize, false);

        BlockCompiler compiler(*codeBuffer);
        try {
            compiler.genMainloop();
        } catch (const Xbyak::Error& e) {
            ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what());
        }
        virtmem::jit_set_exec(protStart, protSize, true);
    }

private:
    Sh4CodeBuffer *codeBuffer = nullptr;
    BlockCompiler *ccCompiler = nullptr;
};

static X64Dynarec instance;

#endif