Merge remote-tracking branch 'origin/master' into fh/x64-arm-jit

This commit is contained in:
Flyinghead 2021-01-29 11:36:18 +01:00
commit 1b47ef0ab1
8 changed files with 329 additions and 159 deletions

View File

@ -28,7 +28,7 @@
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
static u8 *WritableCodeBuffer;
static u8 *pCodeBuffer;
class DSPAssembler : public MacroAssembler
{
@ -447,18 +447,18 @@ void dsp_recompile()
break;
}
}
DSPAssembler assembler(WritableCodeBuffer, sizeof(dsp.DynCode));
DSPAssembler assembler(pCodeBuffer, sizeof(dsp.DynCode));
assembler.Compile(&dsp);
}
void dsp_rec_init()
{
if (!vmem_platform_prepare_jit_block(dsp.DynCode, sizeof(dsp.DynCode), (void**)&WritableCodeBuffer))
if (!vmem_platform_prepare_jit_block(dsp.DynCode, sizeof(dsp.DynCode), (void**)&pCodeBuffer))
die("mprotect failed in arm64 dsp");
}
void dsp_rec_step()
{
((void (*)())dsp.DynCode)();
((void (*)())pCodeBuffer)();
}
#endif

View File

@ -41,7 +41,7 @@ alignas(4096) static u8 CodeBuffer[32 * 1024]
#else
#error CodeBuffer code section unknown
#endif
static u8 *WritableCodeBuffer;
static u8 *pCodeBuffer;
class X64DSPAssembler : public Xbyak::CodeGenerator
{
@ -420,18 +420,18 @@ void dsp_recompile()
break;
}
}
X64DSPAssembler assembler(WritableCodeBuffer, sizeof(CodeBuffer));
X64DSPAssembler assembler(pCodeBuffer, sizeof(CodeBuffer));
assembler.Compile(&dsp);
}
void dsp_rec_init()
{
if (!vmem_platform_prepare_jit_block(CodeBuffer, sizeof(CodeBuffer), (void**)&WritableCodeBuffer))
if (!vmem_platform_prepare_jit_block(CodeBuffer, sizeof(CodeBuffer), (void**)&pCodeBuffer))
die("mprotect failed in x64 dsp");
}
void dsp_rec_step()
{
((void (*)())&CodeBuffer[0])();
((void (*)())&pCodeBuffer[0])();
}
#endif

View File

@ -42,7 +42,7 @@ alignas(4096) static u8 CodeBuffer[32 * 1024]
#else
#error CodeBuffer code section unknown
#endif
static u8 *WritableCodeBuffer;
static u8 *pCodeBuffer;
class X86DSPAssembler : public Xbyak::CodeGenerator
{
@ -386,18 +386,18 @@ void dsp_recompile()
break;
}
}
X86DSPAssembler assembler(WritableCodeBuffer, sizeof(CodeBuffer));
X86DSPAssembler assembler(pCodeBuffer, sizeof(CodeBuffer));
assembler.Compile(&dsp);
}
void dsp_rec_init()
{
if (!vmem_platform_prepare_jit_block(CodeBuffer, sizeof(CodeBuffer), (void**)&WritableCodeBuffer))
if (!vmem_platform_prepare_jit_block(CodeBuffer, sizeof(CodeBuffer), (void**)&pCodeBuffer))
die("mprotect failed in x86 dsp");
}
void dsp_rec_step()
{
((void (*)())&CodeBuffer[0])();
((void (*)())&pCodeBuffer[0])();
}
#endif

View File

@ -501,7 +501,10 @@ void dc_init()
set_platform(DC_PLATFORM_DREAMCAST);
plugins_Init();
mem_Init();
reios_init();
// the recompiler may start generating code at this point and needs a fully configured machine
#if FEAT_SHREC != DYNAREC_NONE
Get_Sh4Recompiler(&sh4_cpu);
sh4_cpu.Init(); // Also initialize the interpreter
@ -517,9 +520,6 @@ void dc_init()
INFO_LOG(INTERPRETER, "Using Interpreter");
}
mem_Init();
reios_init();
init_done = true;
}

View File

@ -219,28 +219,9 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block)
case BET_DynamicRet:
case BET_DynamicCall:
case BET_DynamicJump:
if (block->relink_data == 0)
{
if (block->pBranchBlock)
{
cmp(dword[GetRegPtr(reg_pc_dyn)], block->pBranchBlock->addr);
je((const void *)block->pBranchBlock->code);
call(ngen_LinkBlock_Generic_stub);
}
else
{
// dummy cmp/je to keep same code size in both cases
cmp(dword[GetRegPtr(reg_pc_dyn)], 0xFABCDECF);
call(ngen_LinkBlock_Generic_stub);
je((const void *)ngen_LinkBlock_Generic_stub);
}
}
else
{
verify(block->pBranchBlock == nullptr);
mov(ecx, dword[GetRegPtr(reg_pc_dyn)]);
jmp((const void *)no_update);
}
mov(ecx, dword[GetRegPtr(reg_pc_dyn)]);
jmp((const void *)no_update);
break;
case BET_StaticCall:
@ -398,9 +379,10 @@ void X86Compiler::genMainloop()
//no_update:
Xbyak::Label no_updateLabel;
L(no_updateLabel);
mov(esi, ecx); // save sh4 pc in ESI, used below if the jump table is still empty for this address
call((void *)bm_GetCodeByVAddr);
jmp(eax);
mov(esi, ecx); // save sh4 pc in ESI, used below if FPCB is still empty for this address
mov(eax, (size_t)&p_sh4rcb->fpcb[0]);
and_(ecx, RAM_SIZE_MAX - 2);
jmp(dword[eax + ecx * 2]);
//intc_sched:
Xbyak::Label intc_schedLabel;
@ -473,6 +455,8 @@ void X86Compiler::genMainloop()
call((void *)rdv_BlockCheckFail);
jmp(eax);
genMemHandlers();
ready();
mainloop = (void (*)())getCode();
@ -599,34 +583,6 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b
return true;
}
bool X86Compiler::genReadMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
{
// TODO
return false;
}
void X86Compiler::genReadMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
{
u32 size = op.flags & 0x7f;
switch (size) {
case 1:
genCall(ReadMem8);
movsx(eax, al);
break;
case 2:
genCall(ReadMem16);
movsx(eax, ax);
break;
case 4:
genCall(ReadMem32);
break;
case 8:
genCall(ReadMem64);
break;
default:
die("1..8 bytes");
}
}
bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
{
@ -721,32 +677,6 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo*
return true;
}
bool X86Compiler::genWriteMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
{
// TODO
return false;
}
void X86Compiler::genWriteMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
{
u32 size = op.flags & 0x7f;
switch (size) {
case 1:
genCall(WriteMem8);
break;
case 2:
genCall(WriteMem16);
break;
case 4:
genCall(WriteMem32);
break;
case 8:
genCall(WriteMem64);
break;
default:
die("1..8 bytes");
}
}
void X86Compiler::checkBlock(bool smc_checks, RuntimeBlockInfo* block)
{
if (!smc_checks)
@ -783,7 +713,11 @@ void ngen_ResetBlocks()
compiler = new X86Compiler();
compiler->genMainloop();
try {
compiler->genMainloop();
} catch (const Xbyak::Error& e) {
ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what());
}
delete compiler;
compiler = nullptr;
@ -818,10 +752,13 @@ void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool, bool, bool opt
delete compiler;
}
bool ngen_Rewrite(unat& host_pc, unat, unat)
bool ngen_Rewrite(size_t& host_pc, size_t addr, size_t acc)
{
// TODO
return false;
X86Compiler *compiler = new X86Compiler((u8*)(addr - 5));
bool rv = compiler->rewriteMemAccess(host_pc, addr, acc);
delete compiler;
return rv;
}
void ngen_CC_Start(shil_opcode* op)

View File

@ -75,17 +75,22 @@ public:
void genMainloop();
u32 relinkBlock(RuntimeBlockInfo *block);
bool rewriteMemAccess(size_t& host_pc, size_t retadr, size_t acc);
private:
void genOpcode(RuntimeBlockInfo *block, bool optimise, shil_opcode& op);
bool genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo *block);
bool genReadMemoryFast(const shil_opcode& op, RuntimeBlockInfo *block);
void genReadMemorySlow(const shil_opcode& op, RuntimeBlockInfo *block);
bool genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo *block);
bool genWriteMemoryFast(const shil_opcode& op, RuntimeBlockInfo *block);
void genWriteMemorySlow(const shil_opcode& op, RuntimeBlockInfo *block);
void genMemHandlers();
// Emits an esp adjustment of `amount` bytes into the generated code:
// a positive amount emits `add esp, amount`, anything else emits
// `sub esp, -amount`. Nothing is emitted when _WIN32 is defined.
void alignStack(int amount) {
#ifndef _WIN32
	if (amount <= 0) {
		// Zero or negative: grow the stack by the magnitude
		sub(esp, -amount);
		return;
	}
	add(esp, amount);
#endif
}
void checkBlock(bool smc_checks, RuntimeBlockInfo *block);
void freezeXMM();

View File

@ -25,16 +25,191 @@
#include "rec_x86.h"
#include "hw/sh4/sh4_opcode_list.h"
#include "hw/sh4/sh4_core.h"
#include "hw/mem/_vmem.h"
// Index spaces for the MemHandlers table (type x size x op).
// The enumerator order matters: values are used directly as array indices
// and the trailing *Count enumerators give the array dimensions.
namespace MemOp {
// Access width / register class of a memory operation.
// genOpcode maps access sizes 1, 2 and 4 bytes to S8, S16 and S32,
// picks F32 instead of S32 for a 4-byte access whose operand is allocated
// to a float register, and uses F64 for 8-byte accesses.
enum Size {
S8,
S16,
S32,
F32,
F64,
SizeCount
};
// Direction of the access: R = read, W = write.
enum Op {
R,
W,
OpCount
};
// Handler flavour: Fast handlers access memory directly through
// virt_ram_base when _nvmem_enabled() is true; Slow handlers go through
// the ReadMemNN / WriteMemNN functions.
enum Type {
Fast,
Slow,
TypeCount
};
}
// Entry point of each generated memory handler, indexed [type][size][op].
// Filled in by X86Compiler::genMemHandlers().
static const void *MemHandlers[MemOp::TypeCount][MemOp::SizeCount][MemOp::OpCount];
// Bounds of the generated handler code: start is inclusive, end is exclusive.
// rewriteMemAccess() uses them to decide whether a host pc lies inside a handler.
static const u8 *MemHandlerStart, *MemHandlerEnd;
// Generates one small memory-access routine for every combination of
// MemOp::Type, MemOp::Size and MemOp::Op, records each entry point in
// MemHandlers and brackets the whole region with MemHandlerStart/MemHandlerEnd.
//
// Register usage of the generated handlers, as visible in the code below:
//  - ecx holds the address on entry
//  - integer reads leave their (sign-extended for 8/16-bit) result in eax
//  - integer writes take the value from dl / dx / edx
//  - float accesses use xmm0 (F32) or xmm0 + xmm1 (F64, low dword in xmm0)
void X86Compiler::genMemHandlers()
{
// make sure the memory handlers are set
verify(ReadMem8 != nullptr);
MemHandlerStart = getCurr();
for (int type = 0; type < MemOp::TypeCount; type++)
{
for (int size = 0; size < MemOp::SizeCount; size++)
{
for (int op = 0; op < MemOp::OpCount; op++)
{
MemHandlers[type][size][op] = getCurr();
// Direct access is only emitted for Fast handlers when _nvmem_enabled()
// is true; otherwise the Fast slot gets the same code as the Slow one.
if (type == MemOp::Fast && _nvmem_enabled())
{
// Keep a copy of the unmasked address in eax before masking ecx.
// NOTE(review): presumably so the original address can be recovered if the
// access faults; the consumer is not visible here, confirm.
mov(eax, ecx);
// Keep only the low 29 bits of the address
and_(ecx, 0x1FFFFFFF);
Xbyak::Address address = dword[ecx];
// Only assigned (and only used) for the integer sizes S8/S16/S32
Xbyak::Reg reg;
switch (size)
{
case MemOp::S8:
address = byte[ecx + (size_t)virt_ram_base];
reg = op == MemOp::R ? (Xbyak::Reg)eax : (Xbyak::Reg)dl;
break;
case MemOp::S16:
address = word[ecx + (size_t)virt_ram_base];
reg = op == MemOp::R ? (Xbyak::Reg)eax : (Xbyak::Reg)dx;
break;
case MemOp::S32:
address = dword[ecx + (size_t)virt_ram_base];
reg = op == MemOp::R ? eax : edx;
break;
default:
// F32 / F64: accessed as 32-bit words through xmm registers below
address = dword[ecx + (size_t)virt_ram_base];
break;
}
if (size >= MemOp::F32)
{
if (op == MemOp::R)
movss(xmm0, address);
else
movss(address, xmm0);
if (size == MemOp::F64)
{
// Second 32-bit half of the 64-bit value, at offset +4, goes through xmm1
address = dword[ecx + (size_t)virt_ram_base + 4];
if (op == MemOp::R)
movss(xmm1, address);
else
movss(address, xmm1);
}
}
else
{
if (op == MemOp::R)
{
// 8- and 16-bit reads are sign-extended into eax
if (size <= MemOp::S16)
movsx(reg, address);
else
mov(reg, address);
}
else
mov(address, reg);
}
}
else
{
// Slow path
// Cases that end in jmp are tail calls: the target returns straight to the
// handler's caller, so they 'continue' the op loop and skip the ret() below.
if (op == MemOp::R)
{
switch (size) {
case MemOp::S8:
// 16-byte alignment
alignStack(-12);
call((const void *)ReadMem8);
// Sign-extend the 8-bit result
movsx(eax, al);
alignStack(12);
break;
case MemOp::S16:
// 16-byte alignment
alignStack(-12);
call((const void *)ReadMem16);
// Sign-extend the 16-bit result
movsx(eax, ax);
alignStack(12);
break;
case MemOp::S32:
jmp((const void *)ReadMem32); // tail call
continue;
case MemOp::F32:
// 16-byte alignment
alignStack(-12);
call((const void *)ReadMem32);
// Move the integer result into xmm0 for the float destination
movd(xmm0, eax);
alignStack(12);
break;
case MemOp::F64:
// 16-byte alignment
alignStack(-12);
call((const void *)ReadMem64);
// 64-bit result comes back in edx:eax; split it into xmm0 (low) / xmm1 (high)
movd(xmm0, eax);
movd(xmm1, edx);
alignStack(12);
break;
default:
die("1..8 bytes");
}
}
else
{
switch (size) {
case MemOp::S8:
jmp((const void *)WriteMem8); // tail call
continue;
case MemOp::S16:
jmp((const void *)WriteMem16); // tail call
continue;
case MemOp::S32:
jmp((const void *)WriteMem32); // tail call
continue;
case MemOp::F32:
// The float value is handed to WriteMem32 through edx
movd(edx, xmm0);
jmp((const void *)WriteMem32); // tail call
continue;
case MemOp::F64:
// The 64-bit value is passed on the stack: low dword at [esp], high at [esp + 4]
#ifndef _WIN32
// 16-byte alignment
alignStack(-12);
#else
sub(esp, 8);
#endif
movss(dword[esp], xmm0);
movss(dword[esp + 4], xmm1);
call((const void *)WriteMem64); // dynacall adds 8 to esp
// Non-Windows: 12 bytes were reserved and the callee released 8, so 4 remain.
// alignStack emits nothing on Windows, where exactly 8 bytes were reserved.
alignStack(4);
break;
default:
die("1..8 bytes");
}
}
}
ret();
}
}
}
MemHandlerEnd = getCurr();
}
void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& op)
{
switch (op.op)
{
case shop_ifb:
if (op.rs1._imm)
mov(dword[&next_pc], op.rs2._imm);
mov(ecx, op.rs3._imm);
genCall(OpDesc[op.rs3._imm]->oph);
if (op.rs1.is_imm() && op.rs1.imm_value())
mov(dword[&next_pc], op.rs2.imm_value());
mov(ecx, op.rs3.imm_value());
genCall(OpDesc[op.rs3.imm_value()]->oph);
break;
@ -66,25 +241,52 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
else
add(ecx, dword[op.rs3.reg_ptr()]);
}
if (!optimise || !genReadMemoryFast(op, block))
genReadMemorySlow(op, block);
u32 size = op.flags & 0x7f;
if (size != 8)
int memOpSize;
switch (op.flags & 0x7f)
{
case 1:
memOpSize = MemOp::S8;
break;
case 2:
memOpSize = MemOp::S16;
break;
case 4:
memOpSize = regalloc.IsAllocf(op.rd) ? MemOp::F32 : MemOp::S32;
break;
case 8:
memOpSize = MemOp::F64;
break;
}
freezeXMM();
const u8 *start = getCurr();
call(MemHandlers[optimise ? MemOp::Fast : MemOp::Slow][memOpSize][MemOp::R]);
verify(getCurr() - start == 5);
thawXMM();
if (memOpSize <= MemOp::S32)
{
host_reg_to_shil_param(op.rd, eax);
else {
}
else if (memOpSize == MemOp::F32)
{
host_reg_to_shil_param(op.rd, xmm0);
}
else
{
#ifdef EXPLODE_SPANS
if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
{
movd(regalloc.MapXRegister(op.rd, 0), eax);
movd(regalloc.MapXRegister(op.rd, 1), edx);
mov(regalloc.MapXRegister(op.rd, 0), xmm0);
mov(regalloc.MapXRegister(op.rd, 1), xmm1);
}
else
#endif
{
verify(!regalloc.IsAllocAny(op.rd));
mov(dword[op.rd.reg_ptr()], eax);
mov(dword[op.rd.reg_ptr() + 1], edx);
movss(dword[op.rd.reg_ptr()], xmm0);
movss(dword[op.rd.reg_ptr() + 1], xmm1);
}
}
}
@ -104,29 +306,46 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
add(ecx, dword[op.rs3.reg_ptr()]);
}
u32 size = op.flags & 0x7f;
if (size != 8)
int memOpSize;
switch (op.flags & 0x7f)
{
case 1:
memOpSize = MemOp::S8;
break;
case 2:
memOpSize = MemOp::S16;
break;
case 4:
memOpSize = regalloc.IsAllocf(op.rs2) ? MemOp::F32 : MemOp::S32;
break;
case 8:
memOpSize = MemOp::F64;
break;
}
if (memOpSize <= MemOp::S32)
shil_param_to_host_reg(op.rs2, edx);
else if (memOpSize == MemOp::F32)
shil_param_to_host_reg(op.rs2, xmm0);
else {
#ifdef EXPLODE_SPANS
if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
{
sub(esp, 8);
movsd(dword[esp + 4], regalloc.MapXRegister(op.rs2, 1));
movsd(dword[esp], regalloc.MapXRegister(op.rs2, 0));
mov(xmm0, regalloc.MapXRegister(op.rs2, 0));
mov(xmm1, regalloc.MapXRegister(op.rs2, 1));
}
else
#endif
{
sub(esp, 8);
mov(eax, dword[op.rs2.reg_ptr() + 1]);
mov(dword[esp + 4], eax);
mov(eax, dword[op.rs2.reg_ptr()]);
mov(dword[esp], eax);
movd(xmm0, dword[op.rs2.reg_ptr()]);
movd(xmm1, dword[op.rs2.reg_ptr() + 1]);
}
}
if (!optimise || !genWriteMemoryFast(op, block))
genWriteMemorySlow(op, block);
freezeXMM();
const u8 *start = getCurr();
call(MemHandlers[optimise ? MemOp::Fast : MemOp::Slow][memOpSize][MemOp::W]);
verify(getCurr() - start == 5);
thawXMM();
}
break;
@ -203,4 +422,36 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
}
}
// Replaces a call to a Fast memory handler with a call to the matching Slow one.
//
// host_pc: in - host pc to test; must lie inside the generated handler region
//          [MemHandlerStart, MemHandlerEnd) for anything to happen.
//          out - on success, set to retadr - 5 (the start of the call instruction).
// retadr:  return address of the call; the 32-bit displacement stored in the
//          4 bytes before it is used to recover the call target.
// acc:     only used in the error message.
//
// The replacement call is emitted at this compiler's current position and must
// be exactly 5 bytes long. ngen_Rewrite constructs the compiler at addr - 5,
// i.e. on top of the original call.
// Returns false without touching anything if host_pc is outside the handler
// region. Calls die() if the call target is not one of the Fast handlers.
bool X86Compiler::rewriteMemAccess(size_t& host_pc, size_t retadr, size_t acc)
{
	//DEBUG_LOG(DYNAREC, "rewriteMemAccess hpc %08x retadr %08x", host_pc, retadr);
	const bool insideHandlers = host_pc >= (size_t)MemHandlerStart && host_pc < (size_t)MemHandlerEnd;
	if (!insideHandlers)
		return false;

	// Call target = rel32 displacement + address of the next instruction
	const u32 callTarget = *(u32 *)(retadr - 4) + retadr;

	// Look for the Fast handler that the call was targeting
	int matchedSize = -1;
	int matchedOp = -1;
	for (int sizeIdx = 0; sizeIdx < MemOp::SizeCount && matchedSize < 0; sizeIdx++)
	{
		for (int opIdx = 0; opIdx < MemOp::OpCount; opIdx++)
		{
			if ((u32)MemHandlers[MemOp::Fast][sizeIdx][opIdx] == callTarget)
			{
				matchedSize = sizeIdx;
				matchedOp = opIdx;
				break;
			}
		}
	}

	if (matchedSize < 0)
	{
		ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: hpc %08x retadr %08x acc %08x", host_pc, retadr, acc);
		die("Failed to match the code");
		return false;
	}

	//found !
	const u8 *patchStart = getCurr();
	call(MemHandlers[MemOp::Slow][matchedSize][matchedOp]);
	verify(getCurr() - patchStart == 5);
	ready();
	// Resume at the rewritten call
	host_pc = retadr - 5;

	return true;
}
#endif

View File

@ -368,16 +368,6 @@ static void reios_sys_flashrom() {
}
}
static void reios_sys_gd()
{
gdrom_hle_op();
}
static void reios_sys_gd2()
{
gdrom_hle_op();
}
static void reios_sys_misc()
{
INFO_LOG(REIOS, "reios_sys_misc - r7: 0x%08X, r4 0x%08X, r5 0x%08X, r6 0x%08X", r[7], r[4], r[5], r[6]);
@ -395,7 +385,6 @@ static void reios_sys_misc()
}
typedef void hook_fp();
static u32 hook_addr(hook_fp* fn);
static void setup_syscall(u32 hook_addr, u32 syscall_addr) {
WriteMem32(syscall_addr, hook_addr);
@ -615,12 +604,12 @@ static void reios_boot()
memset(GetMemPtr(0x8C000000, 0), 0xFF, 64 * 1024);
setup_syscall(hook_addr(&reios_sys_system), dc_bios_syscall_system);
setup_syscall(hook_addr(&reios_sys_font), dc_bios_syscall_font);
setup_syscall(hook_addr(&reios_sys_flashrom), dc_bios_syscall_flashrom);
setup_syscall(hook_addr(&reios_sys_gd), dc_bios_syscall_gd);
setup_syscall(hook_addr(&reios_sys_gd2), dc_bios_syscall_gd2);
setup_syscall(hook_addr(&reios_sys_misc), dc_bios_syscall_misc);
setup_syscall(0x8C001000, dc_bios_syscall_system);
setup_syscall(0x8C001002, dc_bios_syscall_font);
setup_syscall(0x8C001004, dc_bios_syscall_flashrom);
setup_syscall(0x8C001006, dc_bios_syscall_gd);
setup_syscall(dc_bios_entrypoint_gd2, dc_bios_syscall_gd2);
setup_syscall(0x8C001008, dc_bios_syscall_misc);
//Infinite loop for arm !
WriteMem32(0x80800000, 0xEAFFFFFE);
@ -669,13 +658,11 @@ static void reios_boot()
}
static std::map<u32, hook_fp*> hooks;
static std::map<hook_fp*, u32> hooks_rev;
#define SYSCALL_ADDR_MAP(addr) (((addr) & 0x1FFFFFFF) | 0x80000000)
static void register_hook(u32 pc, hook_fp* fn) {
hooks[SYSCALL_ADDR_MAP(pc)] = fn;
hooks_rev[fn] = pc;
}
void DYNACALL reios_trap(u32 op) {
@ -693,16 +680,6 @@ void DYNACALL reios_trap(u32 op) {
next_pc = pr;
}
static u32 hook_addr(hook_fp* fn) {
if (hooks_rev.count(fn))
return hooks_rev[fn];
else {
ERROR_LOG(REIOS, "hook_addr: Failed to reverse lookup %p", fn);
verify(false);
return 0;
}
}
bool reios_init()
{
INFO_LOG(REIOS, "reios: Init");
@ -712,10 +689,10 @@ bool reios_init()
register_hook(0x8C001000, reios_sys_system);
register_hook(0x8C001002, reios_sys_font);
register_hook(0x8C001004, reios_sys_flashrom);
register_hook(0x8C001006, reios_sys_gd);
register_hook(0x8C001006, gdrom_hle_op);
register_hook(0x8C001008, reios_sys_misc);
register_hook(dc_bios_entrypoint_gd2, reios_sys_gd2);
register_hook(dc_bios_entrypoint_gd2, gdrom_hle_op);
return true;
}