#include "build.h"

#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
#include <setjmp.h>

//#define EXPLODE_SPANS
//#define PROFILING
//#define CANONICAL_TEST

#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>

#include "types.h"
#include "hw/sh4/sh4_opcode_list.h"
#include "hw/sh4/dyna/ngen.h"
#include "hw/sh4/modules/ccn.h"
#include "hw/sh4/modules/mmu.h"
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/sh4_rom.h"
#include "hw/mem/vmem32.h"
#include "profiler/profiler.h"
#include "oslib/oslib.h"
#include "x64_regalloc.h"

struct DynaRBI : RuntimeBlockInfo
{
	virtual u32 Relink() {
		return 0;
	}

	virtual void Relocate(void* dst) {
		verify(false);
	}
};
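
// cycle_counter is decremented by the generated block prologues and tested in
// the assembly main loop below; it is declared extern "C" so the inline
// assembly can reference it by its unmangled name.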
extern "C" {
	int cycle_counter;
}

u64 host_cpu_time;
u32 mem_writes, mem_reads;
u32 mem_rewrites_w, mem_rewrites_r;

#ifdef PROFILING
static clock_t slice_start;
int start_cycle;
extern "C"
{
static __attribute((used)) void* start_slice(void *p)
{
	slice_start = clock();
	start_cycle = cycle_counter;
	return p;
}
static __attribute((used)) void end_slice()
{
	clock_t now = clock();
	if (slice_start != 0)
	{
		host_cpu_time += now - slice_start;
		guest_cpu_cycles += start_cycle - cycle_counter;
	}
	slice_start = now;
	start_cycle = cycle_counter;
}
}
#endif

#ifdef __MACH__
#define _U "_"
#else
#define _U
#endif

#ifdef _WIN32
#define WIN32_ONLY(x) x
#else
#define WIN32_ONLY(x)
#endif

#define STRINGIFY(x) #x
#define _S(x) STRINGIFY(x)
// Hardcoded byte offsets into Sh4RCB, checked against offsetof() in ngen_init()
#if RAM_SIZE_MAX == 16*1024*1024
#define CPU_RUNNING 68157284
#define PC 68157256
#elif RAM_SIZE_MAX == 32*1024*1024
#define CPU_RUNNING 135266148
#define PC 135266120
#else
#error RAM_SIZE_MAX unknown
#endif

jmp_buf jmp_env;
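
// jmp_env is armed via setjmp at the top of ngen_mainloop. When an MMU
// exception is raised inside a compiled block, the C++ helpers further down
// longjmp() back to it, unwinding the host stack past any generated code.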

#ifndef _MSC_VER

#ifdef _WIN32
// Fully naked function in win32 for proper SEH prologue
__asm__ (
		".text                              \n\t"
		".p2align 4,,15                     \n\t"
		".globl ngen_mainloop               \n\t"
		".def ngen_mainloop; .scl 2; .type 32; .endef \n\t"
		".seh_proc ngen_mainloop            \n\t"
	"ngen_mainloop:                         \n\t"
#else
void ngen_mainloop(void* v_cntx)
{
	__asm__ (
#endif
		"pushq %rbx                         \n\t"
WIN32_ONLY( ".seh_pushreg %rbx              \n\t")
#ifndef __MACH__	// rbp is pushed in the standard function prologue
		"pushq %rbp                         \n\t"
#endif
#ifdef _WIN32
		".seh_pushreg %rbp                  \n\t"
		"pushq %rdi                         \n\t"
		".seh_pushreg %rdi                  \n\t"
		"pushq %rsi                         \n\t"
		".seh_pushreg %rsi                  \n\t"
#endif
		"pushq %r12                         \n\t"
WIN32_ONLY( ".seh_pushreg %r12              \n\t")
		"pushq %r13                         \n\t"
WIN32_ONLY( ".seh_pushreg %r13              \n\t")
		"pushq %r14                         \n\t"
WIN32_ONLY( ".seh_pushreg %r14              \n\t")
		"pushq %r15                         \n\t"
#ifdef _WIN32
		".seh_pushreg %r15                  \n\t"
		"subq $40, %rsp                     \n\t"	// 32-byte shadow space + 8 for stack 16-byte alignment
		".seh_stackalloc 40                 \n\t"
		".seh_endprologue                   \n\t"
#else
		"subq $8, %rsp                      \n\t"	// 8 for stack 16-byte alignment
#endif
		"movl $" _S(SH4_TIMESLICE) "," _U "cycle_counter(%rip) \n\t"

#ifdef _WIN32
		"leaq " _U "jmp_env(%rip), %rcx     \n\t"	// SETJMP
		"xor %rdx, %rdx                     \n\t"	// no frame pointer
#else
		"leaq " _U "jmp_env(%rip), %rdi     \n\t"
#endif
		"call " _U "setjmp                  \n"
	"1:                                     \n\t"	// run_loop
		"movq " _U "p_sh4rcb(%rip), %rax    \n\t"
		"movl " _S(CPU_RUNNING) "(%rax), %edx \n\t"
		"testl %edx, %edx                   \n\t"
		"je 3f                              \n"	// end_run_loop
#ifdef PROFILING
		"call start_slice                   \n\t"
#endif

	"2:                                     \n\t"	// slice_loop
		"movq " _U "p_sh4rcb(%rip), %rax    \n\t"
#ifdef _WIN32
		"movl " _S(PC) "(%rax), %ecx        \n\t"
#else
		"movl " _S(PC) "(%rax), %edi        \n\t"
#endif
		"call " _U "bm_GetCodeByVAddr       \n\t"
		"call *%rax                         \n\t"
#ifdef PROFILING
		"call end_slice                     \n\t"
#endif
		"movl " _U "cycle_counter(%rip), %ecx \n\t"
		"testl %ecx, %ecx                   \n\t"
		"jg 2b                              \n\t"	// slice_loop

		"addl $" _S(SH4_TIMESLICE) ", %ecx  \n\t"
		"movl %ecx, " _U "cycle_counter(%rip) \n\t"
		"call " _U "UpdateSystem_INTC       \n\t"
		"jmp 1b                             \n"	// run_loop

	"3:                                     \n\t"	// end_run_loop
#ifdef _WIN32
		"addq $40, %rsp                     \n\t"
#else
		"addq $8, %rsp                      \n\t"
#endif
		"popq %r15                          \n\t"
		"popq %r14                          \n\t"
		"popq %r13                          \n\t"
		"popq %r12                          \n\t"
#ifdef _WIN32
		"popq %rsi                          \n\t"
		"popq %rdi                          \n\t"
#endif
#ifndef __MACH__
		"popq %rbp                          \n\t"
#endif
		"popq %rbx                          \n\t"
#ifdef _WIN32
		"ret                                \n\t"
		".seh_endproc                       \n"
);
#else
	);
}
#endif

#endif // !_MSC_VER

#undef _U
#undef _S

void ngen_init()
{
	verify(CPU_RUNNING == offsetof(Sh4RCB, cntx.CpuRunning));
	verify(PC == offsetof(Sh4RCB, cntx.pc));
}

void ngen_ResetBlocks()
{
}

void ngen_GetFeatures(ngen_features* dst)
{
	dst->InterpreterFallback = false;
	dst->OnlyDynamicEnds = false;
}

RuntimeBlockInfo* ngen_AllocateBlock()
{
	return new DynaRBI();
}

static void ngen_blockcheckfail(u32 pc) {
	//printf("X64 JIT: SMC invalidation at %08X\n", pc);
	rdv_BlockCheckFail(pc);
}
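
// Memory exception helpers. The generated code passes the guest pc with its
// low bit set when the faulting instruction sits in a delay slot, so the
// handlers below can recover the branch address (pc - 1) for spc.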

static void handle_mem_exception(u32 exception_raised, u32 pc)
{
	if (exception_raised)
	{
		if (pc & 1)
			// Delay slot
			spc = pc - 1;
		else
			spc = pc;
		cycle_counter += CPU_RATIO * 2;	// probably more is needed but no easy way to find out
		longjmp(jmp_env, 1);
	}
}

static u32 exception_raised;

template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
{
#ifndef NO_MMU
	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
	handle_mem_exception(exception_raised, pc);

	return rv;
#else
	// not used
	return (T)0;
#endif
}

template<typename T>
static u32 WriteMemNoEx(u32 addr, T data, u32 pc)
{
#ifndef NO_MMU
	u32 exception_raised = mmu_WriteMemNoEx<T>(addr, data);
	handle_mem_exception(exception_raised, pc);
	return exception_raised;
#else
	// not used
	return 0;
#endif
}

static void handle_sh4_exception(SH4ThrownException& ex, u32 pc)
{
	if (pc & 1)
	{
		// Delay slot
		AdjustDelaySlotException(ex);
		pc--;
	}
	Do_Exception(pc, ex.expEvn, ex.callVect);
	cycle_counter += CPU_RATIO * 4;	// probably more is needed
	longjmp(jmp_env, 1);
}

static void interpreter_fallback(u16 op, OpCallFP *oph, u32 pc)
{
	try {
		oph(op);
	} catch (SH4ThrownException& ex) {
		handle_sh4_exception(ex, pc);
	}
}
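
// Store-queue write helpers: the MMU variant converts thrown SH4 exceptions
// into a longjmp via handle_sh4_exception; the no-MMU variant is a plain
// trampoline so its address can be taken for GenCall.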

static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
{
	try {
		do_sqw_mmu(addr);
	} catch (SH4ThrownException& ex) {
		handle_sh4_exception(ex, pc);
	}
}

static void do_sqw_nommu_local(u32 addr, u8* sqb)
{
	do_sqw_nommu(addr, sqb);
}
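
// BlockCompiler drives Xbyak to emit one runtime block directly into the
// code cache: emit_GetCCPtr() supplies the destination and emit_FreeSpace()
// the remaining room.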

class BlockCompiler : public Xbyak::CodeGenerator
{
public:
	BlockCompiler() : BlockCompiler((u8 *)emit_GetCCPtr()) {}

	BlockCompiler(u8 *code_ptr) : Xbyak::CodeGenerator(emit_FreeSpace(), code_ptr), regalloc(this)
	{
#ifdef _WIN32
		call_regs.push_back(ecx);
		call_regs.push_back(edx);
		call_regs.push_back(r8d);
		call_regs.push_back(r9d);

		call_regs64.push_back(rcx);
		call_regs64.push_back(rdx);
		call_regs64.push_back(r8);
		call_regs64.push_back(r9);
#else
		call_regs.push_back(edi);
		call_regs.push_back(esi);
		call_regs.push_back(edx);
		call_regs.push_back(ecx);

		call_regs64.push_back(rdi);
		call_regs64.push_back(rsi);
		call_regs64.push_back(rdx);
		call_regs64.push_back(rcx);
#endif

		call_regsxmm.push_back(xmm0);
		call_regsxmm.push_back(xmm1);
		call_regsxmm.push_back(xmm2);
		call_regsxmm.push_back(xmm3);
	}
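
	// compile() emits the whole block: stack alignment, an optional
	// FPU-disabled check, guest cycle accounting, one code sequence per SHIL
	// opcode, then the block epilogue that publishes next_pc.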

	void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise)
	{
		//printf("X86_64 compiling %08x to %p\n", block->addr, emit_GetCCPtr());
		current_opid = -1;

		CheckBlock(force_checks, block);

#ifdef _WIN32
		sub(rsp, 0x28);		// 32-byte shadow space + 8 byte alignment
#else
		sub(rsp, 0x8);		// align stack
#endif
		if (mmu_enabled() && block->has_fpu_op)
		{
			Xbyak::Label fpu_enabled;
			mov(rax, (uintptr_t)&sr);
			test(dword[rax], 0x8000);			// test SR.FD bit
			jz(fpu_enabled);
			mov(call_regs[0], block->vaddr);	// pc
			mov(call_regs[1], 0x800);			// event
			mov(call_regs[2], 0x100);			// vector
			GenCall(Do_Exception);
			jmp(exit_block, T_NEAR);
			L(fpu_enabled);
		}
#ifdef FEAT_NO_RWX_PAGES
		// Use absolute addressing for this one
		// TODO(davidgfnet) remove the ifdef using CC_RX2RW/CC_RW2RX
		mov(rax, (uintptr_t)&cycle_counter);
		sub(dword[rax], block->guest_cycles);
#else
		sub(dword[rip + &cycle_counter], block->guest_cycles);
#endif
		regalloc.DoAlloc(block);
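
		// Main dispatch: each SHIL opcode gets a hand-written x64 sequence;
		// anything not handled here falls through to the canonical
		// implementation via shil_chf at the bottom of the switch.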

		for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
		{
			shil_opcode& op = block->oplist[current_opid];

			regalloc.OpBegin(&op, current_opid);

			switch (op.op)
			{
			case shop_ifb:
				if (mmu_enabled())
				{
					mov(call_regs64[1], reinterpret_cast<uintptr_t>(*OpDesc[op.rs3._imm]->oph));	// op handler
					mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));		// pc
				}

				if (op.rs1._imm)
				{
					mov(rax, (size_t)&next_pc);
					mov(dword[rax], op.rs2._imm);
				}

				mov(call_regs[0], op.rs3._imm);

				if (!mmu_enabled())
					GenCall(OpDesc[op.rs3._imm]->oph);
				else
					GenCall(interpreter_fallback);

				break;

			case shop_jcond:
			case shop_jdyn:
			{
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				// This shouldn't happen since the block type would have been changed to static.
				// But it doesn't hurt and is handy when partially disabling ssa for testing
				if (op.rs1.is_imm())
				{
					if (op.rs2.is_imm())
						mov(rd, op.rs1._imm + op.rs2._imm);
					else
					{
						mov(rd, op.rs1._imm);
						verify(op.rs2.is_null());
					}
				}
				else
				{
					Xbyak::Reg32 rs1 = regalloc.MapRegister(op.rs1);
					if (rd != rs1)
						mov(rd, rs1);
					if (op.rs2.is_imm())
						add(rd, op.rs2._imm);
				}
			}
			break;

			case shop_mov32:
			{
				verify(op.rd.is_reg());
				verify(op.rs1.is_reg() || op.rs1.is_imm());

				if (regalloc.IsAllocf(op.rd))
					shil_param_to_host_reg(op.rs1, regalloc.MapXRegister(op.rd));
				else
					shil_param_to_host_reg(op.rs1, regalloc.MapRegister(op.rd));
			}
			break;

			case shop_mov64:
			{
				verify(op.rd.is_r64());
				verify(op.rs1.is_r64());

#ifdef EXPLODE_SPANS
				movss(regalloc.MapXRegister(op.rd, 0), regalloc.MapXRegister(op.rs1, 0));
				movss(regalloc.MapXRegister(op.rd, 1), regalloc.MapXRegister(op.rs1, 1));
#else
				mov(rax, (uintptr_t)op.rs1.reg_ptr());
				mov(rax, qword[rax]);
				mov(rcx, (uintptr_t)op.rd.reg_ptr());
				mov(qword[rcx], rax);
#endif
			}
			break;

			case shop_readm:
				if (!GenReadMemImmediate(op, block))
				{
					// Not an immediate address
					shil_param_to_host_reg(op.rs1, call_regs[0]);
					if (!op.rs3.is_null())
					{
						if (op.rs3.is_imm())
							add(call_regs[0], op.rs3._imm);
						else if (regalloc.IsAllocg(op.rs3))
							add(call_regs[0], regalloc.MapRegister(op.rs3));
						else
						{
							mov(rax, (uintptr_t)op.rs3.reg_ptr());
							add(call_regs[0], dword[rax]);
						}
					}
					if (!optimise || !GenReadMemoryFast(op, block))
						GenReadMemorySlow(op, block);

					u32 size = op.flags & 0x7f;
					if (size != 8)
						host_reg_to_shil_param(op.rd, eax);
					else {
#ifdef EXPLODE_SPANS
						if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
						{
							movd(regalloc.MapXRegister(op.rd, 0), eax);
							shr(rax, 32);
							movd(regalloc.MapXRegister(op.rd, 1), eax);
						}
						else
#endif
						{
							mov(rcx, (uintptr_t)op.rd.reg_ptr());
							mov(qword[rcx], rax);
						}
					}
				}
				break;

			case shop_writem:
			{
				if (!GenWriteMemImmediate(op, block))
				{
					shil_param_to_host_reg(op.rs1, call_regs[0]);
					if (!op.rs3.is_null())
					{
						if (op.rs3.is_imm())
							add(call_regs[0], op.rs3._imm);
						else if (regalloc.IsAllocg(op.rs3))
							add(call_regs[0], regalloc.MapRegister(op.rs3));
						else
						{
							mov(rax, (uintptr_t)op.rs3.reg_ptr());
							add(call_regs[0], dword[rax]);
						}
					}

					u32 size = op.flags & 0x7f;
					if (size != 8)
						shil_param_to_host_reg(op.rs2, call_regs[1]);
					else {
#ifdef EXPLODE_SPANS
						if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
						{
							movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
							shl(call_regs64[1], 32);
							movd(eax, regalloc.MapXRegister(op.rs2, 0));
							or_(call_regs64[1], rax);
						}
						else
#endif
						{
							mov(rax, (uintptr_t)op.rs2.reg_ptr());
							mov(call_regs64[1], qword[rax]);
						}
					}
					if (!optimise || !GenWriteMemoryFast(op, block))
						GenWriteMemorySlow(op, block);
				}
			}
			break;

#ifndef CANONICAL_TEST
			case shop_sync_sr:
				GenCall(UpdateSR);
				break;
			case shop_sync_fpscr:
				GenCall(UpdateFPSCR);
				break;

			case shop_swaplb:
				if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				ror(regalloc.MapRegister(op.rd).cvt16(), 8);
				break;

			case shop_neg:
				if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				neg(regalloc.MapRegister(op.rd));
				break;
			case shop_not:
				if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				not_(regalloc.MapRegister(op.rd));
				break;

			case shop_and:
				GenBinaryOp(op, &BlockCompiler::and_);
				break;
			case shop_or:
				GenBinaryOp(op, &BlockCompiler::or_);
				break;
			case shop_xor:
				GenBinaryOp(op, &BlockCompiler::xor_);
				break;
			case shop_add:
				GenBinaryOp(op, &BlockCompiler::add);
				break;
			case shop_sub:
				GenBinaryOp(op, &BlockCompiler::sub);
				break;

#define SHIFT_OP(natop) \
				if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1)) \
					mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1)); \
				if (op.rs2.is_imm()) \
					natop(regalloc.MapRegister(op.rd), op.rs2._imm); \
				else \
					die("Unsupported operand");
			case shop_shl:
				SHIFT_OP(shl)
				break;
			case shop_shr:
				SHIFT_OP(shr)
				break;
			case shop_sar:
				SHIFT_OP(sar)
				break;
			case shop_ror:
				SHIFT_OP(ror)
				break;
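
			// Carry-in trick used below: cmp(reg, 1) sets the host carry flag
			// to the *complement* of a 0/1 guest flag (CF is set iff reg == 0),
			// and cmc() then flips it so adc/rcr/rcl consume the guest T bit.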
			case shop_adc:
			{
				cmp(regalloc.MapRegister(op.rs3), 1);	// C = ~rs3
				Xbyak::Reg32 rs2;
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				if (op.rs2.is_reg())
				{
					rs2 = regalloc.MapRegister(op.rs2);
					if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
					{
						mov(ecx, rs2);
						rs2 = ecx;
					}
				}
				if (op.rs1.is_imm())
					mov(rd, op.rs1.imm_value());
				else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(rd, regalloc.MapRegister(op.rs1));
				cmc();		// C = rs3
				if (op.rs2.is_reg())
					adc(rd, rs2);	// (C,rd)=rs1+rs2+rs3(C)
				else
					adc(rd, op.rs2.imm_value());
				setc(regalloc.MapRegister(op.rd2).cvt8());	// rd2 = C
			}
			break;

			/* FIXME buggy
			case shop_sbc:
				if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				cmp(regalloc.MapRegister(op.rs3), 1);	// C = ~rs3
				cmc();		// C = rs3
				mov(ecx, 1);
				mov(regalloc.MapRegister(op.rd2), 0);
				mov(eax, regalloc.MapRegister(op.rs2));
				neg(eax);
				adc(regalloc.MapRegister(op.rd), eax);	// (C,rd)=rs1-rs2+rs3(C)
				cmovc(regalloc.MapRegister(op.rd2), ecx);	// rd2 = C
				break;
			*/
			case shop_negc:
			{
				Xbyak::Reg32 rs2;
				if (op.rs2.is_reg())
				{
					rs2 = regalloc.MapRegister(op.rs2);
					if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
					{
						mov(ecx, rs2);
						rs2 = ecx;
					}
				}
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				if (op.rs1.is_imm())
					mov(rd, op.rs1.imm_value());
				else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(rd, regalloc.MapRegister(op.rs1));
				Xbyak::Reg64 rd64 = rd.cvt64();
				neg(rd64);
				if (op.rs2.is_imm())
					sub(rd64, op.rs2.imm_value());
				else
					sub(rd64, rs2.cvt64());
				Xbyak::Reg64 rd2_64 = regalloc.MapRegister(op.rd2).cvt64();
				mov(rd2_64, rd64);
				shr(rd2_64, 63);
			}
			break;

			case shop_rocr:
			case shop_rocl:
			{
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				cmp(regalloc.MapRegister(op.rs2), 1);	// C = ~rs2
				if (op.rs1.is_imm())
					mov(rd, op.rs1.imm_value());
				else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(rd, regalloc.MapRegister(op.rs1));
				cmc();		// C = rs2
				if (op.op == shop_rocr)
					rcr(rd, 1);
				else
					rcl(rd, 1);
				setc(al);
				movzx(regalloc.MapRegister(op.rd2), al);	// rd2 = C
			}
			break;

			case shop_shld:
			case shop_shad:
			{
				if (op.rs2.is_reg())
					mov(ecx, regalloc.MapRegister(op.rs2));
				else
					// This shouldn't happen. If arg is imm -> shop_shl/shr/sar
					mov(ecx, op.rs2.imm_value());
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				if (op.rs1.is_imm())
					mov(rd, op.rs1.imm_value());
				else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
					mov(rd, regalloc.MapRegister(op.rs1));
				Xbyak::Label negative_shift;
				Xbyak::Label non_zero;
				Xbyak::Label exit;

				cmp(ecx, 0);
				js(negative_shift);
				shl(rd, cl);
				jmp(exit);

				L(negative_shift);
				test(ecx, 0x1f);
				jnz(non_zero);
				if (op.op == shop_shld)
					xor_(rd, rd);
				else
					sar(rd, 31);
				jmp(exit);

				L(non_zero);
				neg(ecx);
				if (op.op == shop_shld)
					shr(rd, cl);
				else
					sar(rd, cl);
				L(exit);
			}
			break;

			case shop_test:
			case shop_seteq:
			case shop_setge:
			case shop_setgt:
			case shop_setae:
			case shop_setab:
			{
				if (op.op == shop_test)
				{
					if (op.rs2.is_imm())
						test(regalloc.MapRegister(op.rs1), op.rs2._imm);
					else
						test(regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
				}
				else
				{
					if (op.rs2.is_imm())
						cmp(regalloc.MapRegister(op.rs1), op.rs2._imm);
					else
						cmp(regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
				}
				switch (op.op)
				{
				case shop_test:
				case shop_seteq:
					sete(al);
					break;
				case shop_setge:
					setge(al);
					break;
				case shop_setgt:
					setg(al);
					break;
				case shop_setae:
					setae(al);
					break;
				case shop_setab:
					seta(al);
					break;
				default:
					die("invalid case");
					break;
				}
				movzx(regalloc.MapRegister(op.rd), al);
			}
			break;
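
			// shop_setpeq implements SH4 CMP/STR: rd = 1 if any *byte* of
			// rs1 ^ rs2 is zero. The three high bytes are tested explicitly;
			// the final test covers the low byte.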
			case shop_setpeq:
			{
				Xbyak::Label end;
				mov(ecx, regalloc.MapRegister(op.rs1));
				if (op.rs2.is_r32i())
					xor_(ecx, regalloc.MapRegister(op.rs2));
				else
					xor_(ecx, op.rs2._imm);

				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				mov(rd, 1);
				test(ecx, 0xFF000000);
				je(end);
				test(ecx, 0x00FF0000);
				je(end);
				test(ecx, 0x0000FF00);
				je(end);
				xor_(rd, rd);
				test(cl, cl);
				sete(rd.cvt8());
				L(end);
			}
			break;

			case shop_mul_u16:
				movzx(eax, regalloc.MapRegister(op.rs1).cvt16());
				if (op.rs2.is_reg())
					movzx(ecx, regalloc.MapRegister(op.rs2).cvt16());
				else
					mov(ecx, op.rs2._imm & 0xFFFF);
				mul(ecx);
				mov(regalloc.MapRegister(op.rd), eax);
				break;
			case shop_mul_s16:
				movsx(eax, regalloc.MapRegister(op.rs1).cvt16());
				if (op.rs2.is_reg())
					movsx(ecx, regalloc.MapRegister(op.rs2).cvt16());
				else
					mov(ecx, (s32)(s16)op.rs2._imm);
				mul(ecx);
				mov(regalloc.MapRegister(op.rd), eax);
				break;
			case shop_mul_i32:
				mov(eax, regalloc.MapRegister(op.rs1));
				if (op.rs2.is_reg())
					mul(regalloc.MapRegister(op.rs2));
				else
				{
					mov(ecx, op.rs2._imm);
					mul(ecx);
				}
				mov(regalloc.MapRegister(op.rd), eax);
				break;
			case shop_mul_u64:
				mov(eax, regalloc.MapRegister(op.rs1));
				if (op.rs2.is_reg())
					mov(ecx, regalloc.MapRegister(op.rs2));
				else
					mov(ecx, op.rs2._imm);
				mul(rcx);
				mov(regalloc.MapRegister(op.rd), eax);
				shr(rax, 32);
				mov(regalloc.MapRegister(op.rd2), eax);
				break;
			case shop_mul_s64:
				movsxd(rax, regalloc.MapRegister(op.rs1));
				if (op.rs2.is_reg())
					movsxd(rcx, regalloc.MapRegister(op.rs2));
				else
					mov(rcx, (s64)(s32)op.rs2._imm);
				mul(rcx);
				mov(regalloc.MapRegister(op.rd), eax);
				shr(rax, 32);
				mov(regalloc.MapRegister(op.rd2), eax);
				break;

			case shop_pref:
				if (op.rs1.is_imm())
				{
					// this test shouldn't be necessary
					if ((op.rs1._imm & 0xFC000000) == 0xE0000000)
					{
						mov(call_regs[0], op.rs1._imm);
						if (mmu_enabled())
						{
							mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

							GenCall(do_sqw_mmu_no_ex);
						}
						else
						{
							if (CCN_MMUCR.AT == 1)
							{
								GenCall(do_sqw_mmu);
							}
							else
							{
								mov(call_regs64[1], (uintptr_t)sq_both);
								GenCall(&do_sqw_nommu_local);
							}
						}
					}
				}
				else
				{
					Xbyak::Reg32 rn;
					if (regalloc.IsAllocg(op.rs1))
					{
						rn = regalloc.MapRegister(op.rs1);
					}
					else
					{
						mov(rax, (uintptr_t)op.rs1.reg_ptr());
						mov(eax, dword[rax]);
						rn = eax;
					}
					mov(ecx, rn);
					shr(ecx, 26);
					cmp(ecx, 0x38);
					Xbyak::Label no_sqw;
					jne(no_sqw);

					mov(call_regs[0], rn);
					if (mmu_enabled())
					{
						mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

						GenCall(do_sqw_mmu_no_ex);
					}
					else
					{
						if (CCN_MMUCR.AT == 1)
						{
							GenCall(do_sqw_mmu);
						}
						else
						{
							mov(call_regs64[1], (uintptr_t)sq_both);
							GenCall(&do_sqw_nommu_local);
						}
					}
					L(no_sqw);
				}
				break;

			case shop_ext_s8:
				mov(eax, regalloc.MapRegister(op.rs1));
				movsx(regalloc.MapRegister(op.rd), al);
				break;
			case shop_ext_s16:
				movsx(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1).cvt16());
				break;

			case shop_xtrct:
			{
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				Xbyak::Reg32 rs1 = ecx;
				if (op.rs1.is_reg())
					rs1 = regalloc.MapRegister(op.rs1);
				else
					mov(rs1, op.rs1.imm_value());
				Xbyak::Reg32 rs2 = eax;
				if (op.rs2.is_reg())
					rs2 = regalloc.MapRegister(op.rs2);
				else
					mov(rs2, op.rs2.imm_value());
				if (rd == rs2)
				{
					shl(rd, 16);
					mov(eax, rs1);
					shr(eax, 16);
					or_(rd, eax);
					break;
				}
				else if (rd != rs1)
				{
					mov(rd, rs1);
				}
				shr(rd, 16);
				mov(eax, rs2);
				shl(eax, 16);
				or_(rd, eax);
			}
			break;

			//
			// FPU
			//

			case shop_fadd:
				GenBinaryFOp(op, &BlockCompiler::addss);
				break;
			case shop_fsub:
				GenBinaryFOp(op, &BlockCompiler::subss);
				break;
			case shop_fmul:
				GenBinaryFOp(op, &BlockCompiler::mulss);
				break;
			case shop_fdiv:
				GenBinaryFOp(op, &BlockCompiler::divss);
				break;

			case shop_fabs:
				movd(eax, regalloc.MapXRegister(op.rs1));
				and_(eax, 0x7FFFFFFF);
				movd(regalloc.MapXRegister(op.rd), eax);
				break;
			case shop_fneg:
				movd(eax, regalloc.MapXRegister(op.rs1));
				xor_(eax, 0x80000000);
				movd(regalloc.MapXRegister(op.rd), eax);
				break;

			case shop_fsqrt:
				sqrtss(regalloc.MapXRegister(op.rd), regalloc.MapXRegister(op.rs1));
				break;
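
			// fmac computes rd = rs1 + rs2 * rs3. A fused vfmadd231ss is used
			// when the host CPU has FMA; otherwise the product is formed in
			// xmm0 and added separately, which rounds twice.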
			case shop_fmac:
			{
				Xbyak::Xmm rs1 = regalloc.MapXRegister(op.rs1);
				Xbyak::Xmm rs2 = regalloc.MapXRegister(op.rs2);
				Xbyak::Xmm rs3 = regalloc.MapXRegister(op.rs3);
				Xbyak::Xmm rd = regalloc.MapXRegister(op.rd);
				if (rd == rs2)
				{
					movss(xmm1, rs2);
					rs2 = xmm1;
				}
				if (rd == rs3)
				{
					movss(xmm2, rs3);
					rs3 = xmm2;
				}
				if (op.rs1.is_imm())
				{
					mov(eax, op.rs1._imm);
					movd(rd, eax);
				}
				else if (rd != rs1)
				{
					movss(rd, rs1);
				}
				if (cpu.has(Xbyak::util::Cpu::tFMA))
					vfmadd231ss(rd, rs2, rs3);
				else
				{
					movss(xmm0, rs2);
					mulss(xmm0, rs3);
					addss(rd, xmm0);
				}
			}
			break;

			case shop_fsrra:
				// RSQRTSS has an |error| <= 1.5*2^-12 whereas the SH4 FSRRA needs |error| <= 2^-21,
				// so compute 1.0 / sqrt(rs1) with full-precision sqrtss/divss instead.
				sqrtss(xmm0, regalloc.MapXRegister(op.rs1));
				mov(eax, 0x3f800000);	// 1.0
				movd(regalloc.MapXRegister(op.rd), eax);
				divss(regalloc.MapXRegister(op.rd), xmm0);
				break;
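
			// ucomiss sets ZF, PF and CF on an unordered (NaN) compare. For
			// fseteq, lahf/test(ah, 0x44) extracts ZF and PF together: exactly
			// one bit set (ZF only, i.e. ordered-equal) gives odd parity, so
			// setnp yields 1 only for a true equality.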
			case shop_fsetgt:
			case shop_fseteq:
				ucomiss(regalloc.MapXRegister(op.rs1), regalloc.MapXRegister(op.rs2));
				if (op.op == shop_fsetgt)
				{
					seta(al);
				}
				else
				{
					// special case
					// We want to take into account the 'unordered' case on the fpu
					lahf();
					test(ah, 0x44);
					setnp(al);
				}
				movzx(regalloc.MapRegister(op.rd), al);
				break;
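
			// fsca looks up the result in sin_table (8 bytes per entry,
			// presumably {sin, cos} float pairs), indexed by the low 16 bits
			// of the angle.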
			case shop_fsca:
				if (op.rs1.is_imm())
					mov(rax, op.rs1._imm & 0xFFFF);
				else
					movzx(rax, regalloc.MapRegister(op.rs1).cvt16());
				mov(rcx, (uintptr_t)&sin_table);
#ifdef EXPLODE_SPANS
				movss(regalloc.MapXRegister(op.rd, 0), dword[rcx + rax * 8]);
				movss(regalloc.MapXRegister(op.rd, 1), dword[rcx + (rax * 8) + 4]);
#else
				mov(rcx, qword[rcx + rax * 8]);
				mov(rdx, (uintptr_t)op.rd.reg_ptr());
				mov(qword[rdx], rcx);
#endif
				break;
			/*
			case shop_fipr:
			{
				// Using doubles for better precision
				const Xbyak::Xmm &rd = regalloc.MapXRegister(op.rd);
				mov(rax, (size_t)op.rs1.reg_ptr());
				mov(rcx, (size_t)op.rs2.reg_ptr());
				pxor(xmm1, xmm1);
				pxor(xmm0, xmm0);
				pxor(xmm2, xmm2);
				cvtss2sd(xmm1, dword[rax]);
				cvtss2sd(xmm0, dword[rcx]);
				mulsd(xmm0, xmm1);
				pxor(xmm1, xmm1);
				cvtss2sd(xmm2, dword[rax + 4]);
				cvtss2sd(xmm1, dword[rcx + 4]);
				mulsd(xmm1, xmm2);
				pxor(xmm2, xmm2);
				cvtss2sd(xmm2, dword[rax + 8]);
				addsd(xmm1, xmm0);
				pxor(xmm0, xmm0);
				cvtss2sd(xmm0, dword[rcx + 8]);
				mulsd(xmm0, xmm2);
				pxor(xmm2, xmm2);
				cvtss2sd(xmm2, dword[rax + 12]);
				addsd(xmm1, xmm0);
				pxor(xmm0, xmm0);
				cvtss2sd(xmm0, dword[rcx + 12]);
				mulsd(xmm0, xmm2);
				addsd(xmm0, xmm1);
				cvtsd2ss(rd, xmm0);
			}
			break;

			case shop_ftrv:
				mov(rax, (uintptr_t)op.rs1.reg_ptr());
#if 0	// vfmadd231ps and vmulps cause rounding problems
				if (cpu.has(Xbyak::util::Cpu::tFMA))
				{
					movaps(xmm0, xword[rax]);					// fn[0-4]
					mov(rax, (uintptr_t)op.rs2.reg_ptr());		// fm[0-15]

					pshufd(xmm1, xmm0, 0x00);					// fn[0]
					vmulps(xmm2, xmm1, xword[rax]);				// fm[0-3]
					pshufd(xmm1, xmm0, 0x55);					// fn[1]
					vfmadd231ps(xmm2, xmm1, xword[rax + 16]);	// fm[4-7]
					pshufd(xmm1, xmm0, 0xaa);					// fn[2]
					vfmadd231ps(xmm2, xmm1, xword[rax + 32]);	// fm[8-11]
					pshufd(xmm1, xmm0, 0xff);					// fn[3]
					vfmadd231ps(xmm2, xmm1, xword[rax + 48]);	// fm[12-15]
					mov(rax, (uintptr_t)op.rd.reg_ptr());
					movaps(xword[rax], xmm2);
				}
				else
#endif
				{
					movaps(xmm3, xword[rax]);		// xmm3 = vector
					pshufd(xmm0, xmm3, 0);			// xmm0 = {v0}
					pshufd(xmm1, xmm3, 0x55);		// xmm1 = {v1}
					pshufd(xmm2, xmm3, 0xaa);		// xmm2 = {v2}
					pshufd(xmm3, xmm3, 0xff);		// xmm3 = {v3}

					// do the matrix mult!
					mov(rax, (uintptr_t)op.rs2.reg_ptr());
					mulps(xmm0, xword[rax + 0]);	// v0 *= vm0
					mulps(xmm1, xword[rax + 16]);	// v1 *= vm1
					mulps(xmm2, xword[rax + 32]);	// v2 *= vm2
					mulps(xmm3, xword[rax + 48]);	// v3 *= vm3

					addps(xmm0, xmm1);				// sum it all up
					addps(xmm2, xmm3);
					addps(xmm0, xmm2);

					mov(rax, (uintptr_t)op.rd.reg_ptr());
					movaps(xword[rax], xmm0);
				}
				break;
			*/
			case shop_frswap:
				mov(rax, (uintptr_t)op.rs1.reg_ptr());
				mov(rcx, (uintptr_t)op.rd.reg_ptr());
				if (cpu.has(Xbyak::util::Cpu::tAVX512F))
				{
					vmovaps(zmm0, zword[rax]);
					vmovaps(zmm1, zword[rcx]);
					vmovaps(zword[rax], zmm1);
					vmovaps(zword[rcx], zmm0);
				}
				else if (cpu.has(Xbyak::util::Cpu::tAVX))
				{
					vmovaps(ymm0, yword[rax]);
					vmovaps(ymm1, yword[rcx]);
					vmovaps(yword[rax], ymm1);
					vmovaps(yword[rcx], ymm0);

					vmovaps(ymm0, yword[rax + 32]);
					vmovaps(ymm1, yword[rcx + 32]);
					vmovaps(yword[rax + 32], ymm1);
					vmovaps(yword[rcx + 32], ymm0);
				}
				else
				{
					for (int i = 0; i < 4; i++)
					{
						movaps(xmm0, xword[rax + (i * 16)]);
						movaps(xmm1, xword[rcx + (i * 16)]);
						movaps(xword[rax + (i * 16)], xmm1);
						movaps(xword[rcx + (i * 16)], xmm0);
					}
				}
				break;
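
			// cvt_f2i_t: cvttss2si returns 0x80000000 ("indefinite integer")
			// for NaN and out-of-range inputs; the extra compares clamp
			// positive overflow to INT_MAX while leaving genuine negative
			// results alone.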
			case shop_cvt_f2i_t:
			{
				Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
				cvttss2si(rd, regalloc.MapXRegister(op.rs1));
				mov(eax, 0x7fffffff);
				cmp(rd, 0x7fffff80);	// 2147483520.0f
				cmovge(rd, eax);
				cmp(rd, 0x80000000);	// indefinite integer
				Xbyak::Label done;
				jne(done, T_SHORT);
				movd(ecx, regalloc.MapXRegister(op.rs1));
				cmp(ecx, 0);
				cmovge(rd, eax);		// restore the correct sign
				L(done);
			}
			break;
			case shop_cvt_i2f_n:
			case shop_cvt_i2f_z:
				cvtsi2ss(regalloc.MapXRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;
#endif

			default:
				shil_chf[op.op](&op);
				break;
			}
			regalloc.OpEnd(&op);
		}
		regalloc.Cleanup();
		current_opid = -1;

		mov(rax, (size_t)&next_pc);

		switch (block->BlockType) {

		case BET_StaticJump:
		case BET_StaticCall:
			// next_pc = block->BranchBlock;
			mov(dword[rax], block->BranchBlock);
			break;

		case BET_Cond_0:
		case BET_Cond_1:
		{
			// next_pc = next_pc_value;
			// if (*jdyn == 0)
			//     next_pc = branch_pc_value;

			mov(dword[rax], block->NextBlock);

			if (block->has_jcond)
				mov(rdx, (size_t)&Sh4cntx.jdyn);
			else
				mov(rdx, (size_t)&sr.T);

			cmp(dword[rdx], block->BlockType & 1);
			Xbyak::Label branch_not_taken;

			jne(branch_not_taken, T_SHORT);
			mov(dword[rax], block->BranchBlock);
			L(branch_not_taken);
		}
		break;

		case BET_DynamicJump:
		case BET_DynamicCall:
		case BET_DynamicRet:
			// next_pc = *jdyn;
			mov(rdx, (size_t)&Sh4cntx.jdyn);
			mov(edx, dword[rdx]);
			mov(dword[rax], edx);
			break;

		case BET_DynamicIntr:
		case BET_StaticIntr:
			if (block->BlockType == BET_DynamicIntr) {
				// next_pc = *jdyn;
				mov(rdx, (size_t)&Sh4cntx.jdyn);
				mov(edx, dword[rdx]);
				mov(dword[rax], edx);
			}
			else {
				// next_pc = next_pc_value;
				mov(dword[rax], block->NextBlock);
			}
			GenCall(UpdateINTC);
			break;

		default:
			die("Invalid block end type");
		}

		L(exit_block);
#ifdef _WIN32
		add(rsp, 0x28);
#else
		add(rsp, 0x8);
#endif
		ret();

		ready();

		block->code = (DynarecCodeEntryPtr)getCode();
		block->host_code_size = getSize();

		emit_Skip(getSize());
	}

	void GenReadMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		const u8 *start_addr = getCurr();
		if (mmu_enabled())
			mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

		u32 size = op.flags & 0x7f;
		switch (size) {
		case 1:
			if (!mmu_enabled())
				GenCall(ReadMem8);
			else
				GenCall(ReadMemNoEx<u8>, true);
			movsx(eax, al);
			break;
		case 2:
			if (!mmu_enabled())
				GenCall(ReadMem16);
			else
				GenCall(ReadMemNoEx<u16>, true);
			movsx(eax, ax);
			break;

		case 4:
			if (!mmu_enabled())
				GenCall(ReadMem32);
			else
				GenCall(ReadMemNoEx<u32>, true);
			break;
		case 8:
			if (!mmu_enabled())
				GenCall(ReadMem64);
			else
				GenCall(ReadMemNoEx<u64>, true);
			break;
		default:
			die("1..8 bytes");
		}
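
		// When the MMU and vmem32 are enabled, the sequence is padded with
		// nops up to a fixed size (read_mem_op_size), presumably so a
		// faulting fast-path access can later be rewritten in place by a
		// same-sized slow call; the verify() enforces the exact size.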
		if (mmu_enabled() && vmem32_enabled())
		{
			Xbyak::Label quick_exit;
			if (getCurr() - start_addr <= read_mem_op_size - 6)
				jmp(quick_exit, T_NEAR);
			while (getCurr() - start_addr < read_mem_op_size)
				nop();
			L(quick_exit);
			verify(getCurr() - start_addr == read_mem_op_size);
		}
	}

	void GenWriteMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		const u8 *start_addr = getCurr();
		if (mmu_enabled())
			mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

		u32 size = op.flags & 0x7f;
		switch (size) {
		case 1:
			if (!mmu_enabled())
				GenCall(WriteMem8);
			else
				GenCall(WriteMemNoEx<u8>, true);
			break;
		case 2:
			if (!mmu_enabled())
				GenCall(WriteMem16);
			else
				GenCall(WriteMemNoEx<u16>, true);
			break;
		case 4:
			if (!mmu_enabled())
				GenCall(WriteMem32);
			else
				GenCall(WriteMemNoEx<u32>, true);
			break;
		case 8:
			if (!mmu_enabled())
				GenCall(WriteMem64);
			else
				GenCall(WriteMemNoEx<u64>, true);
			break;
		default:
			die("1..8 bytes");
		}
		if (mmu_enabled() && vmem32_enabled())
		{
			Xbyak::Label quick_exit;
			if (getCurr() - start_addr <= write_mem_op_size - 6)
				jmp(quick_exit, T_NEAR);
			while (getCurr() - start_addr < write_mem_op_size)
				nop();
			L(quick_exit);
			verify(getCurr() - start_addr == write_mem_op_size);
		}
	}

	void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
	{
	}

	void FinalizeRewrite()
	{
		ready();
	}

	void ngen_CC_Start(const shil_opcode& op)
	{
		CC_pars.clear();
	}

	void ngen_CC_param(const shil_opcode& op, const shil_param& prm, CanonicalParamType tp) {
		switch (tp)
		{

		case CPT_u32:
		case CPT_ptr:
		case CPT_f32:
		{
			CC_PS t = { tp, &prm };
			CC_pars.push_back(t);
		}
		break;

		// store from EAX
		case CPT_u64rvL:
		case CPT_u32rv:
			mov(rcx, rax);
			host_reg_to_shil_param(prm, ecx);
			break;

		case CPT_u64rvH:
			// assuming CPT_u64rvL has just been called
			shr(rcx, 32);
			host_reg_to_shil_param(prm, ecx);
			break;

		// store from xmm0
		case CPT_f32rv:
			host_reg_to_shil_param(prm, xmm0);
#ifdef EXPLODE_SPANS
			// The x86 dynarec saves to mem as well
			//mov(rax, (uintptr_t)prm.reg_ptr());
			//movd(dword[rax], xmm0);
#endif
			break;
		}
	}

	void ngen_CC_Call(const shil_opcode& op, void* function)
	{
		int regused = 0;
		int xmmused = 0;

		for (int i = CC_pars.size(); i-- > 0;)
		{
			verify(xmmused < 4 && regused < 4);
			const shil_param& prm = *CC_pars[i].prm;
			switch (CC_pars[i].type) {
			// push the contents

			case CPT_u32:
				shil_param_to_host_reg(prm, call_regs[regused++]);
				break;

			case CPT_f32:
				shil_param_to_host_reg(prm, call_regsxmm[xmmused++]);
				break;

			// push the ptr itself
			case CPT_ptr:
				verify(prm.is_reg());

				mov(call_regs64[regused++], (size_t)prm.reg_ptr());

				break;
			default:
				// Other cases handled in ngen_CC_param
				break;
			}
		}
		GenCall((void (*)())function);
	}

	void RegPreload(u32 reg, Xbyak::Operand::Code nreg)
	{
		mov(rax, (size_t)GetRegPtr(reg));
		mov(Xbyak::Reg32(nreg), dword[rax]);
	}
	void RegWriteback(u32 reg, Xbyak::Operand::Code nreg)
	{
		mov(rax, (size_t)GetRegPtr(reg));
		mov(dword[rax], Xbyak::Reg32(nreg));
	}
	void RegPreload_FPU(u32 reg, s8 nreg)
	{
		mov(rax, (size_t)GetRegPtr(reg));
		movss(Xbyak::Xmm(nreg), dword[rax]);
	}
	void RegWriteback_FPU(u32 reg, s8 nreg)
	{
		mov(rax, (size_t)GetRegPtr(reg));
		movss(dword[rax], Xbyak::Xmm(nreg));
	}

private:
	typedef void (BlockCompiler::*X64BinaryOp)(const Xbyak::Operand&, const Xbyak::Operand&);
	typedef void (BlockCompiler::*X64BinaryFOp)(const Xbyak::Xmm&, const Xbyak::Operand&);
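
	// GenReadMemImmediate: when the guest address is a compile-time constant,
	// resolve it through _vmem once at compile time. RAM hits become direct
	// host loads; anything else becomes a direct call to the resolved handler.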
|
|
|
|
|
2019-04-29 16:23:00 +00:00
|
|
|
	bool GenReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		if (!op.rs1.is_imm())
			return false;
		u32 size = op.flags & 0x7f;
		u32 addr = op.rs1._imm;
		if (mmu_enabled())
		{
			if ((addr >> 12) != (block->vaddr >> 12))
				// When full mmu is on, only consider addresses in the same 4k page
				return false;

			u32 paddr;
			u32 rv;
			switch (size)
			{
			case 1:
				rv = mmu_data_translation<MMU_TT_DREAD, u8>(addr, paddr);
				break;
			case 2:
				rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
				break;
			case 4:
			case 8:
				rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
				break;
			default:
				die("Invalid immediate size");
				break;
			}
			if (rv != MMU_ERROR_NONE)
				return false;

			addr = paddr;
		}
		bool isram = false;
		void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);

		if (isram)
		{
			// Immediate pointer to RAM: super-duper fast access
			mov(rax, reinterpret_cast<uintptr_t>(ptr));
			switch (size)
			{
			case 1:
				if (regalloc.IsAllocg(op.rd))
					movsx(regalloc.MapRegister(op.rd), byte[rax]);
				else
				{
					movsx(eax, byte[rax]);
					mov(rcx, (uintptr_t)op.rd.reg_ptr());
					mov(dword[rcx], eax);
				}
				break;

			case 2:
				if (regalloc.IsAllocg(op.rd))
					movsx(regalloc.MapRegister(op.rd), word[rax]);
				else
				{
					movsx(eax, word[rax]);
					mov(rcx, (uintptr_t)op.rd.reg_ptr());
					mov(dword[rcx], eax);
				}
				break;

			case 4:
				if (regalloc.IsAllocg(op.rd))
					mov(regalloc.MapRegister(op.rd), dword[rax]);
				else if (regalloc.IsAllocf(op.rd))
					movd(regalloc.MapXRegister(op.rd), dword[rax]);
				else
				{
					mov(eax, dword[rax]);
					mov(rcx, (uintptr_t)op.rd.reg_ptr());
					mov(dword[rcx], eax);
				}
				break;

			case 8:
				mov(rcx, qword[rax]);
#ifdef EXPLODE_SPANS
				if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
				{
					movd(regalloc.MapXRegister(op.rd, 0), ecx);
					shr(rcx, 32);
					movd(regalloc.MapXRegister(op.rd, 1), ecx);
				}
				else
#endif
				{
					mov(rax, (uintptr_t)op.rd.reg_ptr());
					mov(qword[rax], rcx);
				}
				break;

			default:
				die("Invalid immediate size");
				break;
			}
		}
		else
		{
			// Not RAM: the returned pointer is a memory handler
			if (size == 8)
			{
				verify(!regalloc.IsAllocAny(op.rd));

				// Need to call the handler twice
				mov(call_regs[0], addr);
				GenCall((void (*)())ptr);
				mov(rcx, (size_t)op.rd.reg_ptr());
				mov(dword[rcx], eax);

				mov(call_regs[0], addr + 4);
				GenCall((void (*)())ptr);
				mov(rcx, (size_t)op.rd.reg_ptr() + 4);
				mov(dword[rcx], eax);
			}
			else
			{
				mov(call_regs[0], addr);

				switch(size)
				{
				case 1:
					GenCall((void (*)())ptr);
					movsx(eax, al);
					break;

				case 2:
					GenCall((void (*)())ptr);
					movsx(eax, ax);
					break;

				case 4:
					GenCall((void (*)())ptr);
					break;

				default:
					die("Invalid immediate size");
					break;
				}
				host_reg_to_shil_param(op.rd, eax);
			}
		}

		return true;
	}
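	// For a 4-byte read of a constant RAM address the fast path above reduces
	// to two host instructions (a sketch; the actual destination register
	// depends on the allocator):
	//   mov rax, <host pointer to the guest word>
	//   mov ebx, dword [rax]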
	bool GenWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		if (!op.rs1.is_imm())
			return false;
		u32 size = op.flags & 0x7f;
		u32 addr = op.rs1._imm;
		if (mmu_enabled())
		{
			if ((addr >> 12) != (block->vaddr >> 12))
				// When full mmu is on, only consider addresses in the same 4k page
				return false;

			u32 paddr;
			u32 rv;
			switch (size)
			{
			case 1:
				rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);
				break;
			case 2:
				rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
				break;
			case 4:
			case 8:
				rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
				break;
			default:
				die("Invalid immediate size");
				break;
			}
			if (rv != MMU_ERROR_NONE)
				return false;

			addr = paddr;
		}
		bool isram = false;
		void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);

		if (isram)
		{
			// Immediate pointer to RAM: super-duper fast access
			mov(rax, reinterpret_cast<uintptr_t>(ptr));
			switch (size)
			{
			case 1:
				if (regalloc.IsAllocg(op.rs2))
					mov(byte[rax], regalloc.MapRegister(op.rs2).cvt8());
				else if (op.rs2.is_imm())
					mov(byte[rax], (u8)op.rs2._imm);
				else
				{
					mov(rcx, (uintptr_t)op.rs2.reg_ptr());
					mov(cl, byte[rcx]);
					mov(byte[rax], cl);
				}
				break;

			case 2:
				if (regalloc.IsAllocg(op.rs2))
					mov(word[rax], regalloc.MapRegister(op.rs2).cvt16());
				else if (op.rs2.is_imm())
					mov(word[rax], (u16)op.rs2._imm);
				else
				{
					mov(rcx, (uintptr_t)op.rs2.reg_ptr());
					mov(cx, word[rcx]);
					mov(word[rax], cx);
				}
				break;

			case 4:
				if (regalloc.IsAllocg(op.rs2))
					mov(dword[rax], regalloc.MapRegister(op.rs2));
				else if (regalloc.IsAllocf(op.rs2))
					movd(dword[rax], regalloc.MapXRegister(op.rs2));
				else if (op.rs2.is_imm())
					mov(dword[rax], op.rs2._imm);
				else
				{
					mov(rcx, (uintptr_t)op.rs2.reg_ptr());
					mov(ecx, dword[rcx]);
					mov(dword[rax], ecx);
				}
				break;

			case 8:
#ifdef EXPLODE_SPANS
				if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
				{
					movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
					shl(call_regs64[1], 32);
					movd(eax, regalloc.MapXRegister(op.rs2, 0));
					or_(call_regs64[1], rax);
				}
				else
#endif
				{
					mov(rcx, (uintptr_t)op.rs2.reg_ptr());
					mov(rcx, qword[rcx]);
					mov(qword[rax], rcx);
				}
				break;

			default:
				die("Invalid immediate size");
				break;
			}
		}
		else
		{
			// Not RAM: the returned pointer is a memory handler
			mov(call_regs[0], addr);
			shil_param_to_host_reg(op.rs2, call_regs[1]);

			GenCall((void (*)())ptr);
		}

		return true;
	}
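	// The two *MemoryFast generators below inline the access through the guest
	// address space mapped at virt_ram_base. If the guest address turns out
	// not to be mapped, the host access faults and ngen_Rewrite() (see below)
	// backs host_pc up by mem_access_offset, then patches the whole fixed-size
	// window with a call to the slow path; that is why each generator pads
	// with nop() up to read_mem_op_size/write_mem_op_size.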
	bool GenReadMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		if (!mmu_enabled() || !vmem32_enabled())
			return false;
		mem_reads++;
		const u8 *start_addr = getCurr();

		mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
		mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));

		mov(rax, (uintptr_t)virt_ram_base);

		u32 size = op.flags & 0x7f;
		//verify(getCurr() - start_addr == 26);
		if (mem_access_offset == 0)
			mem_access_offset = getCurr() - start_addr;
		else
			verify(getCurr() - start_addr == mem_access_offset);

		block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
		switch (size)
		{
		case 1:
			movsx(eax, byte[rax + call_regs64[0]]);
			break;

		case 2:
			movsx(eax, word[rax + call_regs64[0]]);
			break;

		case 4:
			mov(eax, dword[rax + call_regs64[0]]);
			break;

		case 8:
			mov(rax, qword[rax + call_regs64[0]]);
			break;

		default:
			die("1..8 bytes");
		}

		while (getCurr() - start_addr < read_mem_op_size)
			nop();
		verify(getCurr() - start_addr == read_mem_op_size);

		return true;
	}
	bool GenWriteMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
	{
		if (!mmu_enabled() || !vmem32_enabled())
			return false;
		mem_writes++;
		const u8 *start_addr = getCurr();

		mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
		mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));

		mov(rax, (uintptr_t)virt_ram_base);

		u32 size = op.flags & 0x7f;
		//verify(getCurr() - start_addr == 26);
		if (mem_access_offset == 0)
			mem_access_offset = getCurr() - start_addr;
		else
			verify(getCurr() - start_addr == mem_access_offset);

		block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
		switch (size)
		{
		case 1:
			mov(byte[rax + call_regs64[0] + 0], call_regs[1].cvt8());
			break;

		case 2:
			mov(word[rax + call_regs64[0]], call_regs[1].cvt16());
			break;

		case 4:
			mov(dword[rax + call_regs64[0]], call_regs[1]);
			break;

		case 8:
			mov(qword[rax + call_regs64[0]], call_regs64[1]);
			break;

		default:
			die("1..8 bytes");
		}

		while (getCurr() - start_addr < write_mem_op_size)
			nop();
		verify(getCurr() - start_addr == write_mem_op_size);

		return true;
	}
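	// CheckBlock emits the per-block prologue checks: under full MMU it
	// verifies that next_pc still matches the block's virtual address, and
	// when SMC (self-modifying code) checks are forced it compares current
	// SH4 memory against the code bytes the block was compiled from,
	// 8/4/2 bytes at a time.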
	void CheckBlock(bool force_checks, RuntimeBlockInfo* block) {
		mov(call_regs[0], block->addr);

		// FIXME This test shouldn't be necessary
		// However the decoder makes various assumptions about the current PC value, which are simply
		// not true in a virtualized memory model. So this can only work if virtual and physical
		// addresses are the same at compile and run times.
		if (mmu_enabled())
		{
			mov(rax, (uintptr_t)&next_pc);
			cmp(dword[rax], block->vaddr);
			jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
		}

		if (!force_checks)
			return;

		s32 sz = block->sh4_code_size;
		u32 sa = block->addr;

		void* ptr = (void*)GetMemPtr(sa, sz > 8 ? 8 : sz);
		if (ptr)
		{
			while (sz > 0)
			{
				uintptr_t uintptr = reinterpret_cast<uintptr_t>(ptr);
				mov(rax, uintptr);

				if (sz >= 8 && !(uintptr & 7)) {
					mov(rdx, *(u64*)ptr);
					cmp(qword[rax], rdx);
					sz -= 8;
					sa += 8;
				}
				else if (sz >= 4 && !(uintptr & 3)) {
					mov(edx, *(u32*)ptr);
					cmp(dword[rax], edx);
					sz -= 4;
					sa += 4;
				}
				else {
					mov(edx, *(u16*)ptr);
					cmp(word[rax], dx);
					sz -= 2;
					sa += 2;
				}
				jne(reinterpret_cast<const void*>(CC_RX2RW(&ngen_blockcheckfail)));
				ptr = (void*)GetMemPtr(sa, sz > 8 ? 8 : sz);
			}
		}
	}
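	// Note on the shop_sub special case below: when rd and rs2 share a host
	// register, "rd = rs1 - rs2" is emitted as "neg rd; add rd, rs1", i.e.
	// rs1 + (-rs2), which avoids needing a scratch register.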
	void GenBinaryOp(const shil_opcode &op, X64BinaryOp natop)
	{
		Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
		const shil_param *rs2 = &op.rs2;
		if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
		{
			if (op.rs2.is_reg() && regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
			{
				if (op.op == shop_sub)
				{
					// This op isn't commutative
					neg(rd);
					add(rd, regalloc.MapRegister(op.rs1));

					return;
				}
				// otherwise just swap the operands
				rs2 = &op.rs1;
			}
			else
				mov(rd, regalloc.MapRegister(op.rs1));
		}
		if (op.rs2.is_imm())
		{
			mov(ecx, op.rs2._imm);
			(this->*natop)(rd, ecx);
		}
		else
			(this->*natop)(rd, regalloc.MapRegister(*rs2));
	}
	void GenBinaryFOp(const shil_opcode &op, X64BinaryFOp natop)
	{
		Xbyak::Xmm rd = regalloc.MapXRegister(op.rd);
		const shil_param *rs2 = &op.rs2;
		if (regalloc.mapf(op.rd) != regalloc.mapf(op.rs1))
		{
			if (op.rs2.is_reg() && regalloc.mapf(op.rd) == regalloc.mapf(op.rs2))
			{
				if (op.op == shop_fsub || op.op == shop_fdiv)
				{
					// these ops aren't commutative so we need a scratch reg
					movss(xmm0, regalloc.MapXRegister(op.rs2));
					movss(rd, regalloc.MapXRegister(op.rs1));
					(this->*natop)(rd, xmm0);

					return;
				}
				// otherwise just swap the operands
				rs2 = &op.rs1;
			}
			else
				movss(rd, regalloc.MapXRegister(op.rs1));
		}
		if (op.rs2.is_imm())
		{
			mov(eax, op.rs2._imm);
			movd(xmm0, eax);
			(this->*natop)(rd, xmm0);
		}
		else
			(this->*natop)(rd, regalloc.MapXRegister(*rs2));
	}
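	// GenCall wraps an outbound call from JITed code. On Windows x64 the
	// xmm6-xmm15 registers are callee-saved, but in the System V ABI
	// (Linux/macOS) every xmm register is caller-saved, so any of xmm8-xmm11
	// the allocator may have live must be spilled around the call.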
	template<class Ret, class... Params>
	void GenCall(Ret(*function)(Params...), bool skip_floats = false)
	{
#ifndef _WIN32
		bool xmm8_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm8, current_opid);
		bool xmm9_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm9, current_opid);
		bool xmm10_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm10, current_opid);
		bool xmm11_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm11, current_opid);

		// Need to save xmm registers as they are not preserved in linux/mach
		int offset = 0;
		u32 stack_size = 0;
		if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
		{
			stack_size = 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped);
			stack_size = (((stack_size + 15) >> 4) << 4);	// Stack needs to be 16-byte aligned before the call
			sub(rsp, stack_size);
			if (xmm8_mapped)
			{
				movd(ptr[rsp + offset], xmm8);
				offset += 4;
			}
			if (xmm9_mapped)
			{
				movd(ptr[rsp + offset], xmm9);
				offset += 4;
			}
			if (xmm10_mapped)
			{
				movd(ptr[rsp + offset], xmm10);
				offset += 4;
			}
			if (xmm11_mapped)
			{
				movd(ptr[rsp + offset], xmm11);
				offset += 4;
			}
		}
#endif

		call(CC_RX2RW(function));

#ifndef _WIN32
		if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
		{
			if (xmm11_mapped)
			{
				offset -= 4;
				movd(xmm11, ptr[rsp + offset]);
			}
			if (xmm10_mapped)
			{
				offset -= 4;
				movd(xmm10, ptr[rsp + offset]);
			}
			if (xmm9_mapped)
			{
				offset -= 4;
				movd(xmm9, ptr[rsp + offset]);
			}
			if (xmm8_mapped)
			{
				offset -= 4;
				movd(xmm8, ptr[rsp + offset]);
			}
			add(rsp, stack_size);
		}
#endif
	}
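	// A usage sketch (mirroring the handler calls in GenReadMemImmediate above):
	//   mov(call_regs[0], addr);     // first integer argument per the host ABI
	//   GenCall((void (*)())ptr);    // an integer result comes back in eax/rax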
	// uses eax/rax
	void shil_param_to_host_reg(const shil_param& param, const Xbyak::Reg& reg)
	{
		if (param.is_imm())
		{
			if (!reg.isXMM())
				mov(reg, param._imm);
			else
			{
				mov(eax, param._imm);
				movd((const Xbyak::Xmm &)reg, eax);
			}
		}
		else if (param.is_reg())
		{
			if (param.is_r32f())
			{
				if (regalloc.IsAllocf(param))
				{
					Xbyak::Xmm sreg = regalloc.MapXRegister(param);
					if (!reg.isXMM())
						movd((const Xbyak::Reg32 &)reg, sreg);
					else if (reg != sreg)
						movss((const Xbyak::Xmm &)reg, sreg);
				}
				else
				{
					mov(rax, (size_t)param.reg_ptr());
					verify(!reg.isXMM());
					mov((const Xbyak::Reg32 &)reg, dword[rax]);
				}
			}
			else
			{
				if (regalloc.IsAllocg(param))
				{
					Xbyak::Reg32 sreg = regalloc.MapRegister(param);
					if (reg.isXMM())
						movd((const Xbyak::Xmm &)reg, sreg);
					else if (reg != sreg)
						mov((const Xbyak::Reg32 &)reg, sreg);
				}
				else
				{
					mov(rax, (size_t)param.reg_ptr());
					if (!reg.isXMM())
						mov((const Xbyak::Reg32 &)reg, dword[rax]);
					else
						movss((const Xbyak::Xmm &)reg, dword[rax]);
				}
			}
		}
		else
		{
			verify(param.is_null());
		}
	}

	// uses rax
	void host_reg_to_shil_param(const shil_param& param, const Xbyak::Reg& reg)
	{
		if (regalloc.IsAllocg(param))
		{
			Xbyak::Reg32 sreg = regalloc.MapRegister(param);
			if (!reg.isXMM())
				mov(sreg, (const Xbyak::Reg32 &)reg);
			else if (reg != sreg)
				movd(sreg, (const Xbyak::Xmm &)reg);
		}
		else if (regalloc.IsAllocf(param))
		{
			Xbyak::Xmm sreg = regalloc.MapXRegister(param);
			if (!reg.isXMM())
				movd(sreg, (const Xbyak::Reg32 &)reg);
			else if (reg != sreg)
				movss(sreg, (const Xbyak::Xmm &)reg);
		}
		else
		{
			mov(rax, (size_t)param.reg_ptr());
			if (!reg.isXMM())
				mov(dword[rax], (const Xbyak::Reg32 &)reg);
			else
				movss(dword[rax], (const Xbyak::Xmm &)reg);
		}
	}
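	// Host ABI plumbing: call_regs/call_regs64 hold the integer argument
	// registers and call_regsxmm the float ones, presumably populated per the
	// calling convention in use (rcx/rdx/r8/r9 on Win64; rdi/rsi/rdx/rcx on
	// System V), so the generators above stay ABI-agnostic.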
	std::vector<Xbyak::Reg32> call_regs;
	std::vector<Xbyak::Reg64> call_regs64;
	std::vector<Xbyak::Xmm> call_regsxmm;

	struct CC_PS
	{
		CanonicalParamType type;
		const shil_param* prm;
	};
	std::vector<CC_PS> CC_pars;

	X64RegAlloc regalloc;
	Xbyak::util::Cpu cpu;
	size_t current_opid;
	Xbyak::Label exit_block;
	static const u32 read_mem_op_size;
	static const u32 write_mem_op_size;
public:
	static u32 mem_access_offset;
};
const u32 BlockCompiler::read_mem_op_size = 30;
const u32 BlockCompiler::write_mem_op_size = 30;
u32 BlockCompiler::mem_access_offset = 0;

void X64RegAlloc::Preload(u32 reg, Xbyak::Operand::Code nreg)
{
	compiler->RegPreload(reg, nreg);
}
void X64RegAlloc::Writeback(u32 reg, Xbyak::Operand::Code nreg)
{
	compiler->RegWriteback(reg, nreg);
}
void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg)
{
	compiler->RegPreload_FPU(reg, nreg);
}
void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
{
	compiler->RegWriteback_FPU(reg, nreg);
}

static BlockCompiler* compiler;
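// Entry points called by the platform-independent dynarec core: one
// BlockCompiler instance is created per compiled block, and the ngen_CC_*
// callbacks marshal parameters for canonical (C function) implementations of
// SHIL opcodes while the block is being compiled.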
void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise)
{
	verify(emit_FreeSpace() >= 16 * 1024);

	compiler = new BlockCompiler();

	compiler->compile(block, smc_checks, reset, staging, optimise);

	delete compiler;
}

void ngen_CC_Start(shil_opcode* op)
{
	compiler->ngen_CC_Start(*op);
}

void ngen_CC_Param(shil_opcode* op, shil_param* par, CanonicalParamType tp)
{
	compiler->ngen_CC_param(*op, *par, tp);
}

void ngen_CC_Call(shil_opcode* op, void* function)
{
	compiler->ngen_CC_Call(*op, function);
}

void ngen_CC_Finish(shil_opcode* op)
{
}
bool ngen_Rewrite(unat& host_pc, unat, unat)
{
	if (!mmu_enabled() || !vmem32_enabled())
		return false;

	//printf("ngen_Rewrite pc %p\n", host_pc);
	RuntimeBlockInfoPtr block = bm_GetBlock((void *)host_pc);
	if (block == NULL)
	{
		WARN_LOG(DYNAREC, "ngen_Rewrite: Block at %p not found", (void *)host_pc);
		return false;
	}
	u8 *code_ptr = (u8*)host_pc;
	auto it = block->memory_accesses.find(code_ptr);
	if (it == block->memory_accesses.end())
	{
		WARN_LOG(DYNAREC, "ngen_Rewrite: memory access at %p not found (%lu entries)", code_ptr, block->memory_accesses.size());
		return false;
	}
	u32 opid = it->second;
	verify(opid < block->oplist.size());
	const shil_opcode& op = block->oplist[opid];

	BlockCompiler *assembler = new BlockCompiler(code_ptr - BlockCompiler::mem_access_offset);
	assembler->InitializeRewrite(block.get(), opid);
	if (op.op == shop_readm)
	{
		mem_rewrites_r++;
		assembler->GenReadMemorySlow(op, block.get());
	}
	else
	{
		mem_rewrites_w++;
		assembler->GenWriteMemorySlow(op, block.get());
	}
	assembler->FinalizeRewrite();
	verify(block->host_code_size >= assembler->getSize());
	delete assembler;
	block->memory_accesses.erase(it);
	host_pc = (unat)(code_ptr - BlockCompiler::mem_access_offset);

	return true;
}
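// Invoked from the host fault handler once an SH4 exception has been raised
// inside a compiled block; the longjmp presumably unwinds back to a
// setjmp(jmp_env) in the dynarec mainloop, abandoning the faulting block.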
void ngen_HandleException()
{
	longjmp(jmp_env, 1);
}
#endif