arm64: use register spans allocation. Implement some opcodes natively
commit a9a2aad8f6 (parent 3d8b01c515)
@@ -1298,7 +1298,7 @@ INLINE void DYNACALL do_sqw(u32 Dest)
}

void DYNACALL do_sqw_mmu(u32 dst) { do_sqw<true>(dst); }
#if HOST_CPU!=CPU_ARM
#if HOST_CPU != CPU_ARM && HOST_CPU != CPU_ARM64
//yes, this micro optimization makes a difference
extern "C" void DYNACALL do_sqw_nommu_area_3(u32 dst,u8* sqb)
{
@@ -0,0 +1,84 @@
/*
	Copyright 2019 flyinghead

	This file is part of reicast.

	reicast is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	reicast is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/

#ifndef CORE_REC_ARM64_ARM64_REGALLOC_H_
#define CORE_REC_ARM64_ARM64_REGALLOC_H_

#include "hw/sh4/dyna/regalloc.h"
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;

enum eReg {
	W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, W16,
	W17, W18, W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, W30
};
enum eFReg {
	S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16,
	S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
};

#if HOST_OS == OS_DARWIN
#error "TODO static eReg alloc_regs[]={r5,r6,r7,r10,(eReg)-1};" // TODO
#else
static eReg alloc_regs[] = { W19, W20, W21, W22, W23, W24, W25, W26, (eReg)-1 };
#endif
static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 };

class Arm64Assembler;

struct Arm64RegAlloc : RegAlloc<eReg, eFReg, false>
{
	Arm64RegAlloc(Arm64Assembler *assembler) : assembler(assembler) {}

	void DoAlloc(RuntimeBlockInfo* block)
	{
		RegAlloc::DoAlloc(block, alloc_regs, alloc_fregs);
	}

	virtual void Preload(u32 reg, eReg nreg) override;
	virtual void Writeback(u32 reg, eReg nreg) override;
	virtual void Preload_FPU(u32 reg, eFReg nreg) override;
	virtual void Writeback_FPU(u32 reg, eFReg nreg) override;

	const Register& MapRegister(const shil_param& param)
	{
		eReg ereg = mapg(param);
		if (ereg == (eReg)-1)
			die("Register not allocated");
		return Register::GetWRegFromCode(ereg);
	}

	const VRegister& MapVRegister(const shil_param& param)
	{
		eFReg ereg = mapf(param);
		if (ereg == (eFReg)-1)
			die("VRegister not allocated");
		return VRegister::GetSRegFromCode(ereg);
	}

	Arm64Assembler *assembler;
};

extern Arm64RegAlloc reg;

#endif /* CORE_REC_ARM64_ARM64_REGALLOC_H_ */
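For orientation, here is a minimal usage sketch of the allocator declared above (editorial illustration, not part of the diff): DoAlloc() picks W19..W26 / S8..S15 spans for a block, and MapRegister()/MapVRegister() resolve each shil_param inside the per-opcode OpBegin()/OpEnd() window. The function name emit_add_example and the standalone loop are hypothetical; the real driver is ngen_Compile() in rec_arm64.cpp below.

// Hypothetical sketch: driving Arm64RegAlloc for one block.
#include "arm64_regalloc.h"

static void emit_add_example(Arm64Assembler* assembler, RuntimeBlockInfo* block)
{
	Arm64RegAlloc regalloc(assembler);
	regalloc.DoAlloc(block);				// pick host register spans for the block

	for (size_t i = 0; i < block->oplist.size(); i++)
	{
		shil_opcode& op = block->oplist[i];
		regalloc.OpBegin(&op, i);			// any needed preloads happen here
		if (op.op == shop_add)				// example: native 32-bit add
			assembler->Add(regalloc.MapRegister(op.rd),
					regalloc.MapRegister(op.rs1),
					regalloc.MapRegister(op.rs2));
		regalloc.OpEnd(&op);				// any needed writebacks happen here
	}
}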
@@ -1,10 +1,28 @@
/*
	Copyright 2019 flyinghead

	This file is part of reicast.

	reicast is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	reicast is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/

#include "types.h"

#if FEAT_SHREC == DYNAREC_JIT

#include <unistd.h>
#include <sys/mman.h>
#include <map>

#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
@@ -16,6 +34,7 @@ using namespace vixl::aarch64;
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/dyna/ngen.h"
#include "hw/sh4/sh4_mem.h"
#include "arm64_regalloc.h"

struct DynaRBI : RuntimeBlockInfo
{
@@ -75,26 +94,75 @@ static void ngen_FailedToFindBlock_internal() {

void(*ngen_FailedToFindBlock)() = &ngen_FailedToFindBlock_internal;

static int cycle_counter;
extern "C" {

void *bm_GetCodeInternal(u32 pc)
{
	return (void*)bm_GetCode(pc);
}

void UpdateSystemInternal(u32 pc)
{
	if (UpdateSystem())
		rdv_DoInterrupts_pc(pc);
}

}

void ngen_mainloop(void* v_cntx)
{
	Sh4RCB* ctx = (Sh4RCB*)((u8*)v_cntx - sizeof(Sh4RCB));

	cycle_counter = SH4_TIMESLICE;
	__asm__ volatile
	(
		"stp x19, x20, [sp, #-144]! \n\t"
		"stp x21, x22, [sp, #16] \n\t"
		"stp x23, x24, [sp, #32] \n\t"
		"stp x25, x26, [sp, #48] \n\t"
		"stp x27, x28, [sp, #64] \n\t"
		"stp s8, s9, [sp, #80] \n\t"
		"stp s10, s11, [sp, #96] \n\t"
		"stp s12, s13, [sp, #112] \n\t"
		"stp s14, s15, [sp, #128] \n\t"
		// Use x28 as sh4 context pointer
		"mov x28, %0 \n\t"
		// Use x27 as cycle_counter
		"mov w27, %2 \n\t" // SH4_TIMESLICE

	while (sh4_int_bCpuRun) {
		do {
			DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc);
			rcb();
		} while (cycle_counter > 0);
		"run_loop: \n\t"
		"ldr w0, [x28, %3] \n\t" // CpuRunning
		"cmp w0, #0 \n\t"
		"b.eq end_run_loop \n\t"

		cycle_counter += SH4_TIMESLICE;
		"slice_loop: \n\t"
		"ldr w0, [x28, %1] \n\t" // pc
		"bl bm_GetCodeInternal \n\t"
		"blr x0 \n\t"
		"cmp w27, #0 \n\t"
		"b.gt slice_loop \n\t"

		if (UpdateSystem()) {
			rdv_DoInterrupts_pc(ctx->cntx.pc);
		}
	}
		"add w27, w27, %2 \n\t" // SH4_TIMESLICE
		"ldr w0, [x28, %1] \n\t" // pc
		"bl UpdateSystemInternal \n\t"
		"b run_loop \n\t"

		"end_run_loop: \n\t"
		"ldp s14, s15, [sp, #128] \n\t"
		"ldp s12, s13, [sp, #112] \n\t"
		"ldp s10, s11, [sp, #96] \n\t"
		"ldp s8, s9, [sp, #80] \n\t"
		"ldp x27, x28, [sp, #64] \n\t"
		"ldp x25, x26, [sp, #48] \n\t"
		"ldp x23, x24, [sp, #32] \n\t"
		"ldp x21, x22, [sp, #16] \n\t"
		"ldp x19, x20, [sp], #144 \n\t"
		:
		: "r"(reinterpret_cast<uintptr_t>(&ctx->cntx)),
		  "i"(offsetof(Sh4Context, pc)),
		  "i"(SH4_TIMESLICE),
		  "i"(offsetof(Sh4Context, CpuRunning))
		: "memory"
	);
}
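For readability, a rough C-level sketch (editorial, not from the diff) of the control flow the inline assembly above implements; in the real code the cycle counter lives in w27 and is decremented by each compiled block, x28 carries the Sh4Context pointer, and the helper name mainloop_sketch is hypothetical.

// Sketch only: approximate C equivalent of the assembly main loop above.
static void mainloop_sketch(Sh4Context* ctx)
{
	cycle_counter = SH4_TIMESLICE;			// "mov w27, %2"
	while (ctx->CpuRunning)				// "run_loop" / "end_run_loop"
	{
		do {					// "slice_loop"
			DynarecCodeEntryPtr code = (DynarecCodeEntryPtr)bm_GetCodeInternal(ctx->pc);
			code();				// compiled blocks decrement the counter
		} while (cycle_counter > 0);

		cycle_counter += SH4_TIMESLICE;		// "add w27, w27, %2"
		UpdateSystemInternal(ctx->pc);		// UpdateSystem() + rdv_DoInterrupts_pc()
	}
}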

void ngen_init()
@@ -128,8 +196,12 @@ void ngen_blockcheckfail(u32 pc) {

class Arm64Assembler : public MacroAssembler
{
	typedef void (MacroAssembler::*Arm64Op)(const Register&, const Register&, const Operand&);
	typedef void (MacroAssembler::*Arm64Op2)(const Register&, const Register&, const Register&);
	typedef void (MacroAssembler::*Arm64Op3)(const Register&, const Register&, const Operand&, enum FlagsUpdate);

public:
	Arm64Assembler() : MacroAssembler((u8 *)emit_GetCCPtr(), 64 * 1024)
	Arm64Assembler() : MacroAssembler((u8 *)emit_GetCCPtr(), 64 * 1024), regalloc(this)
	{
		call_regs.push_back(&w0);
		call_regs.push_back(&w1);
@@ -159,24 +231,45 @@ public:
		call_fregs.push_back(&s7);
	}

	void ngen_BinaryOp(shil_opcode* op, Arm64Op arm_op, Arm64Op2 arm_op2, Arm64Op3 arm_op3)
	{
		const Register* reg3 = &wzr;
		if (op->rs2.is_imm())
		{
			Mov(w10, op->rs2._imm);
			reg3 = &w10;
		}
		else if (op->rs2.is_r32i())
		{
			reg3 = &regalloc.MapRegister(op->rs2);
		}
		if (arm_op != NULL)
			((*this).*arm_op)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), *reg3);
		else if (arm_op2 != NULL)
			((*this).*arm_op2)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), *reg3);
		else
			((*this).*arm_op3)(regalloc.MapRegister(op->rd), regalloc.MapRegister(op->rs1), *reg3, LeaveFlags);
	}

	void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise)
	{
		//printf("REC-ARM64 compiling %08x\n", block->addr);
		if (force_checks)
			CheckBlock(block);

		Stp(x28, x30, MemOperand(sp, -16, PreIndex));
		// Use x28 as sh4 context pointer
		Mov(x28, reinterpret_cast<uintptr_t>(&p_sh4rcb->cntx));
		Str(x30, MemOperand(sp, -16, PreIndex));

		Mov(x9, reinterpret_cast<uintptr_t>(&cycle_counter));
		Ldr(w10, MemOperand(x9));
		Sub(w10, w10, block->guest_cycles);
		Str(w10, MemOperand(x9));
		// run register allocator
		regalloc.DoAlloc(block);

		// scheduler
		Sub(w27, w27, block->guest_cycles);

		for (size_t i = 0; i < block->oplist.size(); i++)
		{
			shil_opcode& op = block->oplist[i];
			regalloc.OpBegin(&op, i);

			switch (op.op)
			{
			case shop_ifb: // Interpreter fallback
@@ -188,28 +281,42 @@ public:
				Mov(*call_regs[0], op.rs3._imm);

				CallRuntime(OpDesc[op.rs3._imm]->oph);
				reg_cache.clear();
				break;

			case shop_jcond:
			case shop_jdyn:
				Ldr(w10, sh4_context_mem_operand(op.rs1.reg_ptr()));
				Mov(w10, regalloc.MapRegister(op.rs1));

				if (op.rs2.is_imm()) {
					Mov(w9, op.rs2._imm);
					Add(w10, w10, w9);
				}

				Str(w10, sh4_context_mem_operand(op.rd.reg_ptr()));
				reg_cache.clear();
				Mov(regalloc.MapRegister(op.rd), w10);
				break;

			case shop_mov32:
				verify(op.rd.is_reg());
				verify(op.rs1.is_reg() || op.rs1.is_imm());

				shil_param_to_host_reg(op.rs1, w10);
				host_reg_to_shil_param(op.rd, w10);
				if (regalloc.IsAllocf(op.rd))
				{
					if (op.rs1.is_imm())
						Fmov(regalloc.MapVRegister(op.rd), (float&)op.rs1._imm);
					else if (regalloc.IsAllocf(op.rs1))
						Fmov(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
					else
						Fmov(regalloc.MapVRegister(op.rd), regalloc.MapRegister(op.rs1));
				}
				else
				{
					if (op.rs1.is_imm())
						Mov(regalloc.MapRegister(op.rd), op.rs1._imm);
					else if (regalloc.IsAllocg(op.rs1))
						Mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
					else
						Fmov(regalloc.MapRegister(op.rd), regalloc.MapVRegister(op.rs1));
				}
				break;

			case shop_mov64:
@@ -227,7 +334,6 @@ public:
				{
					shil_param_to_host_reg(op.rs3, w10);
					Add(*call_regs[0], *call_regs[0], w10);
					flush_reg_cache(*call_regs[0]);
				}

				u32 size = op.flags & 0x7f;
@@ -256,7 +362,6 @@ public:
					die("1..8 bytes");
					break;
				}
				reg_cache.clear();

				if (size != 8)
					host_reg_to_shil_param(op.rd, w0);
@@ -272,7 +377,6 @@ public:
				{
					shil_param_to_host_reg(op.rs3, w10);
					Add(*call_regs[0], *call_regs[0], w10);
					flush_reg_cache(*call_regs[0]);
				}

				u32 size = op.flags & 0x7f;
@@ -303,14 +407,150 @@ public:
					die("1..8 bytes");
					break;
				}
				reg_cache.clear();
			}
				break;

			case shop_sync_sr:
				CallRuntime(UpdateSR);
				break;

			case shop_neg:
				Neg(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;
			case shop_not:
				Mvn(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;

			case shop_shl:
				ngen_BinaryOp(&op, NULL, &MacroAssembler::Lsl, NULL);
				break;
			case shop_shr:
				ngen_BinaryOp(&op, NULL, &MacroAssembler::Lsr, NULL);
				break;
			case shop_sar:
				ngen_BinaryOp(&op, NULL, &MacroAssembler::Asr, NULL);
				break;
			case shop_and:
				ngen_BinaryOp(&op, &MacroAssembler::And, NULL, NULL);
				break;
			case shop_or:
				ngen_BinaryOp(&op, &MacroAssembler::Orr, NULL, NULL);
				break;
			case shop_xor:
				ngen_BinaryOp(&op, &MacroAssembler::Eor, NULL, NULL);
				break;
			case shop_add:
				ngen_BinaryOp(&op, NULL, NULL, &MacroAssembler::Add);
				break;
			case shop_sub:
				ngen_BinaryOp(&op, NULL, NULL, &MacroAssembler::Sub);
				break;
			case shop_ror:
				ngen_BinaryOp(&op, NULL, &MacroAssembler::Ror, NULL);
				break;

			case shop_adc:
				Cmp(regalloc.MapRegister(op.rs3), 1); // C = rs3
				Adcs(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2)); // (C,rd)=rs1+rs2+rs3(C)
				Cset(regalloc.MapRegister(op.rd2), cs); // rd2 = C
				break;
			/* TODO
			case shop_rocl:
				Orr(reg.mapg(op->rd),reg.mapg(op->rs2),reg.mapg(op->rs1),true, S_LSL, 1); //(C,rd)= rs1<<1 + (|) rs2
				MOVW(reg.mapg(op->rd2),0); //clear rd2 (for ADC/MOVCS)
				ADC(reg.mapg(op->rd2),reg.mapg(op->rd2),0); //rd2=C (or MOVCS rd2, 1)
			}
				break;
			*/
			case shop_mul_u16:
				Uxth(w10, regalloc.MapRegister(op.rs1));
				Uxth(w11, regalloc.MapRegister(op.rs2));
				Mul(regalloc.MapRegister(op.rd), w10, w11);
				break;
			case shop_mul_s16:
				Sxth(w10, regalloc.MapRegister(op.rs1));
				Sxth(w11, regalloc.MapRegister(op.rs2));
				Mul(regalloc.MapRegister(op.rd), w10, w11);
				break;
			case shop_mul_i32:
				Mul(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
				break;
			case shop_mul_u64:
			case shop_mul_s64:
			{
				const Register& rd_xreg = Register::GetXRegFromCode(regalloc.MapRegister(op.rd).GetCode());
				if (op.op == shop_mul_u64)
					Umull(rd_xreg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
				else
					Smull(rd_xreg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
				const Register& rd2_xreg = Register::GetXRegFromCode(regalloc.MapRegister(op.rd2).GetCode());
				Lsr(rd2_xreg, rd_xreg, 32);
			}
				break;

			case shop_ext_s8:
				Sxtb(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;
			case shop_ext_s16:
				Sxth(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;

			//
			// FPU
			//

			case shop_fadd:
				Fadd(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
				break;
			case shop_fsub:
				Fsub(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
				break;
			case shop_fmul:
				Fmul(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
				break;
			case shop_fdiv:
				Fdiv(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
				break;

			case shop_fabs:
				Fabs(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
				break;
			case shop_fneg:
				Fneg(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
				break;
			case shop_fsqrt:
				Fsqrt(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
				break;

			case shop_fmac:
				Fmadd(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs3), regalloc.MapVRegister(op.rs2), regalloc.MapVRegister(op.rs1));
				break;

			case shop_fsrra:
				Fsqrt(s0, regalloc.MapVRegister(op.rs1));
				Fmov(s1, 1.f);
				Fdiv(regalloc.MapVRegister(op.rd), s1, s0);
				break;

			case shop_fsetgt:
			case shop_fseteq:
				Fcmp(regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
				Cset(regalloc.MapRegister(op.rd), op.op == shop_fsetgt ? gt : eq);
				break;

			case shop_cvt_f2i_t:
				Fcvtzs(regalloc.MapRegister(op.rd), regalloc.MapVRegister(op.rs1));
				break;
			case shop_cvt_i2f_n:
			case shop_cvt_i2f_z:
				Scvtf(regalloc.MapVRegister(op.rd), regalloc.MapRegister(op.rs1));
				break;

			default:
				shil_chf[op.op](&op);
				break;
			}
			regalloc.OpEnd(&op);
		}

		switch (block->BlockType)
@@ -377,8 +617,7 @@ public:
			die("Invalid block end type");
		}

		Ldp(x28, x30, MemOperand(sp, 16, PostIndex));
		Ldr(x30, MemOperand(sp, 16, PostIndex));
		Ret();

		Label code_end;
@@ -464,7 +703,7 @@ public:

		case CPT_f32:
			if (prm.is_reg()) {
				Ldr(*call_fregs[fregused], sh4_context_mem_operand(prm.reg_ptr()));
				Fmov(*call_fregs[fregused], regalloc.MapVRegister(prm));
			}
			else {
				verify(prm.is_null());
@@ -474,7 +713,6 @@ public:

		case CPT_ptr:
			verify(prm.is_reg());
			flush_reg_cache(*call_regs64[regused]);
			// push the ptr itself
			Mov(*call_regs64[regused++], reinterpret_cast<uintptr_t>(prm.reg_ptr()));
@@ -488,7 +726,13 @@ public:
			}
		}
		CallRuntime((void (*)())function);
		reg_cache.clear();
	}

	MemOperand sh4_context_mem_operand(void *p)
	{
		u32 offset = (u8*)p - (u8*)&p_sh4rcb->cntx;
		verify((offset & 3) == 0 && offset <= 16380); // FIXME 64-bit regs need multiple of 8 up to 32760
		return MemOperand(x28, offset);
	}

private:
@@ -508,7 +752,7 @@ private:

		while (sz > 0)
		{
			if (sz >= 8)
			if (sz >= 8 && (reinterpret_cast<uintptr_t>(ptr) & 7) == 0)
			{
				Ldr(x10, MemOperand(x9, 8, PostIndex));
				Ldr(x11, *(u64*)ptr);
@@ -516,7 +760,7 @@ private:
				sz -= 8;
				ptr += 8;
			}
			else if (sz >= 4)
			else if (sz >= 4 && (reinterpret_cast<uintptr_t>(ptr) & 3) == 0)
			{
				Ldr(w10, MemOperand(x9, 4, PostIndex));
				Ldr(w11, *(u32*)ptr);
@@ -543,60 +787,51 @@ private:
		Bind(&blockcheck_success);
	}

	MemOperand sh4_context_mem_operand(void *p)
	{
		u32 offset = (u8*)p - (u8*)&p_sh4rcb->cntx;
		verify((offset & 3) == 0 && offset <= 16380); // FIXME 64-bit regs need multiple of 8 up to 32760
		return MemOperand(x28, offset);
	}

	void shil_param_to_host_reg(const shil_param& param, const Register& reg)
	{
		if (param.is_imm()) {
		if (param.is_imm())
		{
			Mov(reg, param._imm);
			flush_reg_cache(reg);
		}
		else if (param.is_reg()) {
			const Register *cached_reg = reg_cache[param._reg];
			if (cached_reg != NULL)
			{
				if (cached_reg != &reg)
				{
					Mov(reg, *cached_reg);
					set_reg_cache(param._reg, reg);
				}
			}
			else
		else if (param.is_reg())
		{
			if (param.is_r64f())
			{
				// TODO use regalloc
				Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
				set_reg_cache(param._reg, reg);
			}
			else if (param.is_r32f())
				Fmov(reg, regalloc.MapVRegister(param));
			else
				Mov(reg, regalloc.MapRegister(param));
		}
		else {
		else
		{
			verify(param.is_null());
		}
	}

	void host_reg_to_shil_param(const shil_param& param, const CPURegister& reg)
	{
		Str(reg, sh4_context_mem_operand(param.reg_ptr()));
		if (reg.IsRegister())
			set_reg_cache(param._reg, (const Register&)reg);
	}

	void set_reg_cache(Sh4RegType sh4_reg_type, const Register& reg)
	{
		flush_reg_cache(reg);
		reg_cache[sh4_reg_type] = &reg;
	}

	void flush_reg_cache(const Register& reg)
	{
		for (auto it = reg_cache.begin(); it != reg_cache.end();)
			if (it->second != NULL && it->second->GetCode() == reg.GetCode())
				it = reg_cache.erase(it);
		if (reg.Is64Bits())
		{
			// TODO use regalloc
			Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
		}
		else if (regalloc.IsAllocg(param))
		{
			if (reg.IsRegister())
				Mov(regalloc.MapRegister(param), (const Register&)reg);
			else
				it++;
				Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
		}
		else
		{
			if (reg.IsVRegister())
				Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
			else
				Fmov(regalloc.MapVRegister(param), (const Register&)reg);
		}
	}

	struct CC_PS
@@ -608,7 +843,7 @@ private:
	std::vector<const WRegister*> call_regs;
	std::vector<const XRegister*> call_regs64;
	std::vector<const VRegister*> call_fregs;
	std::map<Sh4RegType, const Register*> reg_cache;
	Arm64RegAlloc regalloc;
};

static Arm64Assembler* compiler;
@@ -645,4 +880,42 @@ void ngen_CC_Finish(shil_opcode* op)

}

void Arm64RegAlloc::Preload(u32 reg, eReg nreg)
{
	assembler->Ldr(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Writeback(u32 reg, eReg nreg)
{
	assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg)
{
	assembler->Ldr(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg)
{
	assembler->Str(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}

extern "C" void do_sqw_nommu_area_3(u32 dst, u8* sqb)
{
	__asm__ volatile
	(
		"movz x11, #0x0C00, lsl #16 \n\t"
		"add x11, x1, x11 \n\t" // get ram ptr from x1, part 1
		"and x12, x0, #0x20 \n\t" // SQ# selection, isolate
		"ubfx x0, x0, #5, #20 \n\t" // get ram offset
		"add x1, x12, x1 \n\t" // SQ# selection, add to SQ ptr
		"add x11, x11, #512 \n\t" // get ram ptr from x1, part 2
		"add x11, x11, x0, lsl #5 \n\t" // ram + offset
		"ldp x9, x10, [x1], #16 \n\t"
		"stp x9, x10, [x11], #16 \n\t"
		"ldp x9, x10, [x1] \n\t"
		"stp x9, x10, [x11] \n\t"
		"ret \n"

		: : : "memory"
	);
}
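A rough C-level view of this store-queue burst (editorial sketch, not from the diff); the 0x0C000000 + 512 displacement from sqb and the 32-byte copy are read directly off the assembly above, and the function name do_sqw_nommu_area_3_sketch is hypothetical.

// Sketch only: what the assembly above does, expressed in C.
#include <string.h>

static void do_sqw_nommu_area_3_sketch(u32 dst, u8* sqb)
{
	u8* sq  = sqb + (dst & 0x20);						// select SQ0 or SQ1
	u8* ram = sqb + 0x0C000000 + 512 + (((dst >> 5) & 0xFFFFF) << 5);	// area-3 RAM target
	memcpy(ram, sq, 32);							// burst one 32-byte store queue
}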

#endif // FEAT_SHREC == DYNAREC_JIT