arm64 and x64 recompilers now use SSA register allocation

This commit is contained in:
Flyinghead 2019-06-10 13:57:10 +02:00
parent 623d70d710
commit 3dd16e80d2
10 changed files with 1005 additions and 198 deletions

View File

@ -97,6 +97,46 @@ void* _vmem_read_const(u32 addr,bool& ismem,u32 sz)
return 0;
}
// Resolve a constant address to either a direct RAM pointer (ismem=true)
// or the write-handler function for the given access size (ismem=false).
void* _vmem_write_const(u32 addr,bool& ismem,u32 sz)
{
	// Look up the memory-map entry for this address' 16MB region.
	const u32 page = addr >> 24;
	const unat info = (unat)_vmem_MemInfo_ptr[page];
	void* base = (void*)(info & ~HANDLER_MAX);

	if (base != 0)
	{
		// Directly mapped memory: the low bits of the entry appear to encode
		// a mask as a shift count — the shift pair clears the high address
		// bits (mirroring). TODO confirm against _vmem map construction.
		ismem = true;
		addr <<= info;
		addr >>= info;
		return &((u8*)base)[addr];
	}

	// Handler region: return the registered write function for this size.
	ismem = false;
	const unat id = info;
	switch (sz)
	{
	case 1:
		return (void*)_vmem_WF8[id/4];
	case 2:
		return (void*)_vmem_WF16[id/4];
	case 4:
		return (void*)_vmem_WF32[id/4];
	default:
		die("Invalid size");
	}
	die("Invalid memory size");
	return 0;
}
void* _vmem_page_info(u32 addr,bool& ismem,u32 sz,u32& page_sz,bool rw)
{
u32 page=addr>>24;

View File

@ -100,6 +100,7 @@ void _vmem_release();
void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func);
void* _vmem_get_ptr2(u32 addr,u32& mask);
void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
void* _vmem_write_const(u32 addr,bool& ismem,u32 sz);
extern u8* virt_ram_base;
extern bool vmem_4gb_space;

View File

@ -824,7 +824,7 @@ void constlink(RuntimeBlockInfo* blk)
else if (def==NoReg && op->rs1.is_imm() && op->rs1._imm==0)
{
//def=op->rd._reg;
val=op->rs1._imm;
val = op->rs1._imm;
}
}
}
@ -892,8 +892,9 @@ void srt_waw(RuntimeBlockInfo* blk)
//Seems to be working
void AnalyseBlock(RuntimeBlockInfo* blk)
{
//SSAOptimizer optim(blk);
//optim.Optimize();
SSAOptimizer optim(blk);
optim.Optimize();
return;
u32 st[sh4_reg_count]={0};
/*

View File

@ -190,6 +190,8 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode& op)
case shop_setae:
rd = rs1 >= rs2;
break;
default:
break;
}
}
break;

View File

@ -42,7 +42,7 @@ public:
ConstPropPass();
DeadCodeRemovalPass();
ConstantExpressionsPass();
SimplifyExpressionPass();
CombineShiftsPass();
DeadRegisterPass();
IdentityMovePass();
@ -355,7 +355,7 @@ private:
}
}
void ConstantExpressionsPass()
void SimplifyExpressionPass()
{
for (int opnum = 0; opnum < block->oplist.size(); opnum++)
{
@ -404,11 +404,12 @@ private:
continue;
}
}
// Not sure it's worth the trouble, except for the xor perhaps
// Not sure it's worth the trouble, except for the 'and' and 'xor'
else if (op.rs1.is_r32i() && op.rs1._reg == op.rs2._reg)
{
// a ^ a == 0
if (op.op == shop_xor)
// a - a == 0
if (op.op == shop_xor || op.op == shop_sub)
{
//printf("%08x ZERO %s\n", block->vaddr + op.guest_offs, op.dissasm().c_str());
ReplaceByMov32(op, 0);
@ -420,6 +421,14 @@ private:
//printf("%08x IDEN %s\n", block->vaddr + op.guest_offs, op.dissasm().c_str());
ReplaceByMov32(op);
}
// a + a == a * 2 == a << 1
else if (op.op == shop_add)
{
// There's quite a few of these
//printf("%08x +t<< %s\n", block->vaddr + op.guest_offs, op.dissasm().c_str());
op.op = shop_shl;
op.rs2 = shil_param(FMT_IMM, 1);
}
}
}
}

View File

@ -247,11 +247,9 @@ public:
virtual void Preload(u32 reg, nreg_t nreg) = 0;
virtual void Writeback(u32 reg, nreg_t nreg) = 0;
virtual void CheckReg(u32 reg, nreg_t nreg) = 0;
virtual void Preload_FPU(u32 reg, nregf_t nreg) = 0;
virtual void Writeback_FPU(u32 reg, nregf_t nreg) = 0;
virtual void CheckReg_FPU(u32 reg, nregf_t nreg) = 0;
private:
struct reg_alloc {

View File

@ -20,8 +20,11 @@
#ifndef CORE_REC_ARM64_ARM64_REGALLOC_H_
#define CORE_REC_ARM64_ARM64_REGALLOC_H_
#ifdef OLD_REGALLOC
#include "hw/sh4/dyna/regalloc.h"
#else
#include "hw/sh4/dyna/ssa_regalloc.h"
#endif
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
@ -67,7 +70,15 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg
const VRegister& MapVRegister(const shil_param& param, u32 index = 0)
{
#ifdef OLD_REGALLOC
eFReg ereg = mapfv(param, index);
#else
#ifdef EXPLODE_SPANS
#error EXPLODE_SPANS not supported with ssa regalloc
#endif
verify(index == 0);
eFReg ereg = mapf(param);
#endif
if (ereg == (eFReg)-1)
die("VRegister not allocated");
return VRegister::GetSRegFromCode(ereg);

View File

@ -437,7 +437,7 @@ public:
if (regalloc.IsAllocf(op.rd))
{
if (op.rs1.is_imm())
Fmov(regalloc.MapVRegister(op.rd), (float&)op.rs1._imm);
Fmov(regalloc.MapVRegister(op.rd), reinterpret_cast<f32&>(op.rs1._imm));
else if (regalloc.IsAllocf(op.rs1))
Fmov(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
else
@ -483,9 +483,13 @@ public:
break;
case shop_swaplb:
Mov(w9, Operand(regalloc.MapRegister(op.rs1), LSR, 16));
Rev16(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
Bfi(regalloc.MapRegister(op.rd), w9, 16, 16);
{
const Register rs1 = regalloc.MapRegister(op.rs1);
const Register rd = regalloc.MapRegister(op.rd);
Mov(w9, Operand(rs1, LSR, 16));
Rev16(rd, rs1);
Bfi(rd, w9, 16, 16);
}
break;
case shop_neg:
@ -536,60 +540,182 @@ public:
break;
case shop_adc:
Cmp(regalloc.MapRegister(op.rs3), 1); // C = rs3
Adcs(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2)); // (C,rd)=rs1+rs2+rs3(C)
Cset(regalloc.MapRegister(op.rd2), cs); // rd2 = C
{
Register reg1;
Operand op2;
Register reg3;
if (op.rs1.is_imm())
{
Mov(w0, op.rs1.imm_value());
reg1 = w0;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
if (op.rs2.is_imm())
op2 = Operand(op.rs2.imm_value());
else
op2 = regalloc.MapRegister(op.rs2);
if (op.rs3.is_imm())
{
Mov(w1, op.rs3.imm_value());
reg3 = w1;
}
else
{
reg3 = regalloc.MapRegister(op.rs3);
}
Cmp(reg3, 1); // C = rs3
Adcs(regalloc.MapRegister(op.rd), reg1, op2); // (C,rd)=rs1+rs2+rs3(C)
Cset(regalloc.MapRegister(op.rd2), cs); // rd2 = C
}
break;
case shop_sbc:
Cmp(wzr, regalloc.MapRegister(op.rs3)); // C = ~rs3
Sbcs(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2)); // (C,rd) = rs1 - rs2 - ~rs3(C)
Cset(regalloc.MapRegister(op.rd2), cc); // rd2 = ~C
{
Register reg1;
Operand op2;
Operand op3;
if (op.rs1.is_imm())
{
Mov(w0, op.rs1.imm_value());
reg1 = w0;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
if (op.rs2.is_imm())
op2 = Operand(op.rs2.imm_value());
else
op2 = regalloc.MapRegister(op.rs2);
if (op.rs3.is_imm())
op3 = Operand(op.rs3.imm_value());
else
op3 = regalloc.MapRegister(op.rs3);
Cmp(wzr, op3); // C = ~rs3
Sbcs(regalloc.MapRegister(op.rd), reg1, op2); // (C,rd) = rs1 - rs2 - ~rs3(C)
Cset(regalloc.MapRegister(op.rd2), cc); // rd2 = ~C
}
break;
case shop_negc:
Cmp(wzr, regalloc.MapRegister(op.rs2)); // C = ~rs2
Sbcs(regalloc.MapRegister(op.rd), wzr, regalloc.MapRegister(op.rs1)); // (C,rd) = 0 - rs1 - ~rs2(C)
Cset(regalloc.MapRegister(op.rd2), cc); // rd2 = ~C
{
Operand op1;
Operand op2;
if (op.rs1.is_imm())
op1 = Operand(op.rs1.imm_value());
else
op1 = regalloc.MapRegister(op.rs1);
if (op.rs2.is_imm())
op2 = Operand(op.rs2.imm_value());
else
op2 = regalloc.MapRegister(op.rs2);
Cmp(wzr, op2); // C = ~rs2
Sbcs(regalloc.MapRegister(op.rd), wzr, op1); // (C,rd) = 0 - rs1 - ~rs2(C)
Cset(regalloc.MapRegister(op.rd2), cc); // rd2 = ~C
}
break;
case shop_rocr:
Ubfx(w0, regalloc.MapRegister(op.rs1), 0, 1); // w0 = rs1[0] (new C)
Mov(regalloc.MapRegister(op.rd), Operand(regalloc.MapRegister(op.rs1), LSR, 1)); // rd = rs1 >> 1
Bfi(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs2), 31, 1); // rd |= C << 31
Mov(regalloc.MapRegister(op.rd2), w0); // rd2 = w0 (new C)
{
Register reg1;
Register reg2;
if (op.rs1.is_imm())
{
Mov(w1, op.rs1.imm_value());
reg1 = w1;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Mov(w2, op.rs2.imm_value());
reg2 = w2;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Ubfx(w0, reg1, 0, 1); // w0 = rs1[0] (new C)
const Register rd = regalloc.MapRegister(op.rd);
Mov(rd, Operand(reg1, LSR, 1)); // rd = rs1 >> 1
Bfi(rd, reg2, 31, 1); // rd |= C << 31
Mov(regalloc.MapRegister(op.rd2), w0); // rd2 = w0 (new C)
}
break;
case shop_rocl:
Tst(regalloc.MapRegister(op.rs1), 0x80000000); // Z = ~rs1[31]
Orr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs2), Operand(regalloc.MapRegister(op.rs1), LSL, 1)); // rd = rs1 << 1 | rs2(C)
Cset(regalloc.MapRegister(op.rd2), ne); // rd2 = ~Z(C)
{
Register reg1;
Register reg2;
if (op.rs1.is_imm())
{
Mov(w0, op.rs1.imm_value());
reg1 = w0;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Mov(w1, op.rs2.imm_value());
reg2 = w1;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Tst(reg1, 0x80000000); // Z = ~rs1[31]
Orr(regalloc.MapRegister(op.rd), reg2, Operand(reg1, LSL, 1)); // rd = rs1 << 1 | rs2(C)
Cset(regalloc.MapRegister(op.rd2), ne); // rd2 = ~Z(C)
}
break;
case shop_shld:
case shop_shad:
{
Register reg1;
if (op.rs1.is_imm())
{
Mov(w0, op.rs1.imm_value());
reg1 = w0;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
Label positive_shift, negative_shift, end;
Tbz(regalloc.MapRegister(op.rs2), 31, &positive_shift);
Cmn(regalloc.MapRegister(op.rs2), 32);
const Register rs2 = regalloc.MapRegister(op.rs2);
Tbz(rs2, 31, &positive_shift);
Cmn(rs2, 32);
B(&negative_shift, ne);
const Register rd = regalloc.MapRegister(op.rd);
// rs2 == -32 => rd = 0 (logical) or 0/-1 (arith)
if (op.op == shop_shld)
// Logical shift
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), 31);
//Lsr(rd, reg1, 31);
Mov(rd, wzr);
else
// Arithmetic shift
Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), 31);
Asr(rd, reg1, 31);
B(&end);
Bind(&positive_shift);
Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
// rs2 >= 0 => left shift
Lsl(rd, reg1, rs2);
B(&end);
Bind(&negative_shift);
Neg(w1, regalloc.MapRegister(op.rs2));
// rs2 < 0 => right shift
Neg(w1, rs2);
if (op.op == shop_shld)
// Logical shift
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w1);
Lsr(rd, reg1, w1);
else
// Arithmetic shift
Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1);
Asr(rd, reg1, w1);
Bind(&end);
}
break;
@ -601,19 +727,20 @@ public:
case shop_setae:
case shop_setab:
{
const Register rs1 = regalloc.MapRegister(op.rs1);
if (op.op == shop_test)
{
if (op.rs2.is_imm())
Tst(regalloc.MapRegister(op.rs1), op.rs2._imm);
Tst(rs1, op.rs2._imm);
else
Tst(regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
Tst(rs1, regalloc.MapRegister(op.rs2));
}
else
{
if (op.rs2.is_imm())
Cmp(regalloc.MapRegister(op.rs1), op.rs2._imm);
Cmp(rs1, op.rs2._imm);
else
Cmp(regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
Cmp(rs1, regalloc.MapRegister(op.rs2));
}
static const Condition shop_conditions[] = { eq, eq, ge, gt, hs, hi };
@ -622,32 +749,90 @@ public:
}
break;
case shop_setpeq:
Eor(w1, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
Mov(regalloc.MapRegister(op.rd), wzr);
Mov(w2, wzr); // wzr not supported by csinc (?!)
Tst(w1, 0xFF000000);
Csinc(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2, ne);
Tst(w1, 0x00FF0000);
Csinc(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2, ne);
Tst(w1, 0x0000FF00);
Csinc(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2, ne);
Tst(w1, 0x000000FF);
Csinc(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2, ne);
{
Register reg1;
Register reg2;
if (op.rs1.is_imm())
{
Mov(w0, op.rs1.imm_value());
reg1 = w0;
}
else
{
reg1 = regalloc.MapRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Mov(w1, op.rs2.imm_value());
reg2 = w1;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Eor(w1, reg1, reg2);
const Register rd = regalloc.MapRegister(op.rd);
Mov(rd, wzr);
Mov(w2, wzr); // wzr not supported by csinc (?!)
Tst(w1, 0xFF000000);
Csinc(rd, rd, w2, ne);
Tst(w1, 0x00FF0000);
Csinc(rd, rd, w2, ne);
Tst(w1, 0x0000FF00);
Csinc(rd, rd, w2, ne);
Tst(w1, 0x000000FF);
Csinc(rd, rd, w2, ne);
}
break;
case shop_mul_u16:
Uxth(w10, regalloc.MapRegister(op.rs1));
Uxth(w11, regalloc.MapRegister(op.rs2));
Mul(regalloc.MapRegister(op.rd), w10, w11);
{
Register reg2;
if (op.rs2.is_imm())
{
Mov(w0, op.rs2.imm_value());
reg2 = w0;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Uxth(w10, regalloc.MapRegister(op.rs1));
Uxth(w11, reg2);
Mul(regalloc.MapRegister(op.rd), w10, w11);
}
break;
case shop_mul_s16:
Sxth(w10, regalloc.MapRegister(op.rs1));
Sxth(w11, regalloc.MapRegister(op.rs2));
Mul(regalloc.MapRegister(op.rd), w10, w11);
{
Register reg2;
if (op.rs2.is_imm())
{
Mov(w0, op.rs2.imm_value());
reg2 = w0;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Sxth(w10, regalloc.MapRegister(op.rs1));
Sxth(w11, reg2);
Mul(regalloc.MapRegister(op.rd), w10, w11);
}
break;
case shop_mul_i32:
Mul(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
{
Register reg2;
if (op.rs2.is_imm())
{
Mov(w0, op.rs2.imm_value());
reg2 = w0;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
Mul(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), reg2);
}
break;
case shop_mul_u64:
case shop_mul_s64:
@ -709,9 +894,12 @@ public:
break;
case shop_xtrct:
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), 16);
Lsl(w0, regalloc.MapRegister(op.rs2), 16);
Orr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w0);
{
const Register rd = regalloc.MapRegister(op.rd);
Lsr(rd, regalloc.MapRegister(op.rs1), 16);
Lsl(w0, regalloc.MapRegister(op.rs2), 16);
Orr(rd, rd, w0);
}
break;
//
@ -719,16 +907,104 @@ public:
//
case shop_fadd:
Fadd(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
{
VRegister reg1;
VRegister reg2;
if (op.rs1.is_imm())
{
Fmov(s0, reinterpret_cast<f32&>(op.rs1._imm));
reg1 = s0;
}
else
{
reg1 = regalloc.MapVRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Fmov(s1, reinterpret_cast<f32&>(op.rs2._imm));
reg2 = s1;
}
else
{
reg2 = regalloc.MapVRegister(op.rs2);
}
Fadd(regalloc.MapVRegister(op.rd), reg1, reg2);
}
break;
case shop_fsub:
Fsub(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
{
VRegister reg1;
VRegister reg2;
if (op.rs1.is_imm())
{
Fmov(s0, reinterpret_cast<f32&>(op.rs1._imm));
reg1 = s0;
}
else
{
reg1 = regalloc.MapVRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Fmov(s1, reinterpret_cast<f32&>(op.rs2._imm));
reg2 = s1;
}
else
{
reg2 = regalloc.MapVRegister(op.rs2);
}
Fsub(regalloc.MapVRegister(op.rd), reg1, reg2);
}
break;
case shop_fmul:
Fmul(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
{
VRegister reg1;
VRegister reg2;
if (op.rs1.is_imm())
{
Fmov(s0, reinterpret_cast<f32&>(op.rs1._imm));
reg1 = s0;
}
else
{
reg1 = regalloc.MapVRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Fmov(s1, reinterpret_cast<f32&>(op.rs2._imm));
reg2 = s1;
}
else
{
reg2 = regalloc.MapVRegister(op.rs2);
}
Fmul(regalloc.MapVRegister(op.rd), reg1, reg2);
}
break;
case shop_fdiv:
Fdiv(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1), regalloc.MapVRegister(op.rs2));
{
VRegister reg1;
VRegister reg2;
if (op.rs1.is_imm())
{
Fmov(s0, reinterpret_cast<f32&>(op.rs1._imm));
reg1 = s0;
}
else
{
reg1 = regalloc.MapVRegister(op.rs1);
}
if (op.rs2.is_imm())
{
Fmov(s1, reinterpret_cast<f32&>(op.rs2._imm));
reg2 = s1;
}
else
{
reg2 = regalloc.MapVRegister(op.rs2);
}
Fdiv(regalloc.MapVRegister(op.rd), reg1, reg2);
}
break;
case shop_fabs:
@ -888,12 +1164,12 @@ public:
break;
case CPT_f32:
if (prm.is_reg()) {
if (prm.is_reg())
Fmov(*call_fregs[fregused], regalloc.MapVRegister(prm));
}
else {
else if (prm.is_imm())
Fmov(*call_fregs[fregused], reinterpret_cast<f32&>(prm._imm));
else
verify(prm.is_null());
}
fregused++;
break;
@ -1033,7 +1309,7 @@ public:
if (block->oplist[opid].op == shop_readm)
{
regalloc.DoAlloc(block);
regalloc.current_opid = opid;
regalloc.SetOpnum(opid);
}
}
@ -1181,9 +1457,8 @@ public:
vmem_platform_flush_cache(
CC_RW2RX(GetBuffer()->GetStartAddress<void*>()), CC_RW2RX(GetBuffer()->GetEndAddress<void*>()),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#if 0
// if (rewrite)
if (rewrite && block != NULL)
{
Instruction* instr_start = (Instruction*)block->code;
// Instruction* instr_end = GetLabelAddress<Instruction*>(&code_end);
@ -1432,6 +1707,10 @@ private:
{
switch (size)
{
case 1:
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
@ -1452,6 +1731,10 @@ private:
{
switch (size)
{
case 1:
Ldrsb(w1, MemOperand(x1));
break;
case 2:
Ldrsh(w1, MemOperand(x1));
break;
@ -1460,11 +1743,18 @@ private:
Ldr(w1, MemOperand(x1));
break;
case 8:
Ldr(x1, MemOperand(x1));
break;
default:
die("Invalid size");
break;
}
Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
if (size == 8)
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
else
Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
}
}
else
@ -1568,7 +1858,7 @@ private:
Lsr(x1, x1, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w1);
#else
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
die("GenReadMemoryFast: size == 8 and !explode_spans");
#endif
}
}
@ -1604,6 +1894,9 @@ private:
void GenWriteMemory(const shil_opcode& op, size_t opid, bool optimise)
{
if (GenWriteMemoryImmediate(op))
return;
GenMemAddr(op, call_regs[0]);
u32 size = op.flags & 0x7f;
@ -1627,6 +1920,111 @@ private:
GenWriteMemorySlow(op);
}
// Emit a write to a compile-time-constant address.
// Returns false when the op doesn't qualify (non-immediate address, or the
// address can't be statically translated); the caller then emits the generic
// write path instead.
bool GenWriteMemoryImmediate(const shil_opcode& op)
{
	if (!op.rs1.is_imm())
		return false;

	u32 size = op.flags & 0x7f;
	u32 addr = op.rs1._imm;
	if (mmu_enabled())
	{
		if ((addr >> 12) != (block->vaddr >> 12))
			// When full mmu is on, only consider addresses in the same 4k page
			return false;

		u32 paddr;
		u32 rv;
		switch (size)
		{
		case 1:
			rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);
			break;
		case 2:
			rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
			break;
		case 4:
		case 8:
			rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
			break;
		default:
			// FIX: previously fell through with rv uninitialized for any
			// other size; bail out to the generic path instead.
			return false;
		}
		if (rv != MMU_ERROR_NONE)
			return false;
		addr = paddr;
	}
	bool isram = false;
	void* ptr = _vmem_write_const(addr, isram, size);

	// Materialize the value to store in a host register.
	Register reg2;
	if (op.rs2.is_imm())
	{
		Mov(w0, op.rs2._imm);
		reg2 = w0;
	}
	else if (regalloc.IsAllocg(op.rs2))
	{
		reg2 = regalloc.MapRegister(op.rs2);
	}
	else if (regalloc.IsAllocf(op.rs2))
	{
		// Float value: move the raw 32 bits into a GP register for the store.
		Fmov(w0, regalloc.MapVRegister(op.rs2));
		reg2 = w0;
	}
	else
		die("Invalid rs2 param");

	if (isram)
	{
		// Direct RAM pointer: load it from the literal pool and store through it.
		Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
		switch (size)
		{
		case 1:
			Strb(reg2, MemOperand(x1));
			break;
		case 2:
			Strh(reg2, MemOperand(x1));
			break;
		case 4:
			// FIX: collapsed an if/else whose two branches were identical
			// (reg2 already holds the bits whether rs2 was int or float).
			Str(reg2, MemOperand(x1));
			break;
		default:
			die("Invalid size");
			break;
		}
	}
	else
	{
		// Not RAM: ptr is a write handler taking (addr, value).
		Mov(w1, reg2);
		Mov(w0, addr);
		switch (size)
		{
		case 1:
		case 2:
		case 4:
			// Same call sequence for all supported sizes; the handler
			// returned by _vmem_write_const is already size-specific.
			GenCallRuntime((void (*)())ptr);
			break;
		case 8:
			die("SZ_64F not supported");
			break;
		}
	}
	return true;
}
bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
{
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()

View File

@ -3,8 +3,9 @@
#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
#include <setjmp.h>
#define EXPLODE_SPANS
//#define EXPLODE_SPANS
//#define PROFILING
//#define CANONICAL_TEST
#include "deps/xbyak/xbyak.h"
#include "deps/xbyak/xbyak_util.h"
@ -40,23 +41,32 @@ extern "C" {
int cycle_counter;
}
double host_cpu_time;
u64 guest_cpu_cycles;
u64 host_cpu_time;
u32 mem_writes, mem_reads;
u32 mem_rewrites_w, mem_rewrites_r;
#ifdef PROFILING
static double slice_start;
static clock_t slice_start;
int start_cycle;
extern "C"
{
static __attribute((used)) void start_slice()
static __attribute((used)) void* start_slice(void *p)
{
slice_start = os_GetSeconds();
slice_start = clock();
start_cycle = cycle_counter;
return p;
}
static __attribute((used)) void end_slice()
{
host_cpu_time += os_GetSeconds() - slice_start;
clock_t now = clock();
if (slice_start != 0)
{
host_cpu_time += now - slice_start;
guest_cpu_cycles += start_cycle - cycle_counter;
}
slice_start = now;
start_cycle = cycle_counter;
}
}
#endif
@ -158,15 +168,15 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
#endif
"call " _U "bm_GetCodeByVAddr \n\t"
"call *%rax \n\t"
#ifdef PROFILING
"call end_slice \n\t"
#endif
"movl " _U "cycle_counter(%rip), %ecx \n\t"
"testl %ecx, %ecx \n\t"
"jg 2b \n\t" // slice_loop
"addl $" _S(SH4_TIMESLICE) ", %ecx \n\t"
"movl %ecx, " _U "cycle_counter(%rip) \n\t"
#ifdef PROFILING
"call end_slice \n\t"
#endif
"call " _U "UpdateSystem_INTC \n\t"
"jmp 1b \n" // run_loop
@ -371,11 +381,6 @@ public:
sub(dword[rax], block->guest_cycles);
#else
sub(dword[rip + &cycle_counter], block->guest_cycles);
#endif
#ifdef PROFILING
mov(rax, (uintptr_t)&guest_cpu_cycles);
mov(ecx, block->guest_cycles);
add(qword[rax], rcx);
#endif
regalloc.DoAlloc(block);
@ -412,14 +417,12 @@ public:
case shop_jcond:
case shop_jdyn:
{
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
Xbyak::Reg32 rs1 = regalloc.MapRegister(op.rs1);
if (rd != rs1)
mov(rd, rs1);
if (op.rs2.is_imm())
{
mov(ecx, regalloc.MapRegister(op.rs1));
add(ecx, op.rs2._imm);
mov(regalloc.MapRegister(op.rd), ecx);
}
else
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
add(rd, op.rs2._imm);
}
break;
@ -495,41 +498,44 @@ public:
case shop_writem:
{
shil_param_to_host_reg(op.rs1, call_regs[0]);
if (!op.rs3.is_null())
if (!GenWriteMemImmediate(op, block))
{
if (op.rs3.is_imm())
add(call_regs[0], op.rs3._imm);
else if (regalloc.IsAllocg(op.rs3))
add(call_regs[0], regalloc.MapRegister(op.rs3));
else
shil_param_to_host_reg(op.rs1, call_regs[0]);
if (!op.rs3.is_null())
{
mov(rax, (uintptr_t)op.rs3.reg_ptr());
add(call_regs[0], dword[rax]);
if (op.rs3.is_imm())
add(call_regs[0], op.rs3._imm);
else if (regalloc.IsAllocg(op.rs3))
add(call_regs[0], regalloc.MapRegister(op.rs3));
else
{
mov(rax, (uintptr_t)op.rs3.reg_ptr());
add(call_regs[0], dword[rax]);
}
}
}
u32 size = op.flags & 0x7f;
if (size != 8)
shil_param_to_host_reg(op.rs2, call_regs[1]);
else {
u32 size = op.flags & 0x7f;
if (size != 8)
shil_param_to_host_reg(op.rs2, call_regs[1]);
else {
#ifdef EXPLODE_SPANS
if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
{
movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
shl(call_regs64[1], 32);
movd(eax, regalloc.MapXRegister(op.rs2, 0));
or_(call_regs64[1], rax);
}
else
if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
{
movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
shl(call_regs64[1], 32);
movd(eax, regalloc.MapXRegister(op.rs2, 0));
or_(call_regs64[1], rax);
}
else
#endif
{
mov(rax, (uintptr_t)op.rs2.reg_ptr());
mov(call_regs64[1], qword[rax]);
{
mov(rax, (uintptr_t)op.rs2.reg_ptr());
mov(call_regs64[1], qword[rax]);
}
}
if (!optimise || !GenWriteMemoryFast(op, block))
GenWriteMemorySlow(op, block);
}
if (!optimise || !GenWriteMemoryFast(op, block))
GenWriteMemorySlow(op, block);
}
break;
@ -544,7 +550,7 @@ public:
case shop_swaplb:
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
ror(Xbyak::Reg16(regalloc.MapRegister(op.rd).getIdx()), 8);
ror(regalloc.MapRegister(op.rd).cvt16(), 8);
break;
case shop_neg:
@ -595,12 +601,30 @@ public:
break;
case shop_adc:
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
cmp(regalloc.MapRegister(op.rs3), 1); // C = ~rs3
cmc(); // C = rs3
adc(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs2)); // (C,rd)=rs1+rs2+rs3(C)
setc(regalloc.MapRegister(op.rd2).cvt8()); // rd2 = C
{
cmp(regalloc.MapRegister(op.rs3), 1); // C = ~rs3
Xbyak::Reg32 rs2;
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
if (op.rs2.is_reg())
{
rs2 = regalloc.MapRegister(op.rs2);
if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
{
mov(ecx, rs2);
rs2 = ecx;
}
}
if (op.rs1.is_imm())
mov(rd, op.rs1.imm_value());
else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(rd, regalloc.MapRegister(op.rs1));
cmc(); // C = rs3
if (op.rs2.is_reg())
adc(rd, rs2); // (C,rd)=rs1+rs2+rs3(C)
else
adc(rd, op.rs2.imm_value());
setc(regalloc.MapRegister(op.rd2).cvt8()); // rd2 = C
}
break;
/* FIXME buggy
@ -619,11 +643,27 @@ public:
*/
case shop_negc:
{
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
Xbyak::Reg64 rd64 = regalloc.MapRegister(op.rd).cvt64();
Xbyak::Reg32 rs2;
if (op.rs2.is_reg())
{
rs2 = regalloc.MapRegister(op.rs2);
if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
{
mov(ecx, rs2);
rs2 = ecx;
}
}
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
if (op.rs1.is_imm())
mov(rd, op.rs1.imm_value());
else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(rd, regalloc.MapRegister(op.rs1));
Xbyak::Reg64 rd64 = rd.cvt64();
neg(rd64);
sub(rd64, regalloc.MapRegister(op.rs2).cvt64());
if (op.rs2.is_imm())
sub(rd64, op.rs2.imm_value());
else
sub(rd64, rs2.cvt64());
Xbyak::Reg64 rd2_64 = regalloc.MapRegister(op.rd2).cvt64();
mov(rd2_64, rd64);
shr(rd2_64, 63);
@ -632,48 +672,60 @@ public:
case shop_rocr:
case shop_rocl:
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
cmp(regalloc.MapRegister(op.rs2), 1); // C = ~rs2
cmc(); // C = rs2
if (op.op == shop_rocr)
rcr(regalloc.MapRegister(op.rd), 1);
else
rcl(regalloc.MapRegister(op.rd), 1);
setc(al);
movzx(regalloc.MapRegister(op.rd2), al); // rd2 = C
{
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
cmp(regalloc.MapRegister(op.rs2), 1); // C = ~rs2
if (op.rs1.is_imm())
mov(rd, op.rs1.imm_value());
else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(rd, regalloc.MapRegister(op.rs1));
cmc(); // C = rs2
if (op.op == shop_rocr)
rcr(rd, 1);
else
rcl(rd, 1);
setc(al);
movzx(regalloc.MapRegister(op.rd2), al); // rd2 = C
}
break;
case shop_shld:
case shop_shad:
{
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
if (op.rs2.is_reg())
mov(ecx, regalloc.MapRegister(op.rs2));
else
// This shouldn't happen. If arg is imm -> shop_shl/shr/sar
mov(ecx, op.rs2.imm_value());
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
if (op.rs1.is_imm())
mov(rd, op.rs1.imm_value());
else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(rd, regalloc.MapRegister(op.rs1));
Xbyak::Label negative_shift;
Xbyak::Label non_zero;
Xbyak::Label exit;
mov(ecx, regalloc.MapRegister(op.rs2));
cmp(ecx, 0);
js(negative_shift);
shl(regalloc.MapRegister(op.rd), cl);
shl(rd, cl);
jmp(exit);
L(negative_shift);
test(ecx, 0x1f);
jnz(non_zero);
if (op.op == shop_shld)
xor_(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd));
xor_(rd, rd);
else
sar(regalloc.MapRegister(op.rd), 31);
sar(rd, 31);
jmp(exit);
L(non_zero);
neg(ecx);
if (op.op == shop_shld)
shr(regalloc.MapRegister(op.rd), cl);
shr(rd, cl);
else
sar(regalloc.MapRegister(op.rd), cl);
sar(rd, cl);
L(exit);
}
break;
@ -730,25 +782,40 @@ public:
break;
*/
case shop_mul_u16:
movzx(eax, Xbyak::Reg16(regalloc.MapRegister(op.rs1).getIdx()));
movzx(ecx, Xbyak::Reg16(regalloc.MapRegister(op.rs2).getIdx()));
movzx(eax, regalloc.MapRegister(op.rs1).cvt16());
if (op.rs2.is_reg())
movzx(ecx, regalloc.MapRegister(op.rs2).cvt16());
else
mov(ecx, op.rs2._imm & 0xFFFF);
mul(ecx);
mov(regalloc.MapRegister(op.rd), eax);
break;
case shop_mul_s16:
movsx(eax, Xbyak::Reg16(regalloc.MapRegister(op.rs1).getIdx()));
movsx(ecx, Xbyak::Reg16(regalloc.MapRegister(op.rs2).getIdx()));
movsx(eax, regalloc.MapRegister(op.rs1).cvt16());
if (op.rs2.is_reg())
movsx(ecx, regalloc.MapRegister(op.rs2).cvt16());
else
mov(ecx, (s32)(s16)op.rs2._imm);
mul(ecx);
mov(regalloc.MapRegister(op.rd), eax);
break;
case shop_mul_i32:
mov(eax, regalloc.MapRegister(op.rs1));
mul(regalloc.MapRegister(op.rs2));
if (op.rs2.is_reg())
mul(regalloc.MapRegister(op.rs2));
else
{
mov(ecx, op.rs2._imm);
mul(ecx);
}
mov(regalloc.MapRegister(op.rd), eax);
break;
case shop_mul_u64:
mov(eax, regalloc.MapRegister(op.rs1));
mov(ecx, regalloc.MapRegister(op.rs2));
if (op.rs2.is_reg())
mov(ecx, regalloc.MapRegister(op.rs2));
else
mov(ecx, op.rs2._imm);
mul(rcx);
mov(regalloc.MapRegister(op.rd), eax);
shr(rax, 32);
@ -756,7 +823,10 @@ public:
break;
case shop_mul_s64:
movsxd(rax, regalloc.MapRegister(op.rs1));
movsxd(rcx, regalloc.MapRegister(op.rs2));
if (op.rs2.is_reg())
movsxd(rcx, regalloc.MapRegister(op.rs2));
else
mov(rcx, (s64)(s32)op.rs2._imm);
mul(rcx);
mov(regalloc.MapRegister(op.rd), eax);
shr(rax, 32);
@ -764,6 +834,33 @@ public:
break;
case shop_pref:
if (op.rs1.is_imm())
{
// this test shouldn't be necessary
if ((op.rs1._imm & 0xFC000000) == 0xE0000000)
{
mov(call_regs[0], op.rs1._imm);
if (mmu_enabled())
{
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCall(do_sqw_mmu_no_ex);
}
else
{
if (CCN_MMUCR.AT == 1)
{
GenCall(do_sqw_mmu);
}
else
{
mov(call_regs64[1], (uintptr_t)sq_both);
GenCall(&do_sqw_nommu_local);
}
}
}
}
else
{
Xbyak::Reg32 rn;
if (regalloc.IsAllocg(op.rs1))
@ -810,16 +907,31 @@ public:
movsx(regalloc.MapRegister(op.rd), al);
break;
case shop_ext_s16:
movsx(regalloc.MapRegister(op.rd), Xbyak::Reg16(regalloc.MapRegister(op.rs1).getIdx()));
movsx(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1).cvt16());
break;
case shop_xtrct:
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
shr(regalloc.MapRegister(op.rd), 16);
mov(eax, regalloc.MapRegister(op.rs2));
shl(eax, 16);
or_(regalloc.MapRegister(op.rd), eax);
{
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
Xbyak::Reg32 rs1 = regalloc.MapRegister(op.rs1);
Xbyak::Reg32 rs2 = regalloc.MapRegister(op.rs2);
if (regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
{
shl(rd, 16);
mov(eax, rs1);
shr(eax, 16);
or_(rd, eax);
break;
}
else if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
{
mov(rd, rs1);
}
shr(rd, 16);
mov(eax, rs2);
shl(eax, 16);
or_(rd, eax);
}
break;
//
@ -859,15 +971,38 @@ public:
break;
case shop_fmac:
if (regalloc.mapf(op.rd) != regalloc.mapf(op.rs1))
movss(regalloc.MapXRegister(op.rd), regalloc.MapXRegister(op.rs1));
if (cpu.has(Xbyak::util::Cpu::tFMA))
vfmadd231ss(regalloc.MapXRegister(op.rd), regalloc.MapXRegister(op.rs2), regalloc.MapXRegister(op.rs3));
else
{
movss(xmm0, regalloc.MapXRegister(op.rs2));
mulss(xmm0, regalloc.MapXRegister(op.rs3));
addss(regalloc.MapXRegister(op.rd), xmm0);
Xbyak::Xmm rs1 = regalloc.MapXRegister(op.rs1);
Xbyak::Xmm rs2 = regalloc.MapXRegister(op.rs2);
Xbyak::Xmm rs3 = regalloc.MapXRegister(op.rs3);
Xbyak::Xmm rd = regalloc.MapXRegister(op.rd);
if (rd == rs2)
{
movss(xmm1, rs2);
rs2 = xmm1;
}
if (rd == rs3)
{
movss(xmm2, rs3);
rs3 = xmm2;
}
if (op.rs1.is_imm())
{
mov(eax, op.rs1._imm);
movd(rd, eax);
}
else if (rd != rs1)
{
movss(rd, rs1);
}
if (cpu.has(Xbyak::util::Cpu::tFMA))
vfmadd231ss(rd, rs2, rs3);
else
{
movss(xmm0, rs2);
mulss(xmm0, rs3);
addss(rd, xmm0);
}
}
break;
@ -898,7 +1033,7 @@ public:
break;
case shop_fsca:
movzx(rax, Xbyak::Reg16(regalloc.MapRegister(op.rs1).getIdx()));
movzx(rax, regalloc.MapRegister(op.rs1).cvt16());
mov(rcx, (uintptr_t)&sin_table);
#ifdef EXPLODE_SPANS
movss(regalloc.MapXRegister(op.rd, 0), dword[rcx + rax * 8]);
@ -1359,6 +1494,17 @@ private:
mov(rax, reinterpret_cast<uintptr_t>(ptr));
switch (size)
{
case 1:
if (regalloc.IsAllocg(op.rd))
movsx(regalloc.MapRegister(op.rd), byte[rax]);
else
{
movsx(eax, byte[rax]);
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(dword[rcx], eax);
}
break;
case 2:
if (regalloc.IsAllocg(op.rd))
movsx(regalloc.MapRegister(op.rd), word[rax]);
@ -1383,6 +1529,23 @@ private:
}
break;
case 8:
mov(rcx, qword[rax]);
#ifdef EXPLODE_SPANS
if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
{
movd(regalloc.MapXRegister(op.rd, 0), ecx);
shr(rcx, 32);
movd(regalloc.MapXRegister(op.rd, 1), ecx);
}
else
#endif
{
mov(rax, (uintptr_t)op.rd.reg_ptr());
mov(qword[rax], rcx);
}
break;
default:
die("Invalid immediate size");
break;
@ -1395,6 +1558,11 @@ private:
switch(size)
{
case 1:
GenCall((void (*)())ptr);
movsx(ecx, al);
break;
case 2:
GenCall((void (*)())ptr);
movsx(ecx, ax);
@ -1415,6 +1583,122 @@ private:
return true;
}
bool GenWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
{
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
// When full mmu is on, only consider addresses in the same 4k page
return false;
u32 paddr;
u32 rv;
switch (size)
{
case 1:
rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);
break;
case 2:
rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break;
}
if (rv != MMU_ERROR_NONE)
return false;
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_write_const(addr, isram, size);
if (isram)
{
// Immediate pointer to RAM: super-duper fast access
mov(rax, reinterpret_cast<uintptr_t>(ptr));
switch (size)
{
case 1:
if (regalloc.IsAllocg(op.rs2))
mov(byte[rax], regalloc.MapRegister(op.rs2));
else if (op.rs2.is_imm())
mov(byte[rax], op.rs2._imm);
else
{
mov(rcx, (uintptr_t)op.rs2.reg_ptr());
mov(ecx, dword[rcx]);
mov(byte[rax], ecx);
}
break;
case 2:
if (regalloc.IsAllocg(op.rs2))
mov(word[rax], regalloc.MapRegister(op.rs2));
else if (op.rs2.is_imm())
mov(word[rax], op.rs2._imm);
else
{
mov(rcx, (uintptr_t)op.rs2.reg_ptr());
mov(ecx, dword[rcx]);
mov(word[rax], ecx);
}
break;
case 4:
if (regalloc.IsAllocg(op.rs2))
mov(dword[rax], regalloc.MapRegister(op.rs2));
else if (regalloc.IsAllocf(op.rs2))
movd(dword[rax], regalloc.MapXRegister(op.rs2));
else if (op.rs2.is_imm())
mov(dword[rax], op.rs2._imm);
else
{
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(ecx, dword[rcx]);
mov(dword[rax], ecx);
}
break;
case 8:
#ifdef EXPLODE_SPANS
if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
{
movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
shl(call_regs64[1], 32);
movd(eax, regalloc.MapXRegister(op.rs2, 0));
or_(call_regs64[1], rax);
}
else
#endif
{
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(rcx, qword[rcx]);
mov(qword[rax], rcx);
}
default:
die("Invalid immediate size");
break;
}
}
else
{
// Not RAM: the returned pointer is a memory handler
mov(call_regs[0], addr);
shil_param_to_host_reg(op.rs2, call_regs[1]);
GenCall((void (*)())ptr);
}
return true;
}
bool GenReadMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
{
if (!mmu_enabled() || !vmem32_enabled())
@ -1487,11 +1771,11 @@ private:
switch (size)
{
case 1:
mov(byte[rax + call_regs64[0] + 0], Xbyak::Reg8(call_regs[1].getIdx(), call_regs[1] == edi || call_regs[1] == esi));
mov(byte[rax + call_regs64[0] + 0], call_regs[1].cvt8());
break;
case 2:
mov(word[rax + call_regs64[0]], Xbyak::Reg16(call_regs[1].getIdx()));
mov(word[rax + call_regs64[0]], call_regs[1].cvt16());
break;
case 4:
@ -1589,22 +1873,67 @@ private:
void GenBinaryOp(const shil_opcode &op, X64BinaryOp natop)
{
Xbyak::Reg32 rd = regalloc.MapRegister(op.rd);
const shil_param *rs2 = &op.rs2;
if (regalloc.mapg(op.rd) != regalloc.mapg(op.rs1))
mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1));
{
if (op.rs2.is_reg() && regalloc.mapg(op.rd) == regalloc.mapg(op.rs2))
{
if (op.op == shop_sub)
{
// This op isn't commutative
mov(ecx, regalloc.MapRegister(op.rs2));
mov(rd, regalloc.MapRegister(op.rs1));
(this->*natop)(rd, ecx);
return;
}
// otherwise just swap the operands
rs2 = &op.rs1;
}
else
mov(rd, regalloc.MapRegister(op.rs1));
}
if (op.rs2.is_imm())
{
mov(ecx, op.rs2._imm);
(this->*natop)(regalloc.MapRegister(op.rd), ecx);
(this->*natop)(rd, ecx);
}
else
(this->*natop)(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs2));
(this->*natop)(rd, regalloc.MapRegister(*rs2));
}
void GenBinaryFOp(const shil_opcode &op, X64BinaryFOp natop)
{
Xbyak::Xmm rd = regalloc.MapXRegister(op.rd);
const shil_param *rs2 = &op.rs2;
if (regalloc.mapf(op.rd) != regalloc.mapf(op.rs1))
movss(regalloc.MapXRegister(op.rd), regalloc.MapXRegister(op.rs1));
(this->*natop)(regalloc.MapXRegister(op.rd), regalloc.MapXRegister(op.rs2));
{
if (op.rs2.is_reg() && regalloc.mapf(op.rd) == regalloc.mapf(op.rs2))
{
if (op.op == shop_fsub || op.op == shop_fdiv)
{
// these ops aren't commutative so we need a scratch reg
movss(xmm0, regalloc.MapXRegister(op.rs2));
movss(rd, regalloc.MapXRegister(op.rs1));
(this->*natop)(rd, xmm0);
return;
}
// otherwise just swap the operands
rs2 = &op.rs1;
}
else
movss(rd, regalloc.MapXRegister(op.rs1));
}
if (op.rs2.is_imm())
{
mov(eax, op.rs2._imm);
movd(xmm0, eax);
(this->*natop)(rd, xmm0);
}
else
(this->*natop)(rd, regalloc.MapXRegister(*rs2));
}
template<class Ret, class... Params>
@ -1693,10 +2022,11 @@ private:
{
if (regalloc.IsAllocf(param))
{
Xbyak::Xmm sreg = regalloc.MapXRegister(param);
if (!reg.isXMM())
movd((const Xbyak::Reg32 &)reg, regalloc.MapXRegister(param));
else
movss((const Xbyak::Xmm &)reg, regalloc.MapXRegister(param));
movd((const Xbyak::Reg32 &)reg, sreg);
else if (reg != sreg)
movss((const Xbyak::Xmm &)reg, sreg);
}
else
{
@ -1709,10 +2039,11 @@ private:
{
if (regalloc.IsAllocg(param))
{
if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, regalloc.MapRegister(param));
else
movd((const Xbyak::Xmm &)reg, regalloc.MapRegister(param));
Xbyak::Reg32 sreg = regalloc.MapRegister(param);
if (reg.isXMM())
movd((const Xbyak::Xmm &)reg, sreg);
else if (reg != sreg)
mov((const Xbyak::Reg32 &)reg, sreg);
}
else
{
@ -1735,17 +2066,19 @@ private:
{
if (regalloc.IsAllocg(param))
{
Xbyak::Reg32 sreg = regalloc.MapRegister(param);
if (!reg.isXMM())
mov(regalloc.MapRegister(param), (const Xbyak::Reg32 &)reg);
else
movd(regalloc.MapRegister(param), (const Xbyak::Xmm &)reg);
mov(sreg, (const Xbyak::Reg32 &)reg);
else if (reg != sreg)
movd(sreg, (const Xbyak::Xmm &)reg);
}
else if (regalloc.IsAllocf(param))
{
Xbyak::Xmm sreg = regalloc.MapXRegister(param);
if (!reg.isXMM())
movd(regalloc.MapXRegister(param), (const Xbyak::Reg32 &)reg);
else
movss(regalloc.MapXRegister(param), (const Xbyak::Xmm &)reg);
movd(sreg, (const Xbyak::Reg32 &)reg);
else if (reg != sreg)
movss(sreg, (const Xbyak::Xmm &)reg);
}
else
{

View File

@ -20,8 +20,14 @@
#ifndef CORE_REC_X64_X64_REGALLOC_H_
#define CORE_REC_X64_X64_REGALLOC_H_
//#define OLD_REGALLOC
#include "deps/xbyak/xbyak.h"
#ifdef OLD_REGALLOC
#include "hw/sh4/dyna/regalloc.h"
#else
#include "hw/sh4/dyna/ssa_regalloc.h"
#endif
#ifdef _WIN32
static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
@ -65,7 +71,11 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
Xbyak::Xmm MapXRegister(const shil_param& param, u32 index = 0)
{
#ifdef OLD_REGALLOC
s8 ereg = mapfv(param, index);
#else
s8 ereg = mapf(param);
#endif
if (ereg == -1)
die("VRegister not allocated");
return Xbyak::Xmm(ereg);
@ -73,12 +83,16 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
bool IsMapped(const Xbyak::Xmm &xmm, size_t opid)
{
#ifndef OLD_REGALLOC
return regf_used((s8)xmm.getIdx());
#else
for (size_t sid = 0; sid < all_spans.size(); sid++)
{
if (all_spans[sid]->nregf == xmm.getIdx() && all_spans[sid]->contains(opid))
return true;
}
return false;
#endif
}
BlockCompiler *compiler;