dynarec: reg alloc 64-bit regs. avoid some interpreter fallbacks
Option to reg-alloc 64-bit regs in two host regs. Used when FPSCR.SZ == 1 (64-bit register and memory transfers). Enabled for the ARM, ARM64 and x64 (Windows only) dynarecs. Don't fall back to the interpreter when FPSCR.PR == 1 (double precision) for FMOV, FLDS and FSTS.
This commit is contained in:
parent 95a00a165a
commit 62085539a7
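The idea behind the reg-alloc change, in brief: when FPSCR.SZ == 1, FMOV moves 64 bits at a time, but each 64-bit SH4 DR register is just a pair of consecutive 32-bit FR registers. Instead of spilling such values through the SH4 context and treating every 2-register operand as a vector op, the allocator can hand out two 32-bit host registers and address each half by index. A minimal sketch of that idea — hypothetical names (GuestReg, HostFReg, MiniRegAlloc) standing in for the real Sh4RegType/nregf_t machinery, under the assumption (true for SH4 DR/XD pairs) that the halves of a 64-bit value occupy consecutive guest register ids:

#include <cassert>
#include <map>

using GuestReg = int;  // stand-in for Sh4RegType; fr0, fr1, ... are consecutive ids
using HostFReg = int;  // stand-in for the backend's float register type (nregf_t)

// Mirrors the RegAlloc<nreg_t, nregf_t, AllocVec2> parameter added by this
// commit: with AllocVec2 set, a 2-register (64-bit) operand is no longer a
// "vector op" — each 32-bit half gets its own host register.
template <bool AllocVec2 = false>
struct MiniRegAlloc
{
    static constexpr unsigned MaxVecSize = AllocVec2 ? 2 : 1;

    std::map<GuestReg, HostFReg> allocated;

    // mapf(param, index): host register holding the index-th 32-bit half.
    HostFReg mapf(GuestReg base, unsigned count, int index = 0) const
    {
        assert(count <= MaxVecSize);        // wider operands stay vector ops and get flushed
        return allocated.at(base + index);  // halves live at consecutive guest ids
    }
};

With the flag enabled (ARM, ARM64 and the Windows x64 backend, which has enough callee-saved XMM registers), a shop_mov64 becomes two host register-to-register moves instead of a 64-bit load/store through the SH4 context, as the ARM64 shop_mov64 hunk below does with two Fmov instructions.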
@@ -48,7 +48,7 @@ static const char idle_hash[] =
 static inline shil_param mk_imm(u32 immv)
 {
-	return shil_param(FMT_IMM,immv);
+	return shil_param(immv);
 }
 
 static inline shil_param mk_reg(Sh4RegType reg)

@@ -63,17 +63,18 @@ static inline shil_param mk_regi(int reg)
 static state_t state;
 
-static void Emit(shilop op,shil_param rd=shil_param(),shil_param rs1=shil_param(),shil_param rs2=shil_param(),u32 flags=0,shil_param rs3=shil_param(),shil_param rd2=shil_param())
+static void Emit(shilop op, shil_param rd = shil_param(), shil_param rs1 = shil_param(), shil_param rs2 = shil_param(),
+		u32 size = 0, shil_param rs3 = shil_param(), shil_param rd2 = shil_param())
 {
 	shil_opcode sp;
 
-	sp.flags=flags;
-	sp.op=op;
-	sp.rd=(rd);
-	sp.rd2=(rd2);
-	sp.rs1=(rs1);
-	sp.rs2=(rs2);
-	sp.rs3=(rs3);
+	sp.size = size;
+	sp.op = op;
+	sp.rd = rd;
+	sp.rd2 = rd2;
+	sp.rs1 = rs1;
+	sp.rs2 = rs2;
+	sp.rs3 = rs3;
 	sp.guest_offs = state.cpu.rpc - blk->vaddr;
 	sp.delay_slot = state.cpu.is_delayslot;

@@ -83,12 +84,12 @@ static void Emit(shilop op,shil_param rd=shil_param(),shil_param rs1=shil_param(
 static void dec_fallback(u32 op)
 {
 	shil_opcode opcd;
-	opcd.op=shop_ifb;
+	opcd.op = shop_ifb;
 
-	opcd.rs1=shil_param(FMT_IMM,OpDesc[op]->NeedPC());
+	opcd.rs1 = shil_param(OpDesc[op]->NeedPC());
 
-	opcd.rs2=shil_param(FMT_IMM,state.cpu.rpc+2);
-	opcd.rs3=shil_param(FMT_IMM,op);
+	opcd.rs2 = shil_param(state.cpu.rpc + 2);
+	opcd.rs3 = shil_param(op);
 
 	opcd.guest_offs = state.cpu.rpc - blk->vaddr;
 	opcd.delay_slot = state.cpu.is_delayslot;

@@ -671,9 +672,13 @@ static bool dec_generic(u32 op)
 	if (op>=0xF000)
 	{
 		state.info.has_fpu=true;
-		if (state.cpu.FPR64)
-			return false;
+		if (state.cpu.FPR64) {
+			// fallback to interpreter for double float ops
+			// except fmov, flds and fsts that don't depend on PR
+			if (((op & 0xf) < 6 || (op & 0xf) > 0xc) // fmov
+					&& (op & 0xef) != 0x0d) // flds, fsts
+				return false;
+		}
 
 		if (state.cpu.FSZ64 && (d==PRM_FRN_SZ || d==PRM_FRM_SZ || s==PRM_FRN_SZ || s==PRM_FRM_SZ))
 			transfer_64 = true;
@@ -7,23 +7,19 @@ extern shil_chfp* shil_chf[];
 
 enum shil_param_type
 {
-	//2 bits
 	FMT_NULL,
 	FMT_IMM,
 	FMT_I32,
 	FMT_F32,
 	FMT_F64,
 
-	FMT_V2,
-	FMT_V3,
 	FMT_V4,
-	FMT_V8,
 	FMT_V16,
 
-	FMT_REG_BASE=FMT_I32,
-	FMT_VECTOR_BASE=FMT_V2,
+	FMT_REG_BASE = FMT_I32,
+	FMT_VECTOR_BASE = FMT_V4,
 
-	FMT_MASK=0xFFFF,
+	FMT_MASK = 0xFFFF,
 };
 
 /*

@@ -39,56 +35,54 @@ struct shil_param
 {
 	shil_param()
 	{
-		type=FMT_NULL;
-		_imm=0xFFFFFFFF;
+		type = FMT_NULL;
+		_imm = 0xFFFFFFFF;
 		memset(version, 0, sizeof(version));
 	}
-	shil_param(u32 type,u32 imm)
+
+	shil_param(u32 imm)
 	{
-		this->type=type;
-		if (type >= FMT_REG_BASE)
-			new (this) shil_param((Sh4RegType)imm);
-		_imm=imm;
+		this->type = FMT_IMM;
+		_imm = imm;
 		memset(version, 0, sizeof(version));
 	}
 
 	shil_param(Sh4RegType reg)
 	{
-		type=FMT_NULL;
-		if (reg>=reg_fr_0 && reg<=reg_xf_15)
+		if (reg >= reg_fr_0 && reg <= reg_xf_15)
 		{
-			type=FMT_F32;
-			_imm=reg;
+			type = FMT_F32;
+			_imm = reg;
 		}
-		else if (reg>=regv_dr_0 && reg<=regv_dr_14)
+		else if (reg >= regv_dr_0 && reg <= regv_dr_14)
 		{
-			type=FMT_F64;
-			_imm=(reg-regv_dr_0)*2+reg_fr_0;
+			type = FMT_F64;
+			_imm = (reg - regv_dr_0) * 2 + reg_fr_0;
 		}
-		else if (reg>=regv_xd_0 && reg<=regv_xd_14)
+		else if (reg >= regv_xd_0 && reg <= regv_xd_14)
 		{
-			type=FMT_F64;
-			_imm=(reg-regv_xd_0)*2+reg_xf_0;
+			type = FMT_F64;
+			_imm = (reg - regv_xd_0) * 2 + reg_xf_0;
 		}
-		else if (reg>=regv_fv_0 && reg<=regv_fv_12)
+		else if (reg >= regv_fv_0 && reg <= regv_fv_12)
 		{
-			type=FMT_V4;
-			_imm=(reg-regv_fv_0)*4+reg_fr_0;
+			type = FMT_V4;
+			_imm = (reg - regv_fv_0) * 4 + reg_fr_0;
 		}
-		else if (reg==regv_xmtrx)
+		else if (reg == regv_xmtrx)
 		{
-			type=FMT_V16;
-			_imm=reg_xf_0;
+			type = FMT_V16;
+			_imm = reg_xf_0;
 		}
-		else if (reg==regv_fmtrx)
+		else if (reg == regv_fmtrx)
 		{
-			type=FMT_V16;
-			_imm=reg_fr_0;
+			type = FMT_V16;
+			_imm = reg_fr_0;
 		}
 		else
 		{
-			type=FMT_I32;
-			_reg=reg;
+			type = FMT_I32;
+			_reg = reg;
 		}
 		memset(version, 0, sizeof(version));
 	}

@@ -106,25 +100,22 @@ struct shil_param
 
 	bool is_r32i() const { return type==FMT_I32; }
 	bool is_r32f() const { return type==FMT_F32; }
-	u32 is_r32fv() const { return type>=FMT_VECTOR_BASE?count():0; }
+	u32 is_r32fv() const { return type >= FMT_VECTOR_BASE ? count() : 0; }
 	bool is_r64f() const { return type==FMT_F64; }
 
 	bool is_r32() const { return is_r32i() || is_r32f(); }
 	bool is_r64() const { return is_r64f(); } //just here for symmetry ...
 
 	bool is_imm_s8() const { return is_imm() && (int8_t)_imm == (int32_t)_imm; }
 
 	u32* reg_ptr() const { verify(is_reg()); return GetRegPtr(_reg); }
 	s32 reg_nofs() const { verify(is_reg()); return (s32)((u8*)GetRegPtr(_reg) - (u8*)GetRegPtr(reg_xf_0)-sizeof(Sh4cntx)); }
 	u32 reg_aofs() const { return -reg_nofs(); }
 
 	u32 imm_value() const { verify(is_imm()); return _imm; }
 
 	bool is_vector() const { return type>=FMT_VECTOR_BASE; }
 
-	u32 count() const { return type==FMT_F64?2:type==FMT_V2?2:
-			type==FMT_V3?3:type==FMT_V4?4:type==FMT_V8?8:
-			type==FMT_V16?16:1; } //count of hardware regs
+	u32 count() const { return type == FMT_F64 ? 2 :
+			type == FMT_V4 ? 4 :
+			type == FMT_V16 ? 16 : 1; } //count of hardware regs
 
 	/*
 	Imms:

@@ -134,20 +125,18 @@ struct shil_param
 	integer regs : is_r32i,is_r32,count=1
 	fpu regs, single view : is_r32f,is_r32,count=1
 	fpu regs, double view : is_r64f,count=2
-	fpu regs, quad view : is_vector,is_r32fv=4, count=4
-	fpu regs, matrix view : is_vector,is_r32fv=16, count=16
+	fpu regs, quad view : is_r32fv=4, count=4
+	fpu regs, matrix view : is_r32fv=16, count=16
 	*/
 };
 
 struct shil_opcode
 {
 	shilop op;
-	u32 Flow;
-	u32 flags;
-	u32 flags2;
+	u32 size; // memory access size
 
-	shil_param rd,rd2;
-	shil_param rs1,rs2,rs3;
+	shil_param rd, rd2;
+	shil_param rs1, rs2, rs3;
 
 	u16 host_offs;
 	u16 guest_offs;
@@ -86,7 +86,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
 
 			shil_param op2_rd = shil_param(op->rd2._reg);
 			op2_rd.version[0] = op->rd2.version[0];
-			InsertMov32Op(op2_rd, shil_param(FMT_IMM, rd2));
+			InsertMov32Op(op2_rd, shil_param(rd2));
 
 			// the previous insert might have invalidated our reference
 			op = &block->oplist[opnum - 1];

@@ -151,7 +151,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
 
 			shil_param op2_rd = shil_param(op->rd2._reg);
 			op2_rd.version[0] = op->rd2.version[0];
-			InsertMov32Op(op2_rd, shil_param(FMT_IMM, rd2));
+			InsertMov32Op(op2_rd, shil_param(rd2));
 
 			// the previous insert might have invalidated our reference
 			op = &block->oplist[opnum - 1];

@@ -201,7 +201,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
 
 			shil_param op2_rd = shil_param((Sh4RegType)(op->rd._reg + 1));
 			op2_rd.version[0] = op->rd.version[1];
-			InsertMov32Op(op2_rd, shil_param(FMT_IMM, res >> 32));
+			InsertMov32Op(op2_rd, shil_param(res >> 32));
 
 			// the previous insert might have invalidated our reference
 			op = &block->oplist[opnum - 1];

@@ -328,7 +328,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
 
 			shil_param op2_rd = shil_param((Sh4RegType)(op->rd._reg + 1));
 			op2_rd.version[0] = op->rd.version[1];
-			InsertMov32Op(op2_rd, shil_param(FMT_IMM, rd_1));
+			InsertMov32Op(op2_rd, shil_param(rd_1));
 
 			// the previous insert might have invalidated our reference
 			op = &block->oplist[opnum - 1];
@@ -21,7 +21,6 @@
 #include <cstdio>
 #include <set>
 #include <map>
 #include <deque>
 #include <cmath>
 #include "types.h"
 #include "decoder.h"

@@ -124,7 +123,7 @@ private:
 	{
 		verify(op.rd2.is_null());
 		op.op = shop_mov32;
-		op.rs1 = shil_param(FMT_IMM, v);
+		op.rs1 = shil_param(v);
 		op.rs2.type = FMT_NULL;
 		op.rs3.type = FMT_NULL;
 		stats.constant_ops_replaced++;

@@ -235,7 +234,7 @@ private:
 		if (op.rs1.is_imm() && op.op == shop_readm && block->read_only
 				&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
 				&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
-				&& (op.flags & 0x7f) <= 4)
+				&& op.size <= 4)
 		{
 			bool doit = false;
 			if (mmu_enabled())

@@ -262,7 +261,7 @@ private:
 			if (doit)
 			{
 				u32 v;
-				switch (op.flags & 0x7f)
+				switch (op.size)
 				{
 				case 1:
 					v = (s32)(::s8)ReadMem8(op.rs1._imm);

@@ -513,7 +512,7 @@ private:
 				// There's quite a few of these
 				//printf("%08x +t<< %s\n", block->vaddr + op.guest_offs, op.dissasm().c_str());
 				op.op = shop_shl;
-				op.rs2 = shil_param(FMT_IMM, 1);
+				op.rs2 = shil_param(1);
 			}
 			// a ^ a == 0
 			// a - a == 0

@@ -526,8 +525,8 @@ private:
 			else if (op.op == shop_sbc)
 			{
 				//printf("%08x ZERO %s\n", block->vaddr + op.guest_offs, op.dissasm().c_str());
-				op.rs1 = shil_param(FMT_IMM, 0);
-				op.rs2 = shil_param(FMT_IMM, 0);
+				op.rs1 = shil_param(0);
+				op.rs2 = shil_param(0);
 				stats.prop_constants += 2;
 			}
 			// a & a == a
@@ -28,7 +28,7 @@
 
 #define ssa_printf(...) DEBUG_LOG(DYNAREC, __VA_ARGS__)
 
-template<typename nreg_t, typename nregf_t>
+template<typename nreg_t, typename nregf_t, bool AllocVec2 = false>
 class RegAlloc
 {
 public:

@@ -78,17 +78,17 @@ public:
 				FlushReg((Sh4RegType)i, true);
 		}
 		// Flush regs used by vector ops
-		if (op->rs1.is_reg() && op->rs1.count() > 1)
+		if (op->rs1.is_reg() && op->rs1.count() > MaxVecSize)
 		{
 			for (u32 i = 0; i < op->rs1.count(); i++)
 				FlushReg((Sh4RegType)(op->rs1._reg + i), false);
 		}
-		if (op->rs2.is_reg() && op->rs2.count() > 1)
+		if (op->rs2.is_reg() && op->rs2.count() > MaxVecSize)
 		{
 			for (u32 i = 0; i < op->rs2.count(); i++)
 				FlushReg((Sh4RegType)(op->rs2._reg + i), false);
 		}
-		if (op->rs3.is_reg() && op->rs3.count() > 1)
+		if (op->rs3.is_reg() && op->rs3.count() > MaxVecSize)
 		{
 			for (u32 i = 0; i < op->rs3.count(); i++)
 				FlushReg((Sh4RegType)(op->rs3._reg + i), false);

@@ -100,7 +100,7 @@ public:
 		AllocSourceReg(op->rs3);
 		// Hard flush vector ops destination regs
 		// Note that this is incorrect if a reg is both src (scalar) and dest (vec). However such an op doesn't exist.
-		if (op->rd.is_reg() && op->rd.count() > 1)
+		if (op->rd.is_reg() && op->rd.count() > MaxVecSize)
 		{
 			for (u32 i = 0; i < op->rd.count(); i++)
 			{

@@ -108,7 +108,7 @@ public:
 				FlushReg((Sh4RegType)(op->rd._reg + i), true);
 			}
 		}
-		if (op->rd2.is_reg() && op->rd2.count() > 1)
+		if (op->rd2.is_reg() && op->rd2.count() > MaxVecSize)
 		{
 			for (u32 i = 0; i < op->rd2.count(); i++)
 			{

@@ -133,9 +133,7 @@ public:
 
 		// Flush normally
 		for (auto const& reg : reg_alloced)
-		{
 			FlushReg(reg.first, false);
-		}
 
 		// Hard flush all dirty regs. Useful for troubleshooting
 		// while (!reg_alloced.empty())

@@ -175,7 +173,7 @@ public:
 		bool rv = IsAllocAny(prm._reg);
 		if (prm.count() != 1)
 		{
-			for (u32 i = 1;i < prm.count(); i++)
+			for (u32 i = 1; i < prm.count(); i++)
 				verify(IsAllocAny((Sh4RegType)(prm._reg + i)) == rv);
 		}
 		return rv;

@@ -190,7 +188,8 @@ public:
 	{
 		if (prm.is_reg())
 		{
-			verify(prm.count() == 1);
+			if (prm.count() > MaxVecSize)
+				return false;
 			return IsAllocg(prm._reg);
 		}
 		else

@@ -203,7 +202,8 @@ public:
 	{
 		if (prm.is_reg())
 		{
-			verify(prm.count() == 1);
+			if (prm.count() > MaxVecSize)
+				return false;
 			return IsAllocf(prm._reg);
 		}
 		else

@@ -219,11 +219,11 @@ public:
 		return mapg(prm._reg);
 	}
 
-	nregf_t mapf(const shil_param& prm)
+	nregf_t mapf(const shil_param& prm, int index = 0)
 	{
 		verify(IsAllocf(prm));
-		verify(prm.count() == 1);
-		return mapf(prm._reg);
+		verify(prm.count() <= MaxVecSize);
+		return mapf((Sh4RegType)(prm._reg + index));
 	}
 
 	bool reg_used(nreg_t host_reg)

@@ -266,6 +266,7 @@ private:
 		bool write_back;
 		bool dirty;
 	};
+	static constexpr u32 MaxVecSize = AllocVec2 ? 2 : 1;
 
 	bool IsFloat(Sh4RegType reg)
 	{

@@ -309,11 +310,16 @@ private:
 	{
 		if (!fast_forwarding)
 		{
-			ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version, 'a' + reg_alloc.host_reg);
 			if (IsFloat(reg_num))
+			{
+				ssa_printf("WB %s.%d <- xmm%d", name_reg(reg_num).c_str(), reg_alloc.version, reg_alloc.host_reg);
 				Writeback_FPU(reg_num, (nregf_t)reg_alloc.host_reg);
+			}
 			else
+			{
+				ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version, 'a' + reg_alloc.host_reg);
 				Writeback(reg_num, (nreg_t)reg_alloc.host_reg);
+			}
 		}
 		reg_alloc.write_back = false;
 		reg_alloc.dirty = false;

@@ -354,9 +360,12 @@ private:
 
 	void AllocSourceReg(const shil_param& param)
 	{
-		if (param.is_reg() && param.count() == 1) // TODO EXPLODE_SPANS?
+		if (!param.is_reg() || param.count() > MaxVecSize)
+			return;
+		for (u32 i = 0; i < param.count(); i++)
 		{
-			auto it = reg_alloced.find(param._reg);
+			Sh4RegType sh4reg = (Sh4RegType)(param._reg + i);
+			auto it = reg_alloced.find(sh4reg);
 			if (it == reg_alloced.end())
 			{
 				u32 host_reg;

@@ -380,14 +389,19 @@ private:
 					host_reg = host_fregs.back();
 					host_fregs.pop_back();
 				}
-				reg_alloced[param._reg] = { host_reg, param.version[0], false, false };
+				reg_alloced[sh4reg] = { host_reg, param.version[i], false, false };
 				if (!fast_forwarding)
 				{
-					ssa_printf("PL %s.%d -> %cx", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg);
-					if (IsFloat(param._reg))
-						Preload_FPU(param._reg, (nregf_t)host_reg);
+					if (IsFloat(sh4reg))
+					{
+						ssa_printf("PL %s.%d -> xmm%d", name_reg(sh4reg).c_str(), param.version[i], host_reg);
+						Preload_FPU(sh4reg, (nregf_t)host_reg);
+					}
 					else
-						Preload(param._reg, (nreg_t)host_reg);
+					{
+						ssa_printf("PL %s.%d -> %cx", name_reg(sh4reg).c_str(), param.version[i], 'a' + host_reg);
+						Preload(sh4reg, (nreg_t)host_reg);
+					}
 				}
 			}
 		}

@@ -424,9 +438,12 @@ private:
 
 	void AllocDestReg(const shil_param& param)
 	{
-		if (param.is_reg() && param.count() == 1) // TODO EXPLODE_SPANS?
+		if (!param.is_reg() || param.count() > MaxVecSize)
+			return;
+		for (u32 i = 0; i < param.count(); i++)
 		{
-			auto it = reg_alloced.find(param._reg);
+			Sh4RegType sh4reg = (Sh4RegType)(param._reg + i);
+			auto it = reg_alloced.find(sh4reg);
 			if (it == reg_alloced.end())
 			{
 				u32 host_reg;

@@ -450,18 +467,21 @@ private:
 					host_reg = host_fregs.back();
 					host_fregs.pop_back();
 				}
-				reg_alloced[param._reg] = { host_reg, param.version[0], NeedsWriteBack(param._reg, param.version[0]), true };
-				ssa_printf(" %s.%d -> %cx %s", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg, reg_alloced[param._reg].write_back ? "(wb)" : "");
+				reg_alloced[sh4reg] = { host_reg, param.version[i], NeedsWriteBack(sh4reg, param.version[i]), true };
+				if (param.is_r32i())
+					ssa_printf(" %s.%d -> %cx %s", name_reg(sh4reg).c_str(), param.version[i], 'a' + host_reg, reg_alloced[sh4reg].write_back ? "(wb)" : "");
+				else
+					ssa_printf(" %s.%d -> xmm%d %s", name_reg(sh4reg).c_str(), param.version[i], host_reg, reg_alloced[sh4reg].write_back ? "(wb)" : "");
 			}
 			else
 			{
-				reg_alloc& reg = reg_alloced[param._reg];
+				reg_alloc& reg = reg_alloced[sh4reg];
 				verify(!reg.write_back);
-				reg.write_back = NeedsWriteBack(param._reg, param.version[0]);
+				reg.write_back = NeedsWriteBack(sh4reg, param.version[i]);
 				reg.dirty = true;
-				reg.version = param.version[0];
+				reg.version = param.version[i];
 			}
-			verify(reg_alloced[param._reg].dirty);
+			verify(reg_alloced[sh4reg].dirty);
 		}
 	}
 

@@ -544,22 +564,26 @@ private:
 
 	bool IsVectorOp(shil_opcode* op)
 	{
-		return op->rs1.count() > 1 || op->rs2.count() > 1 || op->rs3.count() > 1 || op->rd.count() > 1 || op->rd2.count() > 1;
+		return op->rs1.count() > MaxVecSize
+				|| op->rs2.count() > MaxVecSize
+				|| op->rs3.count() > MaxVecSize
+				|| op->rd.count() > MaxVecSize
+				|| op->rd2.count() > MaxVecSize;
 	}
 
 	bool UsesReg(shil_opcode* op, Sh4RegType reg, u32 version, bool vector)
 	{
 		if (op->rs1.is_reg() && reg >= op->rs1._reg && reg < (Sh4RegType)(op->rs1._reg + op->rs1.count())
 				&& version == op->rs1.version[reg - op->rs1._reg]
-				&& vector == (op->rs1.count() > 1))
+				&& vector == (op->rs1.count() > MaxVecSize))
 			return true;
 		if (op->rs2.is_reg() && reg >= op->rs2._reg && reg < (Sh4RegType)(op->rs2._reg + op->rs2.count())
 				&& version == op->rs2.version[reg - op->rs2._reg]
-				&& vector == (op->rs2.count() > 1))
+				&& vector == (op->rs2.count() > MaxVecSize))
 			return true;
 		if (op->rs3.is_reg() && reg >= op->rs3._reg && reg < (Sh4RegType)(op->rs3._reg + op->rs3.count())
 				&& version == op->rs3.version[reg - op->rs3._reg]
-				&& vector == (op->rs3.count() > 1))
+				&& vector == (op->rs3.count() > MaxVecSize))
 			return true;
 
 		return false;

@@ -568,10 +592,10 @@ private:
 	bool DefsReg(shil_opcode* op, Sh4RegType reg, bool vector)
 	{
 		if (op->rd.is_reg() && reg >= op->rd._reg && reg < (Sh4RegType)(op->rd._reg + op->rd.count())
-				&& vector == (op->rd.count() > 1))
+				&& vector == (op->rd.count() > MaxVecSize))
 			return true;
 		if (op->rd2.is_reg() && reg >= op->rd2._reg && reg < (Sh4RegType)(op->rd2._reg + op->rd2.count())
-				&& vector == (op->rd2.count() > 1))
+				&& vector == (op->rd2.count() > MaxVecSize))
 			return true;
 		return false;
 	}
@@ -62,6 +62,7 @@ void sh4_rio_reg(RegisterStruct *arr, u32 addr, RegIO flags, RegReadAddrFP* rf,
 	}
 	else
 	{
+		verify(!(flags & REG_WO)); // not supported here
 		if (flags & REG_RF)
 			arr[idx].readFunctionAddr = rf;
 		else
@@ -120,7 +120,7 @@ const int alloc_regs[] = { 5, 6, 7, 10, 11, -1 };
 const int alloc_fpu[] = { 16, 17, 18, 19, 20, 21, 22, 23,
 		24, 25, 26, 27, 28, 29, 30, 31, -1 };
 
-struct arm_reg_alloc: RegAlloc<int, int>
+struct arm_reg_alloc: RegAlloc<int, int, true>
 {
 	void Preload(u32 reg, int nreg) override
 	{

@@ -149,9 +149,9 @@ struct arm_reg_alloc: RegAlloc<int, int>
 		ass.Vstr(SRegister(nreg), MemOperand(r8, shRegOffs));
 	}
 
-	SRegister mapFReg(const shil_param& prm)
+	SRegister mapFReg(const shil_param& prm, int index = 0)
 	{
-		return SRegister(mapf(prm));
+		return SRegister(mapf(prm, index));
 	}
 	Register mapReg(const shil_param& prm)
 	{

@@ -561,16 +561,15 @@ enum mem_op_type
 
 static mem_op_type memop_type(shil_opcode* op)
 {
-	int sz = op->flags & 0x7f;
 	bool fp32 = op->rs2.is_r32f() || op->rd.is_r32f();
 
-	if (sz == 1)
+	if (op->size == 1)
 		return SZ_8;
-	else if (sz == 2)
+	else if (op->size == 2)
 		return SZ_16;
-	else if (sz == 4)
+	else if (op->size == 4)
 		return fp32 ? SZ_32F : SZ_32I;
-	else if (sz == 8)
+	else if (op->size == 8)
 		return SZ_64F;
 
 	die("Unknown op");

@@ -855,16 +854,15 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool
 	if (!op->rs1.is_imm())
 		return false;
 
-	u32 size = op->flags & 0x7f;
 	u32 addr = op->rs1._imm;
-	if (mmu_enabled() && mmu_is_translated(addr, size))
+	if (mmu_enabled() && mmu_is_translated(addr, op->size))
 	{
 		if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 			// When full mmu is on, only consider addresses in the same 4k page
 			return false;
 		u32 paddr;
 		u32 rv;
-		switch (size)
+		switch (op->size)
 		{
 		case 1:
 			rv = mmu_data_translation<MMU_TT_DREAD, u8>(addr, paddr);

@@ -914,8 +912,16 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool
 			break;
 
 		case SZ_64F:
-			ass.Vldr(d0, MemOperand(r0));
-			ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			if (reg.IsAllocf(op->rd))
+			{
+				ass.Vldr(reg.mapFReg(op->rd, 0), MemOperand(r0));
+				ass.Vldr(reg.mapFReg(op->rd, 1), MemOperand(r0, 4));
+			}
+			else
+			{
+				ass.Vldr(d0, MemOperand(r0));
+				ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			}
 			break;
 		}
 	}

@@ -928,11 +934,17 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool
 		// Need to call the handler twice
 		ass.Mov(r0, op->rs1._imm);
 		call(ptr);
-		ass.Str(r0, MemOperand(r8, op->rd.reg_nofs()));
+		if (reg.IsAllocf(op->rd))
+			ass.Vmov(reg.mapFReg(op->rd, 0), r0);
+		else
+			ass.Str(r0, MemOperand(r8, op->rd.reg_nofs()));
 
 		ass.Mov(r0, op->rs1._imm + 4);
 		call(ptr);
-		ass.Str(r0, MemOperand(r8, op->rd.reg_nofs() + 4));
+		if (reg.IsAllocf(op->rd))
+			ass.Vmov(reg.mapFReg(op->rd, 1), r0);
+		else
+			ass.Str(r0, MemOperand(r8, op->rd.reg_nofs() + 4));
 	}
 	else
 	{

@@ -975,16 +987,15 @@ static bool ngen_writemem_immediate(RuntimeBlockInfo* block, shil_opcode* op, bo
 	if (!op->rs1.is_imm())
 		return false;
 
-	u32 size = op->flags & 0x7f;
 	u32 addr = op->rs1._imm;
-	if (mmu_enabled() && mmu_is_translated(addr, size))
+	if (mmu_enabled() && mmu_is_translated(addr, op->size))
 	{
 		if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 			// When full mmu is on, only consider addresses in the same 4k page
 			return false;
 		u32 paddr;
 		u32 rv;
-		switch (size)
+		switch (op->size)
 		{
 		case 1:
 			rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);

@@ -1041,8 +1052,16 @@ static bool ngen_writemem_immediate(RuntimeBlockInfo* block, shil_opcode* op, bo
 			break;
 
 		case SZ_64F:
-			ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs()));
-			ass.Vstr(d0, MemOperand(r0));
+			if (reg.IsAllocf(op->rs2))
+			{
+				ass.Vstr(reg.mapFReg(op->rs2, 0), MemOperand(r0));
+				ass.Vstr(reg.mapFReg(op->rs2, 1), MemOperand(r0, 4));
+			}
+			else
+			{
+				ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs()));
+				ass.Vstr(d0, MemOperand(r0));
+			}
 			break;
 
 		default:

@@ -1157,9 +1176,20 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 
 		case SZ_64F:
 			ass.Add(r1, r1, r8); //3 opcodes, there's no [REG+REG] VLDR
-			ass.Vldr(d0, MemOperand(r1)); //TODO: use reg alloc
-
-			ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			ass.Vldr(d0, MemOperand(r1));
+			if (reg.IsAllocf(op->rd))
+			{
+				ass.Vmov(r0, r1, d0);
+				ass.Vmov(reg.mapFReg(op->rd, 0), r0);
+				ass.Vmov(reg.mapFReg(op->rd, 1), r1);
+				// easier to do just this but we need to use a different op than 32f to distinguish during rewrite
+				//ass.Vldr(reg.mapFReg(op->rd, 0), MemOperand(r1));
+				//ass.Vldr(reg.mapFReg(op->rd, 1), MemOperand(r1, 4));
+			}
+			else
+			{
+				ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			}
 			break;
 		}
 	} else {

@@ -1183,7 +1213,16 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 
 		case SZ_64F:
 			vmem_slowpath(raddr, r0, s0, d0, optp, true);
-			ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			if (reg.IsAllocf(op->rd))
+			{
+				ass.Vmov(r0, r1, d0);
+				ass.Vmov(reg.mapFReg(op->rd, 0), r0);
+				ass.Vmov(reg.mapFReg(op->rd, 1), r1);
+			}
+			else
+			{
+				ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+			}
 			break;
 		}
 	}

@@ -1201,9 +1240,19 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 		Register rs2 = r2;
 		SRegister rs2f = s2;
 
-		//TODO: use reg alloc
 		if (optp == SZ_64F)
-			ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs()));
+		{
+			if (reg.IsAllocf(op->rs2))
+			{
+				ass.Vmov(r2, reg.mapFReg(op->rs2, 0));
+				ass.Vmov(r3, reg.mapFReg(op->rs2, 1));
+				ass.Vmov(d0, r2, r3);
+			}
+			else
+			{
+				ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs()));
+			}
+		}
 		else if (op->rs2.is_imm())
 		{
 			ass.Mov(rs2, op->rs2._imm);

@@ -1242,7 +1291,7 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 
 		case SZ_64F:
 			ass.Add(r1, r1, r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ
-			ass.Vstr(d0, MemOperand(r1)); //TODO: use reg alloc
+			ass.Vstr(d0, MemOperand(r1));
 			break;
 		}
 	} else {

@@ -1358,9 +1407,18 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 		break;
 
 	case shop_mov64:
-		verify(op->rs1.is_r64() && op->rd.is_r64());
-		ass.Vldr(d0, MemOperand(r8, op->rs1.reg_nofs()));
-		ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+		verify(op->rs1.is_r64f() && op->rd.is_r64f());
+		if (reg.IsAllocf(op->rd))
+		{
+			verify(reg.IsAllocf(op->rs1));
+			ass.Vmov(reg.mapFReg(op->rd, 0), reg.mapFReg(op->rs1, 0));
+			ass.Vmov(reg.mapFReg(op->rd, 1), reg.mapFReg(op->rs1, 1));
+		}
+		else
+		{
+			ass.Vldr(d0, MemOperand(r8, op->rs1.reg_nofs()));
+			ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+		}
 		break;
 
 	case shop_jcond:

@@ -1821,8 +1879,16 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool o
 
 		ass.Add(r0, r1, Operand(r0, LSL, 3));
 
-		ass.Vldr(d0, MemOperand(r0));
-		ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+		if (reg.IsAllocf(op->rd))
+		{
+			ass.Vldr(reg.mapFReg(op->rd, 0), MemOperand(r0));
+			ass.Vldr(reg.mapFReg(op->rd, 1), MemOperand(r0, 4));
+		}
+		else
+		{
+			ass.Vldr(d0, MemOperand(r0));
+			ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs()));
+		}
 		break;
 
 	case shop_fipr:
@@ -21,6 +21,8 @@
 #include <aarch64/macro-assembler-aarch64.h>
 using namespace vixl::aarch64;
 
+#define ALLOC_F64 true
+
 enum eReg {
 	W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, W16,
 	W17, W18, W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, W30

@@ -35,7 +37,7 @@ static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 }
 
 class Arm64Assembler;
 
-struct Arm64RegAlloc : RegAlloc<eReg, eFReg>
+struct Arm64RegAlloc : RegAlloc<eReg, eFReg, ALLOC_F64>
 {
 	Arm64RegAlloc(Arm64Assembler *assembler) : assembler(assembler) {}
 

@@ -57,9 +59,9 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg>
 		return Register::GetWRegFromCode(ereg);
 	}
 
-	const VRegister& MapVRegister(const shil_param& param)
+	const VRegister& MapVRegister(const shil_param& param, int index = 0)
 	{
-		eFReg ereg = mapf(param);
+		eFReg ereg = mapf(param, index);
 		if (ereg == (eFReg)-1)
 			die("VRegister not allocated");
 		return VRegister::GetSRegFromCode(ereg);
@@ -387,11 +387,28 @@ public:
 			break;
 
 		case shop_mov64:
-			verify(op.rd.is_reg());
-			verify(op.rs1.is_reg() || op.rs1.is_imm());
+			{
+				verify(op.rd.is_reg());
+				verify(op.rs1.is_reg() || op.rs1.is_imm());
 
-			shil_param_to_host_reg(op.rs1, x15);
-			host_reg_to_shil_param(op.rd, x15);
+				if (!regalloc.IsAllocf(op.rd))
+				{
+					verify(!regalloc.IsAllocf(op.rs1));
+					shil_param_to_host_reg(op.rs1, x15);
+					host_reg_to_shil_param(op.rd, x15);
+				}
+				else
+				{
+					const VRegister& rd0 = regalloc.MapVRegister(op.rd, 0);
+					const VRegister& rs0 = regalloc.MapVRegister(op.rs1, 0);
+					if (!rd0.Is(rs0))
+						Fmov(rd0, rs0);
+					const VRegister& rd1 = regalloc.MapVRegister(op.rd, 1);
+					const VRegister& rs1 = regalloc.MapVRegister(op.rs1, 1);
+					if (!rd1.Is(rs1))
+						Fmov(rd1, rs1);
+				}
+			}
 			break;
 
 		case shop_readm:

@@ -904,8 +921,15 @@ public:
 				Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3));
 			else
 				Add(x1, x1, Operand(op.rs1.imm_value() << 3));
-			Ldr(x2, MemOperand(x1));
-			Str(x2, sh4_context_mem_operand(op.rd.reg_ptr()));
+			if (regalloc.IsAllocf(op.rd))
+			{
+				Ldp(regalloc.MapVRegister(op.rd, 0), regalloc.MapVRegister(op.rd, 1), MemOperand(x1));
+			}
+			else
+			{
+				Ldr(x2, MemOperand(x1));
+				Str(x2, sh4_context_mem_operand(op.rd.reg_ptr()));
+			}
 			break;
 
 		case shop_fipr:

@@ -1659,14 +1683,13 @@ private:
 		GenMemAddr(op, &w0);
 		genMmuLookup(op, 0);
 
-		u32 size = op.flags & 0x7f;
 		if (!optimise || !GenReadMemoryFast(op, opid))
-			GenReadMemorySlow(size);
+			GenReadMemorySlow(op.size);
 
-		if (size < 8)
+		if (op.size < 8)
 			host_reg_to_shil_param(op.rd, w0);
 		else
-			Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
+			host_reg_to_shil_param(op.rd, x0);
 	}
 
 	bool GenReadMemoryImmediate(const shil_opcode& op)

@@ -1674,16 +1697,15 @@ private:
 		if (!op.rs1.is_imm())
 			return false;
 
-		u32 size = op.flags & 0x7f;
 		u32 addr = op.rs1._imm;
-		if (mmu_enabled() && mmu_is_translated(addr, size))
+		if (mmu_enabled() && mmu_is_translated(addr, op.size))
 		{
 			if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 				// When full mmu is on, only consider addresses in the same 4k page
 				return false;
 			u32 paddr;
 			u32 rv;
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				rv = mmu_data_translation<MMU_TT_DREAD, u8>(addr, paddr);

@@ -1705,14 +1727,14 @@ private:
 			addr = paddr;
 		}
 		bool isram = false;
-		void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
+		void* ptr = _vmem_read_const(addr, isram, op.size > 4 ? 4 : op.size);
 
 		if (isram)
 		{
 			Ldr(x1, reinterpret_cast<uintptr_t>(ptr)); // faster than Mov
 			if (regalloc.IsAllocAny(op.rd))
 			{
-				switch (size)
+				switch (op.size)
 				{
 				case 1:
 					Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x1));

@@ -1729,6 +1751,10 @@ private:
 					Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
 					break;
 
+				case 8:
+					Ldp(regalloc.MapVRegister(op.rd, 0), regalloc.MapVRegister(op.rd, 1), MemOperand(x1));
+					break;
+
 				default:
 					die("Invalid size");
 					break;

@@ -1736,7 +1762,7 @@ private:
 			}
 			else
 			{
-				switch (size)
+				switch (op.size)
 				{
 				case 1:
 					Ldrsb(w1, MemOperand(x1));

@@ -1758,7 +1784,7 @@ private:
 					die("Invalid size");
 					break;
 				}
-				if (size == 8)
+				if (op.size == 8)
 					Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
 				else
 					Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));

@@ -1767,23 +1793,28 @@ private:
 		else
 		{
 			// Not RAM
-			if (size == 8)
+			if (op.size == 8)
 			{
-				verify(!regalloc.IsAllocAny(op.rd));
 				// Need to call the handler twice
 				Mov(w0, addr);
 				GenCallRuntime((void (*)())ptr);
-				Str(w0, sh4_context_mem_operand(op.rd.reg_ptr()));
+				if (regalloc.IsAllocf(op.rd))
+					Fmov(regalloc.MapVRegister(op.rd, 0), w0);
+				else
+					Str(w0, sh4_context_mem_operand(op.rd.reg_ptr()));
 
 				Mov(w0, addr + 4);
 				GenCallRuntime((void (*)())ptr);
-				Str(w0, sh4_context_mem_operand((u8*)op.rd.reg_ptr() + 4));
+				if (regalloc.IsAllocf(op.rd))
+					Fmov(regalloc.MapVRegister(op.rd, 1), w0);
+				else
+					Str(w0, sh4_context_mem_operand((u8*)op.rd.reg_ptr() + 4));
 			}
 			else
 			{
 				Mov(w0, addr);
 
-				switch(size)
+				switch(op.size)
 				{
 				case 1:
 					GenCallRuntime((void (*)())ptr);

@@ -1830,8 +1861,7 @@ private:
 		Ubfx(x1, x0, 0, 29);
 		Add(x1, x1, sizeof(Sh4Context), LeaveFlags);
 
-		u32 size = op.flags & 0x7f;
-		switch(size)
+		switch (op.size)
 		{
 		case 1:
 			Ldrsb(w0, MemOperand(x28, x1));

@@ -1862,15 +1892,14 @@ private:
 		GenMemAddr(op, &w0);
 		genMmuLookup(op, 1);
 
-		u32 size = op.flags & 0x7f;
-		if (size != 8)
+		if (op.size != 8)
 			shil_param_to_host_reg(op.rs2, w1);
 		else
 			shil_param_to_host_reg(op.rs2, x1);
 		if (optimise && GenWriteMemoryFast(op, opid))
 			return;
 
-		GenWriteMemorySlow(size);
+		GenWriteMemorySlow(op.size);
 	}
 
 	bool GenWriteMemoryImmediate(const shil_opcode& op)

@@ -1878,16 +1907,15 @@ private:
 		if (!op.rs1.is_imm())
 			return false;
 
-		u32 size = op.flags & 0x7f;
 		u32 addr = op.rs1._imm;
-		if (mmu_enabled() && mmu_is_translated(addr, size))
+		if (mmu_enabled() && mmu_is_translated(addr, op.size))
 		{
 			if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 				// When full mmu is on, only consider addresses in the same 4k page
 				return false;
 			u32 paddr;
 			u32 rv;
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);

@@ -1909,11 +1937,11 @@ private:
 			addr = paddr;
 		}
 		bool isram = false;
-		void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
+		void* ptr = _vmem_write_const(addr, isram, op.size > 4 ? 4 : op.size);
 
-		Register reg2;
-		if (size != 8)
+		if (isram)
 		{
+			Register reg2;
 			if (op.rs2.is_imm())
 			{
 				Mov(w1, op.rs2._imm);

@@ -1923,6 +1951,11 @@ private:
 			{
 				reg2 = regalloc.MapRegister(op.rs2);
 			}
+			else if (op.size == 8)
+			{
+				shil_param_to_host_reg(op.rs2, x1);
+				reg2 = x1;
+			}
 			else if (regalloc.IsAllocf(op.rs2))
 			{
 				Fmov(w1, regalloc.MapVRegister(op.rs2));

@@ -1930,11 +1963,9 @@ private:
 			}
 			else
 				die("Invalid rs2 param");
-		}
-		if (isram)
-		{
 
 			Ldr(x0, reinterpret_cast<uintptr_t>(ptr));
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				Strb(reg2, MemOperand(x0));

@@ -1949,8 +1980,7 @@ private:
 				break;
 
 			case 8:
-				shil_param_to_host_reg(op.rs2, x1);
-				Str(x1, MemOperand(x0));
+				Str(reg2, MemOperand(x0));
 				break;
 
 			default:

@@ -1962,10 +1992,10 @@ private:
 		{
 			// Not RAM
 			Mov(w0, addr);
-			if (size == 8)
+			shil_param_to_host_reg(op.rs2, x1);
+			if (op.size == 8)
 			{
 				// Need to call the handler twice
-				shil_param_to_host_reg(op.rs2, x1);
 				GenCallRuntime((void (*)())ptr);
 
 				Mov(w0, addr + 4);

@@ -1975,7 +2005,6 @@ private:
 			}
 			else
 			{
-				Mov(w1, reg2);
 				GenCallRuntime((void (*)())ptr);
 			}
 		}

@@ -1996,8 +2025,7 @@ private:
 		Ubfx(x7, x0, 0, 29);
 		Add(x7, x7, sizeof(Sh4Context), LeaveFlags);
 
-		u32 size = op.flags & 0x7f;
-		switch(size)
+		switch(op.size)
 		{
 		case 1:
 			Strb(w1, MemOperand(x28, x7));

@@ -2112,21 +2140,28 @@ private:
 		}
 		else if (param.is_reg())
 		{
-			if (param.is_r64f())
+			if (param.is_r64f() && !regalloc.IsAllocf(param))
+			{
 				Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
-			else if (param.is_r32f())
+			}
+			else if (param.is_r32f() || param.is_r64f())
 			{
 				if (regalloc.IsAllocf(param))
-					Fmov(reg, regalloc.MapVRegister(param));
+					Fmov(reg.W(), regalloc.MapVRegister(param, 0));
 				else
-					Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+					Ldr(reg.W(), sh4_context_mem_operand(param.reg_ptr()));
+				if (param.is_r64f())
+				{
+					Fmov(w15, regalloc.MapVRegister(param, 1));
+					Bfm(reg, x15, 32, 31);
+				}
 			}
 			else
 			{
 				if (regalloc.IsAllocg(param))
-					Mov(reg, regalloc.MapRegister(param));
+					Mov(reg.W(), regalloc.MapRegister(param));
 				else
-					Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+					Ldr(reg.W(), sh4_context_mem_operand(param.reg_ptr()));
 			}
 		}
 		else

@@ -2139,7 +2174,17 @@ private:
 	{
 		if (reg.Is64Bits())
 		{
-			Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
+			if (regalloc.IsAllocf(param))
+			{
+				verify(param.count() == 2);
+				Fmov(regalloc.MapVRegister(param, 0), reg.W());
+				Lsr(reg.X(), reg.X(), 32);
+				Fmov(regalloc.MapVRegister(param, 1), reg.W());
+			}
+			else
+			{
+				Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
+			}
 		}
 		else if (regalloc.IsAllocg(param))
 		{
@@ -1665,7 +1665,7 @@ public:
 	case shop_readm:
 	{
-		u32 size = op.flags & 0x7f;
+		u32 size = op.size;
 		if (op.rs1.is_imm()) {
 			verify(op.rs2.is_null() && op.rs3.is_null());
 

@@ -1748,7 +1748,7 @@ public:
 	case shop_writem:
 	{
-		u32 size = op.flags & 0x7f;
+		u32 size = op.size;
 
 		if (op.rs1.is_imm()) {
 			verify(op.rs3.is_null());
@@ -215,13 +215,24 @@ public:
 
 		case shop_mov64:
 		{
-			verify(op.rd.is_r64());
-			verify(op.rs1.is_r64());
+			verify(op.rd.is_r64f());
+			verify(op.rs1.is_r64f());
 
+#if ALLOC_F64 == false
 			mov(rax, (uintptr_t)op.rs1.reg_ptr());
 			mov(rax, qword[rax]);
 			mov(rcx, (uintptr_t)op.rd.reg_ptr());
 			mov(qword[rcx], rax);
+#else
+			Xbyak::Xmm rd = regalloc.MapXRegister(op.rd, 0);
+			Xbyak::Xmm rs = regalloc.MapXRegister(op.rs1, 0);
+			if (rd != rs)
+				movss(rd, rs);
+			rd = regalloc.MapXRegister(op.rd, 1);
+			rs = regalloc.MapXRegister(op.rs1, 1);
+			if (rd != rs)
+				movss(rd, rs);
+#endif
 		}
 		break;

@@ -244,16 +255,18 @@ public:
 			}
 			genMmuLookup(block, op, 0);
 
-			int size = op.flags & 0x7f;
-			size = size == 1 ? MemSize::S8 : size == 2 ? MemSize::S16 : size == 4 ? MemSize::S32 : MemSize::S64;
+			int size = op.size == 1 ? MemSize::S8 : op.size == 2 ? MemSize::S16 : op.size == 4 ? MemSize::S32 : MemSize::S64;
 			GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::R], mmu_enabled());
 
-			if (size != MemSize::S64)
-				host_reg_to_shil_param(op.rd, eax);
-			else {
+#if ALLOC_F64 == false
+			if (size == MemSize::S64)
+			{
 				mov(rcx, (uintptr_t)op.rd.reg_ptr());
 				mov(qword[rcx], rax);
 			}
+			else
+#endif
+				host_reg_to_shil_param(op.rd, rax);
 		}
 		break;

@@ -276,15 +289,17 @@ public:
 			}
 			genMmuLookup(block, op, 1);
 
-			u32 size = op.flags & 0x7f;
-			if (size != 8)
-				shil_param_to_host_reg(op.rs2, call_regs[1]);
-			else {
+#if ALLOC_F64 == false
+			if (op.size == 8)
+			{
 				mov(rax, (uintptr_t)op.rs2.reg_ptr());
 				mov(call_regs64[1], qword[rax]);
 			}
+			else
+#endif
+				shil_param_to_host_reg(op.rs2, call_regs64[1]);
 
-			size = size == 1 ? MemSize::S8 : size == 2 ? MemSize::S16 : size == 4 ? MemSize::S32 : MemSize::S64;
+			int size = op.size == 1 ? MemSize::S8 : op.size == 2 ? MemSize::S16 : op.size == 4 ? MemSize::S32 : MemSize::S64;
 			GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::W], mmu_enabled());
 		}
 	}

@@ -809,9 +824,8 @@ private:
 	{
 		if (!op.rs1.is_imm())
 			return false;
-		u32 size = op.flags & 0x7f;
 		u32 addr = op.rs1._imm;
-		if (mmu_enabled() && mmu_is_translated(addr, size))
+		if (mmu_enabled() && mmu_is_translated(addr, op.size))
 		{
 			if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 				// When full mmu is on, only consider addresses in the same 4k page

@@ -819,7 +833,7 @@ private:
 
 			u32 paddr;
 			u32 rv;
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				rv = mmu_data_translation<MMU_TT_DREAD, u8>(addr, paddr);

@@ -841,13 +855,13 @@ private:
 			addr = paddr;
 		}
 		bool isram = false;
-		void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
+		void* ptr = _vmem_read_const(addr, isram, op.size > 4 ? 4 : op.size);
 
 		if (isram)
 		{
 			// Immediate pointer to RAM: super-duper fast access
 			mov(rax, reinterpret_cast<uintptr_t>(ptr));
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				if (regalloc.IsAllocg(op.rd))

@@ -885,9 +899,14 @@ private:
 				break;
 
 			case 8:
+#if ALLOC_F64 == false
 				mov(rcx, qword[rax]);
 				mov(rax, (uintptr_t)op.rd.reg_ptr());
 				mov(qword[rax], rcx);
+#else
+				movd(regalloc.MapXRegister(op.rd, 0), dword[rax]);
+				movd(regalloc.MapXRegister(op.rd, 1), dword[rax + 4]);
+#endif
 				break;
 
 			default:

@@ -898,26 +917,32 @@ private:
 		else
 		{
 			// Not RAM: the returned pointer is a memory handler
-			if (size == 8)
+			if (op.size == 8)
 			{
-				verify(!regalloc.IsAllocAny(op.rd));
-
 				// Need to call the handler twice
 				mov(call_regs[0], addr);
 				GenCall((void (*)())ptr);
+#if ALLOC_F64 == false
 				mov(rcx, (size_t)op.rd.reg_ptr());
 				mov(dword[rcx], eax);
+#else
+				mov(regalloc.MapXRegister(op.rd, 0), eax);
+#endif
 
 				mov(call_regs[0], addr + 4);
 				GenCall((void (*)())ptr);
+#if ALLOC_F64 == false
 				mov(rcx, (size_t)op.rd.reg_ptr() + 4);
 				mov(dword[rcx], eax);
+#else
+				mov(regalloc.MapXRegister(op.rd, 1), eax);
+#endif
 			}
 			else
 			{
 				mov(call_regs[0], addr);
 
-				switch(size)
+				switch(op.size)
 				{
 				case 1:
 					GenCall((void (*)())ptr);

@@ -948,9 +973,8 @@ private:
 	{
 		if (!op.rs1.is_imm())
 			return false;
-		u32 size = op.flags & 0x7f;
 		u32 addr = op.rs1._imm;
-		if (mmu_enabled() && mmu_is_translated(addr, size))
+		if (mmu_enabled() && mmu_is_translated(addr, op.size))
 		{
 			if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
 				// When full mmu is on, only consider addresses in the same 4k page

@@ -958,7 +982,7 @@ private:
 
 			u32 paddr;
 			u32 rv;
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				rv = mmu_data_translation<MMU_TT_DWRITE, u8>(addr, paddr);

@@ -980,13 +1004,13 @@ private:
 			addr = paddr;
 		}
 		bool isram = false;
-		void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
+		void* ptr = _vmem_write_const(addr, isram, op.size > 4 ? 4 : op.size);
 
 		if (isram)
 		{
 			// Immediate pointer to RAM: super-duper fast access
 			mov(rax, reinterpret_cast<uintptr_t>(ptr));
-			switch (size)
+			switch (op.size)
 			{
 			case 1:
 				if (regalloc.IsAllocg(op.rs2))

@@ -1030,9 +1054,14 @@ private:
 				break;
 
 			case 8:
+#if ALLOC_F64 == false
 				mov(rcx, (uintptr_t)op.rs2.reg_ptr());
 				mov(rcx, qword[rcx]);
 				mov(qword[rax], rcx);
+#else
+				movd(dword[rax], regalloc.MapXRegister(op.rs2, 0));
+				movd(dword[rax + 4], regalloc.MapXRegister(op.rs2, 1));
+#endif
 				break;
 
 			default:
@@ -25,15 +25,18 @@
 static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
 		Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 };
 static s8 alloc_fregs[] = { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1 }; // XMM6 to XMM15 are callee-saved in Windows
+#define ALLOC_F64 true
 #else
 static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
 		Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 };
 static s8 alloc_fregs[] = { 8, 9, 10, 11, -1 }; // XMM8-11
+// all xmm registers are caller-saved on linux
+#define ALLOC_F64 false
 #endif
 
 class BlockCompiler;
 
-struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
+struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8, ALLOC_F64>
 {
 	X64RegAlloc(BlockCompiler *compiler) : compiler(compiler) {}
 

@@ -55,9 +58,9 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
 		return Xbyak::Reg32(ereg);
 	}
 
-	Xbyak::Xmm MapXRegister(const shil_param& param)
+	Xbyak::Xmm MapXRegister(const shil_param& param, int index = 0)
 	{
-		s8 ereg = mapf(param);
+		s8 ereg = mapf(param, index);
 		if (ereg == -1)
 			die("VRegister not allocated");
 		return Xbyak::Xmm(ereg);
@@ -587,13 +587,19 @@ protected:
 #ifndef XBYAK32
 			mov(rcx, (uintptr_t)&sin_table);
 			mov(rcx, qword[rcx + rax * 8]);
+#if ALLOC_F64 == false
 			mov(rdx, (uintptr_t)op.rd.reg_ptr());
 			mov(qword[rdx], rcx);
+#else
+			movd(mapXRegister(op.rd, 0), ecx);
+			shr(rcx, 32);
+			movd(mapXRegister(op.rd, 1), ecx);
+#endif
 #endif
 		}
 		else
 		{
-#ifdef EXPLODE_SPANS
+#if ALLOC_F64 == true
 			movss(mapXRegister(op.rd, 0), dword[(size_t)&sin_table + eax * 8]);
 			movss(mapXRegister(op.rd, 1), dword[(size_t)&sin_table[0].u[1] + eax * 8]);
 #else

@@ -653,15 +659,25 @@ protected:
 		}
 		else if (param.is_reg())
 		{
-			if (param.is_r32f())
+			if (isAllocf(param))
 			{
-				if (isAllocf(param))
+				if (param.is_r32f() || param.is_r64f())
 				{
-					Xbyak::Xmm sreg = mapXRegister(param);
+					Xbyak::Xmm sreg = mapXRegister(param, 0);
 					if (!reg.isXMM())
-						movd((const Xbyak::Reg32 &)reg, sreg);
+						movd(reg.cvt32(), sreg);
 					else if (reg != sreg)
 						movss((const Xbyak::Xmm &)reg, sreg);
+#ifndef XBYAK32
+					if (param.is_r64f())
+					{
+						sreg = mapXRegister(param, 1);
+						verify(reg != rax);
+						movd(eax, sreg);
+						shl(rax, 32);
+						or_(reg, rax);
+					}
+#endif
 				}
 				else
 				{

@@ -670,44 +686,41 @@ protected:
 					{
 #ifndef XBYAK32
 						mov(rax, (size_t)param.reg_ptr());
-						mov((const Xbyak::Reg32 &)reg, dword[rax]);
+						mov(reg.cvt32(), dword[rax]);
 #endif
 					}
 					else
 					{
-						mov((const Xbyak::Reg32 &)reg, dword[param.reg_ptr()]);
+						mov(reg.cvt32(), dword[param.reg_ptr()]);
 					}
 				}
 			}
+			else if (isAllocg(param))
+			{
+				Xbyak::Reg32 sreg = mapRegister(param);
+				if (reg.isXMM())
+					movd((const Xbyak::Xmm &)reg, sreg);
+				else if (reg != sreg)
+					mov(reg.cvt32(), sreg);
+			}
 			else
 			{
-				if (isAllocg(param))
+				if (ArchX64)
 				{
-					Xbyak::Reg32 sreg = mapRegister(param);
-					if (reg.isXMM())
-						movd((const Xbyak::Xmm &)reg, sreg);
-					else if (reg != sreg)
-						mov((const Xbyak::Reg32 &)reg, sreg);
+#ifndef XBYAK32
+					mov(rax, (size_t)param.reg_ptr());
+					if (!reg.isXMM())
+						mov(reg.cvt32(), dword[rax]);
+					else
+						movss((const Xbyak::Xmm &)reg, dword[rax]);
+#endif
 				}
 				else
 				{
-					if (ArchX64)
-					{
-#ifndef XBYAK32
-						mov(rax, (size_t)param.reg_ptr());
-						if (!reg.isXMM())
-							mov((const Xbyak::Reg32 &)reg, dword[rax]);
-						else
-							movss((const Xbyak::Xmm &)reg, dword[rax]);
-#endif
-					}
-					else
-					{
-						if (!reg.isXMM())
-							mov((const Xbyak::Reg32 &)reg, dword[param.reg_ptr()]);
-						else
-							movss((const Xbyak::Xmm &)reg, dword[param.reg_ptr()]);
-					}
+					if (!reg.isXMM())
+						mov(reg.cvt32(), dword[param.reg_ptr()]);
+					else
+						movss((const Xbyak::Xmm &)reg, dword[param.reg_ptr()]);
 				}
 			}
 		}

@@ -724,17 +737,25 @@ protected:
 		{
 			Xbyak::Reg32 sreg = mapRegister(param);
 			if (!reg.isXMM())
-				mov(sreg, (const Xbyak::Reg32 &)reg);
+				mov(sreg, reg.cvt32());
 			else if (reg != sreg)
 				movd(sreg, (const Xbyak::Xmm &)reg);
 		}
 		else if (isAllocf(param))
 		{
-			Xbyak::Xmm sreg = mapXRegister(param);
+			Xbyak::Xmm sreg = mapXRegister(param, 0);
 			if (!reg.isXMM())
-				movd(sreg, (const Xbyak::Reg32 &)reg);
+				movd(sreg, reg.cvt32());
 			else if (reg != sreg)
 				movss(sreg, (const Xbyak::Xmm &)reg);
+#ifndef XBYAK32
+			if (param.is_r64f())
+			{
+				sreg = mapXRegister(param, 1);
+				shr(reg, 32);
+				movd(sreg, reg.cvt32());
+			}
+#endif
 		}
 		else
 		{

@@ -743,7 +764,7 @@ protected:
 #ifndef XBYAK32
 			mov(rax, (size_t)param.reg_ptr());
 			if (!reg.isXMM())
-				mov(dword[rax], (const Xbyak::Reg32 &)reg);
+				mov(dword[rax], reg.cvt32());
 			else
 				movss(dword[rax], (const Xbyak::Xmm &)reg);
 #endif

@@ -751,7 +772,7 @@ protected:
 		else
 		{
 			if (!reg.isXMM())
-				mov(dword[param.reg_ptr()], (const Xbyak::Reg32 &)reg);
+				mov(dword[param.reg_ptr()], reg.cvt32());
 			else
 				movss(dword[param.reg_ptr()], (const Xbyak::Xmm &)reg);
 		}

@@ -763,16 +784,16 @@ private:
 		return static_cast<T*>(this)->regalloc.MapRegister(param);
 	}
 
-	Xbyak::Xmm mapXRegister(const shil_param& param) {
-		return static_cast<T*>(this)->regalloc.MapXRegister(param);
+	Xbyak::Xmm mapXRegister(const shil_param& param, int index = 0) {
+		return static_cast<T*>(this)->regalloc.MapXRegister(param, index);
 	}
 
 	int mapg(const shil_param& param) {
 		return (int)static_cast<T*>(this)->regalloc.mapg(param);
 	}
 
-	int mapf(const shil_param& param) {
-		return (int)static_cast<T*>(this)->regalloc.mapf(param);
+	int mapf(const shil_param& param, int index = 0) {
+		return (int)static_cast<T*>(this)->regalloc.mapf(param, index);
 	}
 
 	bool isAllocg(const shil_param& param) {
@@ -526,15 +526,14 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b
 {
 	if (!op.rs1.is_imm())
 		return false;
-	u32 size = op.flags & 0x7f;
 	u32 addr = op.rs1.imm_value();
 	bool isram = false;
-	void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
+	void* ptr = _vmem_read_const(addr, isram, op.size > 4 ? 4 : op.size);
 
 	if (isram)
 	{
 		// Immediate pointer to RAM: super-duper fast access
-		switch (size)
+		switch (op.size)
 		{
 		case 1:
 			if (regalloc.IsAllocg(op.rd))

@@ -569,14 +568,12 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b
 			break;
 
 		case 8:
-#ifdef EXPLODE_SPANS
-			if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
+			if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd))
 			{
 				movd(regalloc.MapXRegister(op.rd, 0), dword[ptr]);
 				movd(regalloc.MapXRegister(op.rd, 1), dword[(u32 *)ptr + 1]);
 			}
 			else
-#endif
 			{
 				movq(xmm0, qword[ptr]);
 				movq(qword[op.rd.reg_ptr()], xmm0);

@@ -591,7 +588,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b
 	else
 	{
 		// Not RAM: the returned pointer is a memory handler
-		if (size == 8)
+		if (op.size == 8)
 		{
 			verify(!regalloc.IsAllocAny(op.rd));
 

@@ -608,7 +605,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b
 		{
 			mov(ecx, addr);
 
-			switch(size)
+			switch(op.size)
 			{
 			case 1:
 				genCall((void (DYNACALL *)())ptr);

@@ -639,15 +636,14 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo*
 {
 	if (!op.rs1.is_imm())
 		return false;
-	u32 size = op.flags & 0x7f;
 	u32 addr = op.rs1.imm_value();
 	bool isram = false;
-	void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
+	void* ptr = _vmem_write_const(addr, isram, op.size > 4 ? 4 : op.size);
 
 	if (isram)
 	{
 		// Immediate pointer to RAM: super-duper fast access
-		switch (size)
+		switch (op.size)
 		{
 		case 1:
 			if (regalloc.IsAllocg(op.rs2))

@@ -697,14 +693,12 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo*
 			break;
 
 		case 8:
-#ifdef EXPLODE_SPANS
-			if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
+			if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2))
 			{
 				movd(dword[ptr], regalloc.MapXRegister(op.rs2, 0));
 				movd(dword[(u32 *)ptr + 1], regalloc.MapXRegister(op.rs2, 1));
 			}
 			else
-#endif
 			{
 				movq(xmm0, qword[op.rs2.reg_ptr()]);
 				movq(qword[ptr], xmm0);
@@ -268,10 +268,10 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 		break;
 
 	case shop_mov64:
-		verify(op.rd.is_r64());
-		verify(op.rs1.is_r64());
+		verify(op.rd.is_r64f());
+		verify(op.rs1.is_r64f());
 
-#ifdef EXPLODE_SPANS
+#if ALLOC_F64 == true
 		movss(regalloc.MapXRegister(op.rd, 0), regalloc.MapXRegister(op.rs1, 0));
 		movss(regalloc.MapXRegister(op.rd, 1), regalloc.MapXRegister(op.rs1, 1));
 #else

@@ -297,7 +297,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 	}
 
 	int memOpSize;
-	switch (op.flags & 0x7f)
+	switch (op.size)
 	{
 	case 1:
 		memOpSize = MemSize::S8;

@@ -329,14 +329,12 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 	}
 	else
 	{
-#ifdef EXPLODE_SPANS
-		if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
+		if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd))
 		{
 			mov(regalloc.MapXRegister(op.rd, 0), xmm0);
 			mov(regalloc.MapXRegister(op.rd, 1), xmm1);
 		}
 		else
-#endif
 		{
 			verify(!regalloc.IsAllocAny(op.rd));
 			movss(dword[op.rd.reg_ptr()], xmm0);

@@ -361,7 +359,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 	}
 
 	int memOpSize;
-	switch (op.flags & 0x7f)
+	switch (op.size)
 	{
 	case 1:
 		memOpSize = MemSize::S8;

@@ -382,14 +380,12 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode&
 	else if (memOpSize == MemSize::F32)
 		shil_param_to_host_reg(op.rs2, xmm0);
 	else {
-#ifdef EXPLODE_SPANS
-		if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
+		if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2))
 		{
 			mov(xmm0, regalloc.MapXRegister(op.rs2, 0));
 			mov(xmm1, regalloc.MapXRegister(op.rs2, 1));
 		}
 		else
-#endif
 		{
 			movd(xmm0, dword[op.rs2.reg_ptr()]);
 			movd(xmm1, dword[op.rs2.reg_ptr() + 1]);
@@ -19,9 +19,11 @@
 #pragma once
 #include "hw/sh4/dyna/ssa_regalloc.h"
 
+#define ALLOC_F64 false
+
 class X86Compiler;
 
-struct X86RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
+struct X86RegAlloc : RegAlloc<Xbyak::Operand::Code, s8, ALLOC_F64>
 {
 	X86RegAlloc(X86Compiler *compiler) : compiler(compiler) {}
 

@@ -40,9 +42,9 @@ struct X86RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
 		return Xbyak::Reg32(ereg);
 	}
 
-	Xbyak::Xmm MapXRegister(const shil_param& param)
+	Xbyak::Xmm MapXRegister(const shil_param& param, int index = 0)
 	{
-		s8 ereg = mapf(param);
+		s8 ereg = mapf(param, index);
 		if (ereg == -1)
 			die("VRegister not allocated");
 		return Xbyak::Xmm(ereg);