sh4: use double for fipr and ftrv. divide before sqrt for fsrra. ssa fix
dynarec: use double to implement fipr and ftrv except on arm32
interpreter: always use double for fipr and ftrv
fsrra: perform division before square root
fmac: use std::fma or native fma op
get rid of unused dynarec op shop_swap
ssa: dead register pass must assume interpreter fallback modifies all registers
ssa: replace reg+0 address by reg in constant propagation pass
decoder: replace address offset 0 by null param for indexed mem access
This commit is contained in:
parent
9aa7371d82
commit
ec3ad9b328
|
@ -508,6 +508,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
||||||
u32 shft=p-PRM_RN_D4_x1;
|
u32 shft=p-PRM_RN_D4_x1;
|
||||||
r1=mk_regi(reg_r0+GetN(op));
|
r1=mk_regi(reg_r0+GetN(op));
|
||||||
r2=mk_imm(GetImm4(op)<<shft);
|
r2=mk_imm(GetImm4(op)<<shft);
|
||||||
|
if (r2.imm_value() == 0)
|
||||||
|
r2 = shil_param();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -523,6 +525,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
||||||
u32 shft=p-PRM_RM_D4_x1;
|
u32 shft=p-PRM_RM_D4_x1;
|
||||||
r1=mk_regi(reg_r0+GetM(op));
|
r1=mk_regi(reg_r0+GetM(op));
|
||||||
r2=mk_imm(GetImm4(op)<<shft);
|
r2=mk_imm(GetImm4(op)<<shft);
|
||||||
|
if (r2.imm_value() == 0)
|
||||||
|
r2 = shil_param();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -538,6 +542,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
||||||
u32 shft=p-PRM_GBR_D8_x1;
|
u32 shft=p-PRM_GBR_D8_x1;
|
||||||
r1=mk_regi(reg_gbr);
|
r1=mk_regi(reg_gbr);
|
||||||
r2=mk_imm(GetImm8(op)<<shft);
|
r2=mk_imm(GetImm8(op)<<shft);
|
||||||
|
if (r2.imm_value() == 0)
|
||||||
|
r2 = shil_param();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -979,7 +985,7 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||||
blk->guest_opcodes++;
|
blk->guest_opcodes++;
|
||||||
dec_updateBlockCycles(blk, op);
|
dec_updateBlockCycles(blk, op);
|
||||||
|
|
||||||
if (OpDesc[op]->IsFloatingPoint())
|
if (!blk->has_fpu_op && OpDesc[op]->IsFloatingPoint())
|
||||||
{
|
{
|
||||||
if (sr.FD == 1)
|
if (sr.FD == 1)
|
||||||
{
|
{
|
||||||
|
|
|
@ -135,6 +135,28 @@ shil_compile( \
|
||||||
die("This opcode requires native dynarec implementation"); \
|
die("This opcode requires native dynarec implementation"); \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
#if SHIL_MODE==1
|
||||||
|
|
||||||
|
template<int Stride = 1>
|
||||||
|
static inline float innerProduct(const float *f1, const float *f2)
|
||||||
|
{
|
||||||
|
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64
|
||||||
|
const double f = (double)f1[0] * f2[Stride * 0]
|
||||||
|
+ (double)f1[1] * f2[Stride * 1]
|
||||||
|
+ (double)f1[2] * f2[Stride * 2]
|
||||||
|
+ (double)f1[3] * f2[Stride * 3];
|
||||||
|
return fixNaN((float)f);
|
||||||
|
#else
|
||||||
|
const float f = f1[0] * f2[Stride * 0]
|
||||||
|
+ f1[1] * f2[Stride * 1]
|
||||||
|
+ f1[2] * f2[Stride * 2]
|
||||||
|
+ f1[3] * f2[Stride * 3];
|
||||||
|
return fixNaN(f);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define BIN_OP_I(z)
|
#define BIN_OP_I(z)
|
||||||
|
@ -415,24 +437,6 @@ shil_compile
|
||||||
|
|
||||||
shil_opc_end()
|
shil_opc_end()
|
||||||
|
|
||||||
|
|
||||||
//shop_swap -- swap all bytes in word
|
|
||||||
shil_opc(swap)
|
|
||||||
shil_canonical
|
|
||||||
(
|
|
||||||
u32,f1,(u32 r1),
|
|
||||||
return (r1 >>24) | ((r1 >>16)&0xFF00) |((r1&0xFF00)<<8) | (r1<<24);
|
|
||||||
)
|
|
||||||
|
|
||||||
shil_compile
|
|
||||||
(
|
|
||||||
shil_cf_arg_u32(rs1);
|
|
||||||
shil_cf(f1);
|
|
||||||
shil_cf_rv_u32(rd);
|
|
||||||
)
|
|
||||||
|
|
||||||
shil_opc_end()
|
|
||||||
|
|
||||||
//shop_shld
|
//shop_shld
|
||||||
shil_opc(shld)
|
shil_opc(shld)
|
||||||
shil_canonical
|
shil_canonical
|
||||||
|
@ -909,31 +913,12 @@ shil_opc_end()
|
||||||
//shop_fipr
|
//shop_fipr
|
||||||
shil_opc(fipr)
|
shil_opc(fipr)
|
||||||
|
|
||||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
|
||||||
shil_canonical
|
shil_canonical
|
||||||
(
|
(
|
||||||
f32,f1,(const float* fn, const float* fm),
|
f32,f1,(const float* fn, const float* fm),
|
||||||
|
|
||||||
double idp = (double)fn[0] * fm[0];
|
return innerProduct(fn, fm);
|
||||||
idp += (double)fn[1] * fm[1];
|
|
||||||
idp += (double)fn[2] * fm[2];
|
|
||||||
idp += (double)fn[3] * fm[3];
|
|
||||||
|
|
||||||
return fixNaN((float)idp);
|
|
||||||
)
|
)
|
||||||
#else
|
|
||||||
shil_canonical
|
|
||||||
(
|
|
||||||
f32,f1,(float* fn, float* fm),
|
|
||||||
|
|
||||||
float idp = fn[0] * fm[0];
|
|
||||||
idp+=fn[1]*fm[1];
|
|
||||||
idp+=fn[2]*fm[2];
|
|
||||||
idp+=fn[3]*fm[3];
|
|
||||||
|
|
||||||
return fixNaN(idp);
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
shil_compile
|
shil_compile
|
||||||
(
|
(
|
||||||
|
@ -942,74 +927,24 @@ shil_compile
|
||||||
shil_cf(f1);
|
shil_cf(f1);
|
||||||
shil_cf_rv_f32(rd);
|
shil_cf_rv_f32(rd);
|
||||||
)
|
)
|
||||||
|
|
||||||
shil_opc_end()
|
shil_opc_end()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//shop_ftrv
|
//shop_ftrv
|
||||||
shil_opc(ftrv)
|
shil_opc(ftrv)
|
||||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
|
||||||
shil_canonical
|
shil_canonical
|
||||||
(
|
(
|
||||||
void,f1,(float* fd, const float* fn, const float* fm),
|
void,f1,(float *fd, const float *fn, const float *fm),
|
||||||
|
|
||||||
double v1 = (double)fm[0] * fn[0] +
|
float v1 = innerProduct<4>(fn, fm);
|
||||||
(double)fm[4] * fn[1] +
|
float v2 = innerProduct<4>(fn, fm + 1);
|
||||||
(double)fm[8] * fn[2] +
|
float v3 = innerProduct<4>(fn, fm + 2);
|
||||||
(double)fm[12] * fn[3];
|
float v4 = innerProduct<4>(fn, fm + 3);
|
||||||
|
fd[0] = v1;
|
||||||
double v2 = (double)fm[1] * fn[0] +
|
fd[1] = v2;
|
||||||
(double)fm[5] * fn[1] +
|
fd[2] = v3;
|
||||||
(double)fm[9] * fn[2] +
|
fd[3] = v4;
|
||||||
(double)fm[13] * fn[3];
|
|
||||||
|
|
||||||
double v3 = (double)fm[2] * fn[0] +
|
|
||||||
(double)fm[6] * fn[1] +
|
|
||||||
(double)fm[10] * fn[2] +
|
|
||||||
(double)fm[14] * fn[3];
|
|
||||||
|
|
||||||
double v4 = (double)fm[3] * fn[0] +
|
|
||||||
(double)fm[7] * fn[1] +
|
|
||||||
(double)fm[11] * fn[2] +
|
|
||||||
(double)fm[15] * fn[3];
|
|
||||||
|
|
||||||
fd[0] = fixNaN((float)v1);
|
|
||||||
fd[1] = fixNaN((float)v2);
|
|
||||||
fd[2] = fixNaN((float)v3);
|
|
||||||
fd[3] = fixNaN((float)v4);
|
|
||||||
)
|
)
|
||||||
#else
|
|
||||||
shil_canonical
|
|
||||||
(
|
|
||||||
void,f1,(float* fd,float* fn, float* fm),
|
|
||||||
|
|
||||||
float v1 = fm[0] * fn[0] +
|
|
||||||
fm[4] * fn[1] +
|
|
||||||
fm[8] * fn[2] +
|
|
||||||
fm[12] * fn[3];
|
|
||||||
|
|
||||||
float v2 = fm[1] * fn[0] +
|
|
||||||
fm[5] * fn[1] +
|
|
||||||
fm[9] * fn[2] +
|
|
||||||
fm[13] * fn[3];
|
|
||||||
|
|
||||||
float v3 = fm[2] * fn[0] +
|
|
||||||
fm[6] * fn[1] +
|
|
||||||
fm[10] * fn[2] +
|
|
||||||
fm[14] * fn[3];
|
|
||||||
|
|
||||||
float v4 = fm[3] * fn[0] +
|
|
||||||
fm[7] * fn[1] +
|
|
||||||
fm[11] * fn[2] +
|
|
||||||
fm[15] * fn[3];
|
|
||||||
|
|
||||||
fd[0] = fixNaN(v1);
|
|
||||||
fd[1] = fixNaN(v2);
|
|
||||||
fd[2] = fixNaN(v3);
|
|
||||||
fd[3] = fixNaN(v4);
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
shil_compile
|
shil_compile
|
||||||
(
|
(
|
||||||
shil_cf_arg_ptr(rs2);
|
shil_cf_arg_ptr(rs2);
|
||||||
|
@ -1024,7 +959,7 @@ shil_opc(fmac)
|
||||||
shil_canonical
|
shil_canonical
|
||||||
(
|
(
|
||||||
f32,f1,(float fn, float f0,float fm),
|
f32,f1,(float fn, float f0,float fm),
|
||||||
return fixNaN(fn + f0 * fm);
|
return fixNaN(std::fma(f0, fm, fn));
|
||||||
)
|
)
|
||||||
shil_compile
|
shil_compile
|
||||||
(
|
(
|
||||||
|
@ -1038,7 +973,18 @@ shil_opc_end()
|
||||||
|
|
||||||
//shop_fsrra
|
//shop_fsrra
|
||||||
shil_opc(fsrra)
|
shil_opc(fsrra)
|
||||||
UN_OP_F(1/sqrtf)
|
shil_canonical
|
||||||
|
(
|
||||||
|
f32,f1,(float fn),
|
||||||
|
|
||||||
|
return std::sqrt(1.f / fn);
|
||||||
|
)
|
||||||
|
shil_compile
|
||||||
|
(
|
||||||
|
shil_cf_arg_f32(rs1);
|
||||||
|
shil_cf(f1);
|
||||||
|
shil_cf_rv_f32(rd);
|
||||||
|
)
|
||||||
shil_opc_end()
|
shil_opc_end()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -167,9 +167,6 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
|
||||||
case shop_swaplb:
|
case shop_swaplb:
|
||||||
rd = shil_opcl_swaplb::f1::impl(rs1);
|
rd = shil_opcl_swaplb::f1::impl(rs1);
|
||||||
break;
|
break;
|
||||||
case shop_swap:
|
|
||||||
rd = shil_opcl_swap::f1::impl(rs1);
|
|
||||||
break;
|
|
||||||
case shop_seteq:
|
case shop_seteq:
|
||||||
rd = shil_opcl_seteq::f1::impl(rs1, rs2);
|
rd = shil_opcl_seteq::f1::impl(rs1, rs2);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -70,6 +70,7 @@ public:
|
||||||
|
|
||||||
for (shil_opcode& op : block->oplist)
|
for (shil_opcode& op : block->oplist)
|
||||||
{
|
{
|
||||||
|
// FIXME shop_ifb should be assumed to increase versions too? (increment all reg_versions[])
|
||||||
AddVersionToOperand(op.rs1, false);
|
AddVersionToOperand(op.rs1, false);
|
||||||
AddVersionToOperand(op.rs2, false);
|
AddVersionToOperand(op.rs2, false);
|
||||||
AddVersionToOperand(op.rs3, false);
|
AddVersionToOperand(op.rs3, false);
|
||||||
|
@ -212,26 +213,18 @@ private:
|
||||||
}
|
}
|
||||||
else if (op.op == shop_readm || op.op == shop_writem)
|
else if (op.op == shop_readm || op.op == shop_writem)
|
||||||
{
|
{
|
||||||
if (op.rs1.is_imm())
|
if (op.rs1.is_imm() && !op.rs3.is_reg())
|
||||||
{
|
{
|
||||||
if (op.rs3.is_imm())
|
// Merge base addr and offset
|
||||||
{
|
if (op.rs3.is_imm()) {
|
||||||
// Merge base addr and offset
|
|
||||||
op.rs1._imm += op.rs3.imm_value();
|
op.rs1._imm += op.rs3.imm_value();
|
||||||
op.rs3.type = FMT_NULL;
|
op.rs3.type = FMT_NULL;
|
||||||
}
|
}
|
||||||
else if (op.rs3.is_reg())
|
|
||||||
{
|
|
||||||
// Swap rs1 and rs3 so that rs1 is never an immediate operand
|
|
||||||
shil_param t = op.rs1;
|
|
||||||
op.rs1 = op.rs3;
|
|
||||||
op.rs3 = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we know the address to read and it's in the same memory page(s) as the block
|
// If we know the address to read and it's in the same memory page(s) as the block
|
||||||
// and if those pages are read-only, then we can directly read the memory at compile time
|
// and if those pages are read-only, then we can directly read the memory at compile time
|
||||||
// and propagate the read value as a constant.
|
// and propagate the read value as a constant.
|
||||||
if (op.rs1.is_imm() && op.op == shop_readm && block->read_only
|
if (op.op == shop_readm && block->read_only
|
||||||
&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
|
&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
|
||||||
&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
|
&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
|
||||||
&& op.size <= 4)
|
&& op.size <= 4)
|
||||||
|
@ -263,6 +256,15 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (op.rs1.is_imm() && op.rs3.is_reg())
|
||||||
|
// Swap rs1 and rs3 so that rs1 is never an immediate operand
|
||||||
|
std::swap(op.rs1, op.rs3);
|
||||||
|
if (op.rs3.is_imm() && op.rs3.imm_value() == 0)
|
||||||
|
// 0 displacement has no effect
|
||||||
|
op.rs3.type = FMT_NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (ExecuteConstOp(&op))
|
else if (ExecuteConstOp(&op))
|
||||||
{
|
{
|
||||||
|
@ -440,9 +442,9 @@ private:
|
||||||
for (size_t opnum = 0; opnum < block->oplist.size(); opnum++)
|
for (size_t opnum = 0; opnum < block->oplist.size(); opnum++)
|
||||||
{
|
{
|
||||||
shil_opcode& op = block->oplist[opnum];
|
shil_opcode& op = block->oplist[opnum];
|
||||||
if (op.rs2.is_imm())
|
if (op.rs2.is_imm() || op.rs2.is_null())
|
||||||
{
|
{
|
||||||
if (op.rs2.imm_value() == 0)
|
if (op.rs2.is_null() || op.rs2.imm_value() == 0)
|
||||||
{
|
{
|
||||||
// a & 0 == 0
|
// a & 0 == 0
|
||||||
// a * 0 == 0
|
// a * 0 == 0
|
||||||
|
@ -590,10 +592,15 @@ private:
|
||||||
defnum = opnum;
|
defnum = opnum;
|
||||||
|
|
||||||
// find alias redef
|
// find alias redef
|
||||||
if (DefinesHigherVersion(op->rd, alias.second) && aliasdef == (size_t)-1)
|
if (aliasdef == (size_t)-1)
|
||||||
aliasdef = opnum;
|
{
|
||||||
else if (DefinesHigherVersion(op->rd2, alias.second) && aliasdef == (size_t)-1)
|
if (DefinesHigherVersion(op->rd, alias.second))
|
||||||
aliasdef = opnum;
|
aliasdef = opnum;
|
||||||
|
else if (DefinesHigherVersion(op->rd2, alias.second))
|
||||||
|
aliasdef = opnum;
|
||||||
|
else if (op->op == shop_ifb)
|
||||||
|
aliasdef = opnum;
|
||||||
|
}
|
||||||
|
|
||||||
// find last use
|
// find last use
|
||||||
if (UsesRegValue(op->rs1, alias.first))
|
if (UsesRegValue(op->rs1, alias.first))
|
||||||
|
|
|
@ -362,7 +362,7 @@ sh4op(i1111_nnnn_0111_1101)
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
if (fpscr.PR==0)
|
if (fpscr.PR==0)
|
||||||
{
|
{
|
||||||
fr[n] = (float)(1/sqrtf(fr[n]));
|
fr[n] = sqrtf(1.f / fr[n]);
|
||||||
CHECK_FPU_32(fr[n]);
|
CHECK_FPU_32(fr[n]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -406,23 +406,12 @@ sh4op(i1111_nnmm_1110_1101)
|
||||||
int m=(GetN(op)&0x3)<<2;
|
int m=(GetN(op)&0x3)<<2;
|
||||||
if (fpscr.PR == 0)
|
if (fpscr.PR == 0)
|
||||||
{
|
{
|
||||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
|
||||||
// multiplications are done with 28 bits of precision (53 - 25) and the final sum at 30 bits
|
|
||||||
double idp = (double)fr[n + 0] * fr[m + 0];
|
double idp = (double)fr[n + 0] * fr[m + 0];
|
||||||
idp += (double)fr[n + 1] * fr[m + 1];
|
idp += (double)fr[n + 1] * fr[m + 1];
|
||||||
idp += (double)fr[n + 2] * fr[m + 2];
|
idp += (double)fr[n + 2] * fr[m + 2];
|
||||||
idp += (double)fr[n + 3] * fr[m + 3];
|
idp += (double)fr[n + 3] * fr[m + 3];
|
||||||
|
|
||||||
fr[n + 3] = fixNaN((float)idp);
|
fr[n + 3] = fixNaN((float)idp);
|
||||||
#else
|
|
||||||
float rv = fr[n + 0] * fr[m + 0];
|
|
||||||
rv += fr[n + 1] * fr[m + 1];
|
|
||||||
rv += fr[n + 2] * fr[m + 2];
|
|
||||||
rv += fr[n + 3] * fr[m + 3];
|
|
||||||
|
|
||||||
CHECK_FPU_32(rv);
|
|
||||||
fr[n + 3] = rv;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -521,7 +510,7 @@ sh4op(i1111_nnnn_0110_1101)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
setDRn(op, fixNaN64(sqrt(getDRn(op))));
|
setDRn(op, fixNaN64(std::sqrt(getDRn(op))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -567,7 +556,7 @@ sh4op(i1111_nnnn_mmmm_1110)
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
u32 m = GetM(op);
|
u32 m = GetM(op);
|
||||||
|
|
||||||
fr[n] =(f32) ((f64)fr[n]+(f64)fr[0] * (f64)fr[m]);
|
fr[n] = std::fma(fr[0], fr[m], fr[n]);
|
||||||
CHECK_FPU_32(fr[n]);
|
CHECK_FPU_32(fr[n]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -591,7 +580,6 @@ sh4op(i1111_nn01_1111_1101)
|
||||||
|
|
||||||
if (fpscr.PR==0)
|
if (fpscr.PR==0)
|
||||||
{
|
{
|
||||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
|
||||||
double v1 = (double)xf[0] * fr[n + 0] +
|
double v1 = (double)xf[0] * fr[n + 0] +
|
||||||
(double)xf[4] * fr[n + 1] +
|
(double)xf[4] * fr[n + 1] +
|
||||||
(double)xf[8] * fr[n + 2] +
|
(double)xf[8] * fr[n + 2] +
|
||||||
|
@ -616,39 +604,6 @@ sh4op(i1111_nn01_1111_1101)
|
||||||
fr[n + 1] = fixNaN((float)v2);
|
fr[n + 1] = fixNaN((float)v2);
|
||||||
fr[n + 2] = fixNaN((float)v3);
|
fr[n + 2] = fixNaN((float)v3);
|
||||||
fr[n + 3] = fixNaN((float)v4);
|
fr[n + 3] = fixNaN((float)v4);
|
||||||
#else
|
|
||||||
float v1, v2, v3, v4;
|
|
||||||
|
|
||||||
v1 = xf[0] * fr[n + 0] +
|
|
||||||
xf[4] * fr[n + 1] +
|
|
||||||
xf[8] * fr[n + 2] +
|
|
||||||
xf[12] * fr[n + 3];
|
|
||||||
|
|
||||||
v2 = xf[1] * fr[n + 0] +
|
|
||||||
xf[5] * fr[n + 1] +
|
|
||||||
xf[9] * fr[n + 2] +
|
|
||||||
xf[13] * fr[n + 3];
|
|
||||||
|
|
||||||
v3 = xf[2] * fr[n + 0] +
|
|
||||||
xf[6] * fr[n + 1] +
|
|
||||||
xf[10] * fr[n + 2] +
|
|
||||||
xf[14] * fr[n + 3];
|
|
||||||
|
|
||||||
v4 = xf[3] * fr[n + 0] +
|
|
||||||
xf[7] * fr[n + 1] +
|
|
||||||
xf[11] * fr[n + 2] +
|
|
||||||
xf[15] * fr[n + 3];
|
|
||||||
|
|
||||||
CHECK_FPU_32(v1);
|
|
||||||
CHECK_FPU_32(v2);
|
|
||||||
CHECK_FPU_32(v3);
|
|
||||||
CHECK_FPU_32(v4);
|
|
||||||
|
|
||||||
fr[n + 0] = v1;
|
|
||||||
fr[n + 1] = v2;
|
|
||||||
fr[n + 2] = v3;
|
|
||||||
fr[n + 3] = v4;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -16,8 +16,6 @@
|
||||||
#include "hw/sh4/sh4_cache.h"
|
#include "hw/sh4/sh4_cache.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define iNimp cpu_iNimp
|
|
||||||
|
|
||||||
//Read Mem macros
|
//Read Mem macros
|
||||||
|
|
||||||
#define ReadMemU32(to,addr) to=ReadMem32(addr)
|
#define ReadMemU32(to,addr) to=ReadMem32(addr)
|
||||||
|
@ -41,11 +39,6 @@
|
||||||
#define WriteMemBOU8(addr,offset,data) WriteMemU8(addr+offset,data)
|
#define WriteMemBOU8(addr,offset,data) WriteMemU8(addr+offset,data)
|
||||||
|
|
||||||
// 0xxx
|
// 0xxx
|
||||||
void cpu_iNimp(u32 op, const char* info)
|
|
||||||
{
|
|
||||||
ERROR_LOG(INTERPRETER, "Unimplemented opcode: %08X next_pc: %08X pr: %08X msg: %s", op, next_pc, pr, info);
|
|
||||||
die("iNimp reached\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
//stc GBR,<REG_N>
|
//stc GBR,<REG_N>
|
||||||
sh4op(i0000_nnnn_0001_0010)
|
sh4op(i0000_nnnn_0001_0010)
|
||||||
|
|
|
@ -80,9 +80,9 @@ static u64 dec_MRd(DecParam d,DecParam s,u32 sz) { return dec_Fill(DM_ReadM,d,s,
|
||||||
//d= reg to read from
|
//d= reg to read from
|
||||||
static u64 dec_MWt(DecParam d,DecParam s,u32 sz) { return dec_Fill(DM_WriteM,d,s,shop_writem,sz); }
|
static u64 dec_MWt(DecParam d,DecParam s,u32 sz) { return dec_Fill(DM_WriteM,d,s,shop_writem,sz); }
|
||||||
|
|
||||||
sh4_opcodelistentry missing_opcode = {dec_illegalOp, iNotImplemented, 0, 0, ReadWritePC, "missing", 0, 0, CO, 1 };
|
static sh4_opcodelistentry missing_opcode = {dec_illegalOp, iNotImplemented, 0, 0, ReadWritePC, "missing", 0, 0, CO, 1 };
|
||||||
|
|
||||||
sh4_opcodelistentry opcodes[]=
|
static sh4_opcodelistentry opcodes[]=
|
||||||
{
|
{
|
||||||
//HLE
|
//HLE
|
||||||
{0, reios_trap, Mask_none, REIOS_OPCODE, Branch_dir, "reios_trap", 100, 100, CO, 1 },
|
{0, reios_trap, Mask_none, REIOS_OPCODE, Branch_dir, "reios_trap", 100, 100, CO, 1 },
|
||||||
|
@ -344,7 +344,7 @@ sh4_opcodelistentry opcodes[]=
|
||||||
{0,0,0,0,ReadWritePC}//Branch in order to stop the block and save PC ect :)
|
{0,0,0,0,ReadWritePC}//Branch in order to stop the block and save PC ect :)
|
||||||
};
|
};
|
||||||
|
|
||||||
void BuildOpcodeTables()
|
static void BuildOpcodeTables()
|
||||||
{
|
{
|
||||||
|
|
||||||
for (int i=0;i<0x10000;i++)
|
for (int i=0;i<0x10000;i++)
|
||||||
|
|
|
@ -76,7 +76,6 @@ struct sh4_opcodelistentry
|
||||||
};
|
};
|
||||||
|
|
||||||
extern sh4_opcodelistentry* OpDesc[0x10000];
|
extern sh4_opcodelistentry* OpDesc[0x10000];
|
||||||
extern sh4_opcodelistentry opcodes[];
|
|
||||||
|
|
||||||
void DissasembleOpcode(u16 opcode,u32 pc,char* Dissasm);
|
void DissasembleOpcode(u16 opcode,u32 pc,char* Dissasm);
|
||||||
enum DecParam
|
enum DecParam
|
||||||
|
|
|
@ -1933,8 +1933,8 @@ void Arm32Assembler::compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool op
|
||||||
|
|
||||||
case shop_fsrra:
|
case shop_fsrra:
|
||||||
Vmov(s1, 1.f);
|
Vmov(s1, 1.f);
|
||||||
Vsqrt(s0, reg.mapFReg(op->rs1));
|
Vdiv(s0, s1, reg.mapFReg(op->rs1));
|
||||||
Vdiv(reg.mapFReg(op->rd), s1, s0);
|
Vsqrt(reg.mapFReg(op->rd), s0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case shop_fsetgt:
|
case shop_fsetgt:
|
||||||
|
@ -1986,7 +1986,6 @@ void Arm32Assembler::compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool op
|
||||||
|
|
||||||
case shop_fipr:
|
case shop_fipr:
|
||||||
{
|
{
|
||||||
|
|
||||||
QRegister _r1 = q0;
|
QRegister _r1 = q0;
|
||||||
QRegister _r2 = q0;
|
QRegister _r2 = q0;
|
||||||
|
|
||||||
|
|
|
@ -879,9 +879,9 @@ public:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case shop_fsrra:
|
case shop_fsrra:
|
||||||
Fsqrt(s0, regalloc.MapVRegister(op.rs1));
|
|
||||||
Fmov(s1, 1.f);
|
Fmov(s1, 1.f);
|
||||||
Fdiv(regalloc.MapVRegister(op.rd), s1, s0);
|
Fdiv(s0, s1, regalloc.MapVRegister(op.rs1));
|
||||||
|
Fsqrt(regalloc.MapVRegister(op.rd), s0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case shop_fsetgt:
|
case shop_fsetgt:
|
||||||
|
@ -907,6 +907,7 @@ public:
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* fall back to the canonical implementations for better precision
|
||||||
case shop_fipr:
|
case shop_fipr:
|
||||||
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
||||||
Ld1(v0.V4S(), MemOperand(x9));
|
Ld1(v0.V4S(), MemOperand(x9));
|
||||||
|
@ -937,6 +938,7 @@ public:
|
||||||
Add(x9, x28, sh4_context_mem_operand(op.rd.reg_ptr()).GetOffset());
|
Add(x9, x28, sh4_context_mem_operand(op.rd.reg_ptr()).GetOffset());
|
||||||
St1(v5.V4S(), MemOperand(x9));
|
St1(v5.V4S(), MemOperand(x9));
|
||||||
break;
|
break;
|
||||||
|
*/
|
||||||
|
|
||||||
case shop_frswap:
|
case shop_frswap:
|
||||||
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
||||||
|
@ -1077,13 +1079,11 @@ public:
|
||||||
switch (size)
|
switch (size)
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
GenCallRuntime(addrspace::read8);
|
GenCallRuntime(addrspace::read8SX32);
|
||||||
Sxtb(w0, w0);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
GenCallRuntime(addrspace::read16);
|
GenCallRuntime(addrspace::read16SX32);
|
||||||
Sxth(w0, w0);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
|
@ -1497,7 +1497,7 @@ public:
|
||||||
// w0: vaddr, w1: addr
|
// w0: vaddr, w1: addr
|
||||||
checkBlockFpu = GetCursorAddress<DynaCode *>();
|
checkBlockFpu = GetCursorAddress<DynaCode *>();
|
||||||
Label fpu_enabled;
|
Label fpu_enabled;
|
||||||
Ldr(w10, sh4_context_mem_operand(&sr));
|
Ldr(w10, sh4_context_mem_operand(&sr.status));
|
||||||
Tbz(w10, 15, &fpu_enabled); // test SR.FD bit
|
Tbz(w10, 15, &fpu_enabled); // test SR.FD bit
|
||||||
|
|
||||||
Mov(w1, Sh4Ex_FpuDisabled); // exception code
|
Mov(w1, Sh4Ex_FpuDisabled); // exception code
|
||||||
|
|
|
@ -135,7 +135,7 @@ public:
|
||||||
if (mmu_enabled() && block->has_fpu_op)
|
if (mmu_enabled() && block->has_fpu_op)
|
||||||
{
|
{
|
||||||
Xbyak::Label fpu_enabled;
|
Xbyak::Label fpu_enabled;
|
||||||
mov(rax, (uintptr_t)&sr);
|
mov(rax, (uintptr_t)&sr.status);
|
||||||
test(dword[rax], 0x8000); // test SR.FD bit
|
test(dword[rax], 0x8000); // test SR.FD bit
|
||||||
jz(fpu_enabled);
|
jz(fpu_enabled);
|
||||||
mov(call_regs[0], block->vaddr); // pc
|
mov(call_regs[0], block->vaddr); // pc
|
||||||
|
|
|
@ -537,19 +537,18 @@ protected:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case shop_fsrra:
|
case shop_fsrra:
|
||||||
// RSQRTSS has an |error| <= 1.5*2^-12 where the SH4 FSRRA needs |error| <= 2^-21
|
|
||||||
sqrtss(xmm0, mapXRegister(op.rs1));
|
|
||||||
if (ArchX64)
|
if (ArchX64)
|
||||||
{
|
{
|
||||||
mov(eax, 0x3f800000); // 1.0
|
mov(eax, 0x3f800000); // 1.0
|
||||||
movd(mapXRegister(op.rd), eax);
|
movd(xmm0, eax);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
static float one = 1.f;
|
static float one = 1.f;
|
||||||
movss(mapXRegister(op.rd), dword[&one]);
|
movss(xmm0, dword[&one]);
|
||||||
}
|
}
|
||||||
divss(mapXRegister(op.rd), xmm0);
|
divss(xmm0, mapXRegister(op.rs1));
|
||||||
|
sqrtss(mapXRegister(op.rd), xmm0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case shop_fsetgt:
|
case shop_fsetgt:
|
||||||
|
|
|
@ -108,7 +108,7 @@ void X86Compiler::compile(RuntimeBlockInfo* block, bool force_checks, bool optim
|
||||||
if (mmu_enabled() && block->has_fpu_op)
|
if (mmu_enabled() && block->has_fpu_op)
|
||||||
{
|
{
|
||||||
Xbyak::Label fpu_enabled;
|
Xbyak::Label fpu_enabled;
|
||||||
mov(eax, dword[&sr]);
|
mov(eax, dword[&sr.status]);
|
||||||
test(eax, 0x8000); // test SR.FD bit
|
test(eax, 0x8000); // test SR.FD bit
|
||||||
jz(fpu_enabled);
|
jz(fpu_enabled);
|
||||||
push(Sh4Ex_FpuDisabled); // exception code
|
push(Sh4Ex_FpuDisabled); // exception code
|
||||||
|
|
Loading…
Reference in New Issue