sh4: use double for fipr and ftrv. divide before sqrt for fsrra. ssa fix
dynarec: use double to implement fipr and ftrv, except on arm32.
interpreter: always use double for fipr and ftrv.
fsrra: perform the division before the square root.
fmac: use std::fma or the native fma op.
Get rid of the unused dynarec op shop_swap.
ssa: the dead register pass must assume that an interpreter fallback modifies all registers.
ssa: replace a reg+0 address by reg in the constant propagation pass.
decoder: replace address offset 0 by a null param for indexed mem access.
This commit is contained in:
parent
9aa7371d82
commit
ec3ad9b328
|
@ -508,6 +508,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
|||
u32 shft=p-PRM_RN_D4_x1;
|
||||
r1=mk_regi(reg_r0+GetN(op));
|
||||
r2=mk_imm(GetImm4(op)<<shft);
|
||||
if (r2.imm_value() == 0)
|
||||
r2 = shil_param();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -523,6 +525,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
|||
u32 shft=p-PRM_RM_D4_x1;
|
||||
r1=mk_regi(reg_r0+GetM(op));
|
||||
r2=mk_imm(GetImm4(op)<<shft);
|
||||
if (r2.imm_value() == 0)
|
||||
r2 = shil_param();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -538,6 +542,8 @@ static void dec_param(DecParam p,shil_param& r1,shil_param& r2, u32 op)
|
|||
u32 shft=p-PRM_GBR_D8_x1;
|
||||
r1=mk_regi(reg_gbr);
|
||||
r2=mk_imm(GetImm8(op)<<shft);
|
||||
if (r2.imm_value() == 0)
|
||||
r2 = shil_param();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -979,7 +985,7 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
|||
blk->guest_opcodes++;
|
||||
dec_updateBlockCycles(blk, op);
|
||||
|
||||
if (OpDesc[op]->IsFloatingPoint())
|
||||
if (!blk->has_fpu_op && OpDesc[op]->IsFloatingPoint())
|
||||
{
|
||||
if (sr.FD == 1)
|
||||
{
|
||||
|
|
|
@ -135,6 +135,28 @@ shil_compile( \
|
|||
die("This opcode requires native dynarec implementation"); \
|
||||
)
|
||||
|
||||
#if SHIL_MODE==1
|
||||
|
||||
template<int Stride = 1>
|
||||
static inline float innerProduct(const float *f1, const float *f2)
|
||||
{
|
||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64
|
||||
const double f = (double)f1[0] * f2[Stride * 0]
|
||||
+ (double)f1[1] * f2[Stride * 1]
|
||||
+ (double)f1[2] * f2[Stride * 2]
|
||||
+ (double)f1[3] * f2[Stride * 3];
|
||||
return fixNaN((float)f);
|
||||
#else
|
||||
const float f = f1[0] * f2[Stride * 0]
|
||||
+ f1[1] * f2[Stride * 1]
|
||||
+ f1[2] * f2[Stride * 2]
|
||||
+ f1[3] * f2[Stride * 3];
|
||||
return fixNaN(f);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define BIN_OP_I(z)
|
||||
|
@ -415,24 +437,6 @@ shil_compile
|
|||
|
||||
shil_opc_end()
|
||||
|
||||
|
||||
//shop_swap -- swap all bytes in word
|
||||
shil_opc(swap)
|
||||
shil_canonical
|
||||
(
|
||||
u32,f1,(u32 r1),
|
||||
return (r1 >>24) | ((r1 >>16)&0xFF00) |((r1&0xFF00)<<8) | (r1<<24);
|
||||
)
|
||||
|
||||
shil_compile
|
||||
(
|
||||
shil_cf_arg_u32(rs1);
|
||||
shil_cf(f1);
|
||||
shil_cf_rv_u32(rd);
|
||||
)
|
||||
|
||||
shil_opc_end()
|
||||
|
||||
//shop_shld
|
||||
shil_opc(shld)
|
||||
shil_canonical
|
||||
|
@ -909,31 +913,12 @@ shil_opc_end()
|
|||
//shop_fipr
|
||||
shil_opc(fipr)
|
||||
|
||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
||||
shil_canonical
|
||||
(
|
||||
f32,f1,(const float* fn, const float* fm),
|
||||
|
||||
double idp = (double)fn[0] * fm[0];
|
||||
idp += (double)fn[1] * fm[1];
|
||||
idp += (double)fn[2] * fm[2];
|
||||
idp += (double)fn[3] * fm[3];
|
||||
|
||||
return fixNaN((float)idp);
|
||||
return innerProduct(fn, fm);
|
||||
)
|
||||
#else
|
||||
shil_canonical
|
||||
(
|
||||
f32,f1,(float* fn, float* fm),
|
||||
|
||||
float idp = fn[0] * fm[0];
|
||||
idp+=fn[1]*fm[1];
|
||||
idp+=fn[2]*fm[2];
|
||||
idp+=fn[3]*fm[3];
|
||||
|
||||
return fixNaN(idp);
|
||||
)
|
||||
#endif
|
||||
|
||||
shil_compile
|
||||
(
|
||||
|
@ -942,74 +927,24 @@ shil_compile
|
|||
shil_cf(f1);
|
||||
shil_cf_rv_f32(rd);
|
||||
)
|
||||
|
||||
shil_opc_end()
|
||||
|
||||
|
||||
|
||||
//shop_ftrv
|
||||
shil_opc(ftrv)
|
||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
||||
shil_canonical
|
||||
(
|
||||
void,f1,(float* fd, const float* fn, const float* fm),
|
||||
void,f1,(float *fd, const float *fn, const float *fm),
|
||||
|
||||
double v1 = (double)fm[0] * fn[0] +
|
||||
(double)fm[4] * fn[1] +
|
||||
(double)fm[8] * fn[2] +
|
||||
(double)fm[12] * fn[3];
|
||||
|
||||
double v2 = (double)fm[1] * fn[0] +
|
||||
(double)fm[5] * fn[1] +
|
||||
(double)fm[9] * fn[2] +
|
||||
(double)fm[13] * fn[3];
|
||||
|
||||
double v3 = (double)fm[2] * fn[0] +
|
||||
(double)fm[6] * fn[1] +
|
||||
(double)fm[10] * fn[2] +
|
||||
(double)fm[14] * fn[3];
|
||||
|
||||
double v4 = (double)fm[3] * fn[0] +
|
||||
(double)fm[7] * fn[1] +
|
||||
(double)fm[11] * fn[2] +
|
||||
(double)fm[15] * fn[3];
|
||||
|
||||
fd[0] = fixNaN((float)v1);
|
||||
fd[1] = fixNaN((float)v2);
|
||||
fd[2] = fixNaN((float)v3);
|
||||
fd[3] = fixNaN((float)v4);
|
||||
float v1 = innerProduct<4>(fn, fm);
|
||||
float v2 = innerProduct<4>(fn, fm + 1);
|
||||
float v3 = innerProduct<4>(fn, fm + 2);
|
||||
float v4 = innerProduct<4>(fn, fm + 3);
|
||||
fd[0] = v1;
|
||||
fd[1] = v2;
|
||||
fd[2] = v3;
|
||||
fd[3] = v4;
|
||||
)
|
||||
#else
|
||||
shil_canonical
|
||||
(
|
||||
void,f1,(float* fd,float* fn, float* fm),
|
||||
|
||||
float v1 = fm[0] * fn[0] +
|
||||
fm[4] * fn[1] +
|
||||
fm[8] * fn[2] +
|
||||
fm[12] * fn[3];
|
||||
|
||||
float v2 = fm[1] * fn[0] +
|
||||
fm[5] * fn[1] +
|
||||
fm[9] * fn[2] +
|
||||
fm[13] * fn[3];
|
||||
|
||||
float v3 = fm[2] * fn[0] +
|
||||
fm[6] * fn[1] +
|
||||
fm[10] * fn[2] +
|
||||
fm[14] * fn[3];
|
||||
|
||||
float v4 = fm[3] * fn[0] +
|
||||
fm[7] * fn[1] +
|
||||
fm[11] * fn[2] +
|
||||
fm[15] * fn[3];
|
||||
|
||||
fd[0] = fixNaN(v1);
|
||||
fd[1] = fixNaN(v2);
|
||||
fd[2] = fixNaN(v3);
|
||||
fd[3] = fixNaN(v4);
|
||||
)
|
||||
#endif
|
||||
shil_compile
|
||||
(
|
||||
shil_cf_arg_ptr(rs2);
|
||||
|
@ -1024,7 +959,7 @@ shil_opc(fmac)
|
|||
shil_canonical
|
||||
(
|
||||
f32,f1,(float fn, float f0,float fm),
|
||||
return fixNaN(fn + f0 * fm);
|
||||
return fixNaN(std::fma(f0, fm, fn));
|
||||
)
|
||||
shil_compile
|
||||
(
|
||||
|
@ -1038,7 +973,18 @@ shil_opc_end()
|
|||
|
||||
//shop_fsrra
|
||||
shil_opc(fsrra)
|
||||
UN_OP_F(1/sqrtf)
|
||||
shil_canonical
|
||||
(
|
||||
f32,f1,(float fn),
|
||||
|
||||
return std::sqrt(1.f / fn);
|
||||
)
|
||||
shil_compile
|
||||
(
|
||||
shil_cf_arg_f32(rs1);
|
||||
shil_cf(f1);
|
||||
shil_cf_rv_f32(rd);
|
||||
)
|
||||
shil_opc_end()
|
||||
|
||||
|
||||
|
|
|
@ -167,9 +167,6 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
|
|||
case shop_swaplb:
|
||||
rd = shil_opcl_swaplb::f1::impl(rs1);
|
||||
break;
|
||||
case shop_swap:
|
||||
rd = shil_opcl_swap::f1::impl(rs1);
|
||||
break;
|
||||
case shop_seteq:
|
||||
rd = shil_opcl_seteq::f1::impl(rs1, rs2);
|
||||
break;
|
||||
|
|
|
@ -70,6 +70,7 @@ public:
|
|||
|
||||
for (shil_opcode& op : block->oplist)
|
||||
{
|
||||
// FIXME shop_ifb should be assumed to increase versions too? (increment all reg_versions[])
|
||||
AddVersionToOperand(op.rs1, false);
|
||||
AddVersionToOperand(op.rs2, false);
|
||||
AddVersionToOperand(op.rs3, false);
|
||||
|
@ -212,26 +213,18 @@ private:
|
|||
}
|
||||
else if (op.op == shop_readm || op.op == shop_writem)
|
||||
{
|
||||
if (op.rs1.is_imm())
|
||||
if (op.rs1.is_imm() && !op.rs3.is_reg())
|
||||
{
|
||||
if (op.rs3.is_imm())
|
||||
{
|
||||
// Merge base addr and offset
|
||||
// Merge base addr and offset
|
||||
if (op.rs3.is_imm()) {
|
||||
op.rs1._imm += op.rs3.imm_value();
|
||||
op.rs3.type = FMT_NULL;
|
||||
}
|
||||
else if (op.rs3.is_reg())
|
||||
{
|
||||
// Swap rs1 and rs3 so that rs1 is never an immediate operand
|
||||
shil_param t = op.rs1;
|
||||
op.rs1 = op.rs3;
|
||||
op.rs3 = t;
|
||||
}
|
||||
|
||||
// If we know the address to read and it's in the same memory page(s) as the block
|
||||
// and if those pages are read-only, then we can directly read the memory at compile time
|
||||
// and propagate the read value as a constant.
|
||||
if (op.rs1.is_imm() && op.op == shop_readm && block->read_only
|
||||
if (op.op == shop_readm && block->read_only
|
||||
&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
|
||||
&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
|
||||
&& op.size <= 4)
|
||||
|
@ -263,6 +256,15 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op.rs1.is_imm() && op.rs3.is_reg())
|
||||
// Swap rs1 and rs3 so that rs1 is never an immediate operand
|
||||
std::swap(op.rs1, op.rs3);
|
||||
if (op.rs3.is_imm() && op.rs3.imm_value() == 0)
|
||||
// 0 displacement has no effect
|
||||
op.rs3.type = FMT_NULL;
|
||||
}
|
||||
}
|
||||
else if (ExecuteConstOp(&op))
|
||||
{
|
||||
|
@ -440,9 +442,9 @@ private:
|
|||
for (size_t opnum = 0; opnum < block->oplist.size(); opnum++)
|
||||
{
|
||||
shil_opcode& op = block->oplist[opnum];
|
||||
if (op.rs2.is_imm())
|
||||
if (op.rs2.is_imm() || op.rs2.is_null())
|
||||
{
|
||||
if (op.rs2.imm_value() == 0)
|
||||
if (op.rs2.is_null() || op.rs2.imm_value() == 0)
|
||||
{
|
||||
// a & 0 == 0
|
||||
// a * 0 == 0
|
||||
|
@ -590,10 +592,15 @@ private:
|
|||
defnum = opnum;
|
||||
|
||||
// find alias redef
|
||||
if (DefinesHigherVersion(op->rd, alias.second) && aliasdef == (size_t)-1)
|
||||
aliasdef = opnum;
|
||||
else if (DefinesHigherVersion(op->rd2, alias.second) && aliasdef == (size_t)-1)
|
||||
aliasdef = opnum;
|
||||
if (aliasdef == (size_t)-1)
|
||||
{
|
||||
if (DefinesHigherVersion(op->rd, alias.second))
|
||||
aliasdef = opnum;
|
||||
else if (DefinesHigherVersion(op->rd2, alias.second))
|
||||
aliasdef = opnum;
|
||||
else if (op->op == shop_ifb)
|
||||
aliasdef = opnum;
|
||||
}
|
||||
|
||||
// find last use
|
||||
if (UsesRegValue(op->rs1, alias.first))
|
||||
|
|
|
@ -362,7 +362,7 @@ sh4op(i1111_nnnn_0111_1101)
|
|||
u32 n = GetN(op);
|
||||
if (fpscr.PR==0)
|
||||
{
|
||||
fr[n] = (float)(1/sqrtf(fr[n]));
|
||||
fr[n] = sqrtf(1.f / fr[n]);
|
||||
CHECK_FPU_32(fr[n]);
|
||||
}
|
||||
else
|
||||
|
@ -406,23 +406,12 @@ sh4op(i1111_nnmm_1110_1101)
|
|||
int m=(GetN(op)&0x3)<<2;
|
||||
if (fpscr.PR == 0)
|
||||
{
|
||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
||||
// multiplications are done with 28 bits of precision (53 - 25) and the final sum at 30 bits
|
||||
double idp = (double)fr[n + 0] * fr[m + 0];
|
||||
idp += (double)fr[n + 1] * fr[m + 1];
|
||||
idp += (double)fr[n + 2] * fr[m + 2];
|
||||
idp += (double)fr[n + 3] * fr[m + 3];
|
||||
|
||||
fr[n + 3] = fixNaN((float)idp);
|
||||
#else
|
||||
float rv = fr[n + 0] * fr[m + 0];
|
||||
rv += fr[n + 1] * fr[m + 1];
|
||||
rv += fr[n + 2] * fr[m + 2];
|
||||
rv += fr[n + 3] * fr[m + 3];
|
||||
|
||||
CHECK_FPU_32(rv);
|
||||
fr[n + 3] = rv;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -521,7 +510,7 @@ sh4op(i1111_nnnn_0110_1101)
|
|||
}
|
||||
else
|
||||
{
|
||||
setDRn(op, fixNaN64(sqrt(getDRn(op))));
|
||||
setDRn(op, fixNaN64(std::sqrt(getDRn(op))));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -567,7 +556,7 @@ sh4op(i1111_nnnn_mmmm_1110)
|
|||
u32 n = GetN(op);
|
||||
u32 m = GetM(op);
|
||||
|
||||
fr[n] =(f32) ((f64)fr[n]+(f64)fr[0] * (f64)fr[m]);
|
||||
fr[n] = std::fma(fr[0], fr[m], fr[n]);
|
||||
CHECK_FPU_32(fr[n]);
|
||||
}
|
||||
else
|
||||
|
@ -591,7 +580,6 @@ sh4op(i1111_nn01_1111_1101)
|
|||
|
||||
if (fpscr.PR==0)
|
||||
{
|
||||
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
|
||||
double v1 = (double)xf[0] * fr[n + 0] +
|
||||
(double)xf[4] * fr[n + 1] +
|
||||
(double)xf[8] * fr[n + 2] +
|
||||
|
@ -616,39 +604,6 @@ sh4op(i1111_nn01_1111_1101)
|
|||
fr[n + 1] = fixNaN((float)v2);
|
||||
fr[n + 2] = fixNaN((float)v3);
|
||||
fr[n + 3] = fixNaN((float)v4);
|
||||
#else
|
||||
float v1, v2, v3, v4;
|
||||
|
||||
v1 = xf[0] * fr[n + 0] +
|
||||
xf[4] * fr[n + 1] +
|
||||
xf[8] * fr[n + 2] +
|
||||
xf[12] * fr[n + 3];
|
||||
|
||||
v2 = xf[1] * fr[n + 0] +
|
||||
xf[5] * fr[n + 1] +
|
||||
xf[9] * fr[n + 2] +
|
||||
xf[13] * fr[n + 3];
|
||||
|
||||
v3 = xf[2] * fr[n + 0] +
|
||||
xf[6] * fr[n + 1] +
|
||||
xf[10] * fr[n + 2] +
|
||||
xf[14] * fr[n + 3];
|
||||
|
||||
v4 = xf[3] * fr[n + 0] +
|
||||
xf[7] * fr[n + 1] +
|
||||
xf[11] * fr[n + 2] +
|
||||
xf[15] * fr[n + 3];
|
||||
|
||||
CHECK_FPU_32(v1);
|
||||
CHECK_FPU_32(v2);
|
||||
CHECK_FPU_32(v3);
|
||||
CHECK_FPU_32(v4);
|
||||
|
||||
fr[n + 0] = v1;
|
||||
fr[n + 1] = v2;
|
||||
fr[n + 2] = v3;
|
||||
fr[n + 3] = v4;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
#include "hw/sh4/sh4_cache.h"
|
||||
#endif
|
||||
|
||||
#define iNimp cpu_iNimp
|
||||
|
||||
//Read Mem macros
|
||||
|
||||
#define ReadMemU32(to,addr) to=ReadMem32(addr)
|
||||
|
@ -41,11 +39,6 @@
|
|||
#define WriteMemBOU8(addr,offset,data) WriteMemU8(addr+offset,data)
|
||||
|
||||
// 0xxx
|
||||
void cpu_iNimp(u32 op, const char* info)
|
||||
{
|
||||
ERROR_LOG(INTERPRETER, "Unimplemented opcode: %08X next_pc: %08X pr: %08X msg: %s", op, next_pc, pr, info);
|
||||
die("iNimp reached\n");
|
||||
}
|
||||
|
||||
//stc GBR,<REG_N>
|
||||
sh4op(i0000_nnnn_0001_0010)
|
||||
|
|
|
@ -80,9 +80,9 @@ static u64 dec_MRd(DecParam d,DecParam s,u32 sz) { return dec_Fill(DM_ReadM,d,s,
|
|||
//d= reg to read from
|
||||
static u64 dec_MWt(DecParam d,DecParam s,u32 sz) { return dec_Fill(DM_WriteM,d,s,shop_writem,sz); }
|
||||
|
||||
sh4_opcodelistentry missing_opcode = {dec_illegalOp, iNotImplemented, 0, 0, ReadWritePC, "missing", 0, 0, CO, 1 };
|
||||
static sh4_opcodelistentry missing_opcode = {dec_illegalOp, iNotImplemented, 0, 0, ReadWritePC, "missing", 0, 0, CO, 1 };
|
||||
|
||||
sh4_opcodelistentry opcodes[]=
|
||||
static sh4_opcodelistentry opcodes[]=
|
||||
{
|
||||
//HLE
|
||||
{0, reios_trap, Mask_none, REIOS_OPCODE, Branch_dir, "reios_trap", 100, 100, CO, 1 },
|
||||
|
@ -344,7 +344,7 @@ sh4_opcodelistentry opcodes[]=
|
|||
{0,0,0,0,ReadWritePC}//Branch in order to stop the block and save PC etc :)
|
||||
};
|
||||
|
||||
void BuildOpcodeTables()
|
||||
static void BuildOpcodeTables()
|
||||
{
|
||||
|
||||
for (int i=0;i<0x10000;i++)
|
||||
|
|
|
@ -76,7 +76,6 @@ struct sh4_opcodelistentry
|
|||
};
|
||||
|
||||
extern sh4_opcodelistentry* OpDesc[0x10000];
|
||||
extern sh4_opcodelistentry opcodes[];
|
||||
|
||||
void DissasembleOpcode(u16 opcode,u32 pc,char* Dissasm);
|
||||
enum DecParam
|
||||
|
|
|
@ -1933,8 +1933,8 @@ void Arm32Assembler::compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool op
|
|||
|
||||
case shop_fsrra:
|
||||
Vmov(s1, 1.f);
|
||||
Vsqrt(s0, reg.mapFReg(op->rs1));
|
||||
Vdiv(reg.mapFReg(op->rd), s1, s0);
|
||||
Vdiv(s0, s1, reg.mapFReg(op->rs1));
|
||||
Vsqrt(reg.mapFReg(op->rd), s0);
|
||||
break;
|
||||
|
||||
case shop_fsetgt:
|
||||
|
@ -1986,7 +1986,6 @@ void Arm32Assembler::compileOp(RuntimeBlockInfo* block, shil_opcode* op, bool op
|
|||
|
||||
case shop_fipr:
|
||||
{
|
||||
|
||||
QRegister _r1 = q0;
|
||||
QRegister _r2 = q0;
|
||||
|
||||
|
|
|
@ -879,9 +879,9 @@ public:
|
|||
break;
|
||||
|
||||
case shop_fsrra:
|
||||
Fsqrt(s0, regalloc.MapVRegister(op.rs1));
|
||||
Fmov(s1, 1.f);
|
||||
Fdiv(regalloc.MapVRegister(op.rd), s1, s0);
|
||||
Fdiv(s0, s1, regalloc.MapVRegister(op.rs1));
|
||||
Fsqrt(regalloc.MapVRegister(op.rd), s0);
|
||||
break;
|
||||
|
||||
case shop_fsetgt:
|
||||
|
@ -907,6 +907,7 @@ public:
|
|||
}
|
||||
break;
|
||||
|
||||
/* fall back to the canonical implementations for better precision
|
||||
case shop_fipr:
|
||||
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
||||
Ld1(v0.V4S(), MemOperand(x9));
|
||||
|
@ -937,6 +938,7 @@ public:
|
|||
Add(x9, x28, sh4_context_mem_operand(op.rd.reg_ptr()).GetOffset());
|
||||
St1(v5.V4S(), MemOperand(x9));
|
||||
break;
|
||||
*/
|
||||
|
||||
case shop_frswap:
|
||||
Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset());
|
||||
|
@ -1077,13 +1079,11 @@ public:
|
|||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
GenCallRuntime(addrspace::read8);
|
||||
Sxtb(w0, w0);
|
||||
GenCallRuntime(addrspace::read8SX32);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
GenCallRuntime(addrspace::read16);
|
||||
Sxth(w0, w0);
|
||||
GenCallRuntime(addrspace::read16SX32);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
|
@ -1497,7 +1497,7 @@ public:
|
|||
// w0: vaddr, w1: addr
|
||||
checkBlockFpu = GetCursorAddress<DynaCode *>();
|
||||
Label fpu_enabled;
|
||||
Ldr(w10, sh4_context_mem_operand(&sr));
|
||||
Ldr(w10, sh4_context_mem_operand(&sr.status));
|
||||
Tbz(w10, 15, &fpu_enabled); // test SR.FD bit
|
||||
|
||||
Mov(w1, Sh4Ex_FpuDisabled); // exception code
|
||||
|
|
|
@ -135,7 +135,7 @@ public:
|
|||
if (mmu_enabled() && block->has_fpu_op)
|
||||
{
|
||||
Xbyak::Label fpu_enabled;
|
||||
mov(rax, (uintptr_t)&sr);
|
||||
mov(rax, (uintptr_t)&sr.status);
|
||||
test(dword[rax], 0x8000); // test SR.FD bit
|
||||
jz(fpu_enabled);
|
||||
mov(call_regs[0], block->vaddr); // pc
|
||||
|
|
|
@ -537,19 +537,18 @@ protected:
|
|||
break;
|
||||
|
||||
case shop_fsrra:
|
||||
// RSQRTSS has an |error| <= 1.5*2^-12 where the SH4 FSRRA needs |error| <= 2^-21
|
||||
sqrtss(xmm0, mapXRegister(op.rs1));
|
||||
if (ArchX64)
|
||||
{
|
||||
mov(eax, 0x3f800000); // 1.0
|
||||
movd(mapXRegister(op.rd), eax);
|
||||
movd(xmm0, eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
static float one = 1.f;
|
||||
movss(mapXRegister(op.rd), dword[&one]);
|
||||
movss(xmm0, dword[&one]);
|
||||
}
|
||||
divss(mapXRegister(op.rd), xmm0);
|
||||
divss(xmm0, mapXRegister(op.rs1));
|
||||
sqrtss(mapXRegister(op.rd), xmm0);
|
||||
break;
|
||||
|
||||
case shop_fsetgt:
|
||||
|
|
|
@ -108,7 +108,7 @@ void X86Compiler::compile(RuntimeBlockInfo* block, bool force_checks, bool optim
|
|||
if (mmu_enabled() && block->has_fpu_op)
|
||||
{
|
||||
Xbyak::Label fpu_enabled;
|
||||
mov(eax, dword[&sr]);
|
||||
mov(eax, dword[&sr.status]);
|
||||
test(eax, 0x8000); // test SR.FD bit
|
||||
jz(fpu_enabled);
|
||||
push(Sh4Ex_FpuDisabled); // exception code
|
||||
|
|
Loading…
Reference in New Issue