Use doubles to emulate FIPR on x86. Avoid crash with per-game config.

Don't store the renderer type in per-game config: loading it from there crashes when switching gl <-> vulkan.
Fixes Sonic Adventure falling off track
Other non-functional changes
This commit is contained in:
Flyinghead 2019-12-13 12:27:43 +01:00
parent c683e0ded8
commit 530cdaa23f
7 changed files with 66 additions and 41 deletions

View File

@ -914,18 +914,33 @@ shil_opc_end()
//shop_fipr
shil_opc(fipr)
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
shil_canonical
(
f32,f1,(float* fn, float* fm),
float idp;
idp=fn[0]*fm[0];
// Using double for better precision on x86 (Sonic Adventure)
double idp = (double)fn[0] * fm[0];
idp += (double)fn[1] * fm[1];
idp += (double)fn[2] * fm[2];
idp += (double)fn[3] * fm[3];
return fixNaN((float)idp);
)
#else
shil_canonical
(
f32,f1,(float* fn, float* fm),
float idp = fn[0] * fm[0];
idp+=fn[1]*fm[1];
idp+=fn[2]*fm[2];
idp+=fn[3]*fm[3];
return fixNaN(idp);
)
#endif
shil_compile
(

View File

@ -506,14 +506,21 @@ sh4op(i1111_nnmm_1110_1101)
int m=(GetN(op)&0x3)<<2;
if(fpscr.PR ==0)
{
float idp;
idp=fr[n+0]*fr[m+0];
idp+=fr[n+1]*fr[m+1];
idp+=fr[n+2]*fr[m+2];
idp+=fr[n+3]*fr[m+3];
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
double idp = (double)fr[n + 0] * fr[m + 0];
idp += (double)fr[n + 1] * fr[m + 1];
idp += (double)fr[n + 2] * fr[m + 2];
idp += (double)fr[n + 3] * fr[m + 3];
float rv = (float)idp;
#else
float rv = fr[n + 0] * fr[m + 0];
rv += fr[n + 1] * fr[m + 1];
rv += fr[n + 2] * fr[m + 2];
rv += fr[n + 3] * fr[m + 3];
#endif
CHECK_FPU_32(idp);
fr[n+3]=idp;
CHECK_FPU_32(rv);
fr[n + 3] = rv;
}
else
{

View File

@ -62,18 +62,14 @@ void Sh4_int_Run()
void Sh4_int_Stop()
{
if (sh4_int_bCpuRun)
{
sh4_int_bCpuRun=false;
}
}
void Sh4_int_Start()
{
if (!sh4_int_bCpuRun)
{
sh4_int_bCpuRun=true;
}
}
void Sh4_int_Step()
{
@ -92,14 +88,10 @@ void Sh4_int_Step()
void Sh4_int_Skip()
{
if (sh4_int_bCpuRun)
{
WARN_LOG(INTERPRETER, "Sh4 Is running, can't Skip");
}
else
{
next_pc += 2;
}
}
void Sh4_int_Reset(bool hard)
{

View File

@ -145,6 +145,8 @@ union sr_status_t
u32 status;
};
#define STATUS_MASK 0x700083F2
//Status register bitfield
struct sr_t
{
@ -319,12 +321,12 @@ extern u8* sh4_dyna_rcb;
INLINE u32 sh4_sr_GetFull()
{
return (p_sh4rcb->cntx.sr.status & 0x700083F2) | p_sh4rcb->cntx.sr.T;
return (p_sh4rcb->cntx.sr.status & STATUS_MASK) | p_sh4rcb->cntx.sr.T;
}
INLINE void sh4_sr_SetFull(u32 value)
{
p_sh4rcb->cntx.sr.status=value & 0x700083F2;
p_sh4rcb->cntx.sr.status=value & STATUS_MASK;
p_sh4rcb->cntx.sr.T=value&1;
}

View File

@ -796,6 +796,8 @@ void LoadSettings(bool game_specific)
settings.rend.WidescreenGameHacks = cfgLoadBool(config_section, "rend.WidescreenGameHacks", settings.rend.WidescreenGameHacks);
settings.pvr.ta_skip = cfgLoadInt(config_section, "ta.skip", settings.pvr.ta_skip);
if (!game_specific)
// crashes if switching gl <-> vulkan
settings.pvr.rend = cfgLoadInt(config_section, "pvr.rend", settings.pvr.rend);
settings.pvr.MaxThreads = cfgLoadInt(config_section, "pvr.MaxThreads", settings.pvr.MaxThreads);

View File

@ -1568,8 +1568,6 @@ public:
if (smc_checks)
{
opcodeExec* op;
int check_size = block->sh4_code_size;
switch (block->sh4_code_size)
{
case 4:

View File

@ -1089,25 +1089,34 @@ public:
case shop_fipr:
{
mov(rax, (size_t)op.rs1.reg_ptr());
movaps(regalloc.MapXRegister(op.rd), dword[rax]);
mov(rax, (size_t)op.rs2.reg_ptr());
mulps(regalloc.MapXRegister(op.rd), dword[rax]);
// Using doubles for better precision
const Xbyak::Xmm &rd = regalloc.MapXRegister(op.rd);
// Only first-generation 64-bit CPUs lack SSE3 support
if (cpu.has(Xbyak::util::Cpu::tSSE3))
{
haddps(rd, rd);
haddps(rd, rd);
}
else
{
movhlps(xmm1, rd);
addps(rd, xmm1);
movaps(xmm1, rd);
shufps(xmm1, xmm1,1);
addss(rd, xmm1);
}
mov(rax, (size_t)op.rs1.reg_ptr());
mov(rcx, (size_t)op.rs2.reg_ptr());
pxor(xmm1, xmm1);
pxor(xmm0, xmm0);
pxor(xmm2, xmm2);
cvtss2sd(xmm1, dword[rax]);
cvtss2sd(xmm0, dword[rcx]);
mulsd(xmm0, xmm1);
pxor(xmm1, xmm1);
cvtss2sd(xmm2, dword[rax + 4]);
cvtss2sd(xmm1, dword[rcx + 4]);
mulsd(xmm1, xmm2);
pxor(xmm2, xmm2);
cvtss2sd(xmm2, dword[rax + 8]);
addsd(xmm1, xmm0);
pxor(xmm0, xmm0);
cvtss2sd(xmm0, dword[rcx + 8]);
mulsd(xmm0, xmm2);
pxor(xmm2, xmm2);
cvtss2sd(xmm2, dword[rax + 12]);
addsd(xmm1, xmm0);
pxor(xmm0, xmm0);
cvtss2sd(xmm0, dword[rcx + 12]);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm1);
cvtsd2ss(rd, xmm0);
}
break;