diff --git a/core/hw/sh4/dyna/shil_canonical.h b/core/hw/sh4/dyna/shil_canonical.h
index 0af91477b..8204b1ab0 100644
--- a/core/hw/sh4/dyna/shil_canonical.h
+++ b/core/hw/sh4/dyna/shil_canonical.h
@@ -914,18 +914,33 @@ shil_opc_end()
 
 //shop_fipr
 shil_opc(fipr)
+
+#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
 shil_canonical
 (
 f32,f1,(float* fn, float* fm),
 
-	float idp;
-	idp=fn[0]*fm[0];
+	// Using double for better precision on x86 (Sonic Adventure)
+	double idp = (double)fn[0] * fm[0];
+	idp += (double)fn[1] * fm[1];
+	idp += (double)fn[2] * fm[2];
+	idp += (double)fn[3] * fm[3];
+
+	return fixNaN((float)idp);
+)
+#else
+shil_canonical
+(
+f32,f1,(float* fn, float* fm),
+
+	float idp = fn[0] * fm[0];
 	idp+=fn[1]*fm[1];
 	idp+=fn[2]*fm[2];
 	idp+=fn[3]*fm[3];
 
 	return fixNaN(idp);
 )
+#endif
 
 shil_compile
 (
diff --git a/core/hw/sh4/interpr/sh4_fpu.cpp b/core/hw/sh4/interpr/sh4_fpu.cpp
index 6f6c167de..5551bcde2 100644
--- a/core/hw/sh4/interpr/sh4_fpu.cpp
+++ b/core/hw/sh4/interpr/sh4_fpu.cpp
@@ -506,14 +506,21 @@ sh4op(i1111_nnmm_1110_1101)
 	int m=(GetN(op)&0x3)<<2;
 	if(fpscr.PR ==0)
 	{
-		float idp;
-		idp=fr[n+0]*fr[m+0];
-		idp+=fr[n+1]*fr[m+1];
-		idp+=fr[n+2]*fr[m+2];
-		idp+=fr[n+3]*fr[m+3];
+#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
+		double idp = (double)fr[n + 0] * fr[m + 0];
+		idp += (double)fr[n + 1] * fr[m + 1];
+		idp += (double)fr[n + 2] * fr[m + 2];
+		idp += (double)fr[n + 3] * fr[m + 3];
+		float rv = (float)idp;
+#else
+		float rv = fr[n + 0] * fr[m + 0];
+		rv += fr[n + 1] * fr[m + 1];
+		rv += fr[n + 2] * fr[m + 2];
+		rv += fr[n + 3] * fr[m + 3];
+#endif
 
-		CHECK_FPU_32(idp);
-		fr[n+3]=idp;
+		CHECK_FPU_32(rv);
+		fr[n + 3] = rv;
 	}
 	else
 	{
diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp
index 4d2d21eac..14f183f82 100644
--- a/core/hw/sh4/interpr/sh4_interpreter.cpp
+++ b/core/hw/sh4/interpr/sh4_interpreter.cpp
@@ -62,17 +62,13 @@ void Sh4_int_Run()
 void Sh4_int_Stop()
 {
 	if (sh4_int_bCpuRun)
-	{
 		sh4_int_bCpuRun=false;
-	}
 }
 
 void Sh4_int_Start()
 {
 	if (!sh4_int_bCpuRun)
-	{
 		sh4_int_bCpuRun=true;
-	}
 }
 
 void Sh4_int_Step()
@@ -92,13 +88,9 @@ void Sh4_int_Step()
 void Sh4_int_Skip()
 {
 	if (sh4_int_bCpuRun)
-	{
 		WARN_LOG(INTERPRETER, "Sh4 Is running, can't Skip");
-	}
 	else
-	{
-		next_pc+=2;
-	}
+		next_pc += 2;
 }
 
 void Sh4_int_Reset(bool hard)
diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h
index cd11b02dd..480f9faca 100644
--- a/core/hw/sh4/sh4_if.h
+++ b/core/hw/sh4/sh4_if.h
@@ -145,6 +145,8 @@ union sr_status_t
 	u32 status;
 };
 
+#define STATUS_MASK 0x700083F2
+
 //Status register bitfield
 struct sr_t
 {
@@ -319,12 +321,12 @@ extern u8* sh4_dyna_rcb;
 
 INLINE u32 sh4_sr_GetFull()
 {
-	return (p_sh4rcb->cntx.sr.status & 0x700083F2) | p_sh4rcb->cntx.sr.T;
+	return (p_sh4rcb->cntx.sr.status & STATUS_MASK) | p_sh4rcb->cntx.sr.T;
 }
 
 INLINE void sh4_sr_SetFull(u32 value)
 {
-	p_sh4rcb->cntx.sr.status=value & 0x700083F2;
+	p_sh4rcb->cntx.sr.status=value & STATUS_MASK;
 	p_sh4rcb->cntx.sr.T=value&1;
 }
 
diff --git a/core/nullDC.cpp b/core/nullDC.cpp
index a67476a18..e70e5276d 100755
--- a/core/nullDC.cpp
+++ b/core/nullDC.cpp
@@ -796,7 +796,9 @@ void LoadSettings(bool game_specific)
 
 	settings.rend.WidescreenGameHacks = cfgLoadBool(config_section, "rend.WidescreenGameHacks", settings.rend.WidescreenGameHacks);
 	settings.pvr.ta_skip = cfgLoadInt(config_section, "ta.skip", settings.pvr.ta_skip);
-	settings.pvr.rend = cfgLoadInt(config_section, "pvr.rend", settings.pvr.rend);
+	if (!game_specific)
+		// crashes if switching gl <-> vulkan
+		settings.pvr.rend = cfgLoadInt(config_section, "pvr.rend", settings.pvr.rend);
 
 	settings.pvr.MaxThreads = cfgLoadInt(config_section, "pvr.MaxThreads", settings.pvr.MaxThreads);
 	settings.pvr.SynchronousRender = cfgLoadBool(config_section, "pvr.SynchronousRendering", settings.pvr.SynchronousRender);
diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp
index 0d1ca1016..51a1c86e3 100644
--- a/core/rec-cpp/rec_cpp.cpp
+++ b/core/rec-cpp/rec_cpp.cpp
@@ -1568,8 +1568,6 @@ public:
 		if (smc_checks)
 		{
 			opcodeExec* op;
-			int check_size = block->sh4_code_size;
-
 			switch (block->sh4_code_size)
 			{
 			case 4:
diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp
index ead5a216d..88d454bab 100644
--- a/core/rec-x64/rec_x64.cpp
+++ b/core/rec-x64/rec_x64.cpp
@@ -1089,25 +1089,34 @@ public:
 
 		case shop_fipr:
 			{
-				mov(rax, (size_t)op.rs1.reg_ptr());
-				movaps(regalloc.MapXRegister(op.rd), dword[rax]);
-				mov(rax, (size_t)op.rs2.reg_ptr());
-				mulps(regalloc.MapXRegister(op.rd), dword[rax]);
+				// Using doubles for better precision
 				const Xbyak::Xmm &rd = regalloc.MapXRegister(op.rd);
-				// Only first-generation 64-bit CPUs lack SSE3 support
-				if (cpu.has(Xbyak::util::Cpu::tSSE3))
-				{
-					haddps(rd, rd);
-					haddps(rd, rd);
-				}
-				else
-				{
-					movhlps(xmm1, rd);
-					addps(rd, xmm1);
-					movaps(xmm1, rd);
-					shufps(xmm1, xmm1,1);
-					addss(rd, xmm1);
-				}
+				mov(rax, (size_t)op.rs1.reg_ptr());
+				mov(rcx, (size_t)op.rs2.reg_ptr());
+				pxor(xmm1, xmm1);
+				pxor(xmm0, xmm0);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm1, dword[rax]);
+				cvtss2sd(xmm0, dword[rcx]);
+				mulsd(xmm0, xmm1);
+				pxor(xmm1, xmm1);
+				cvtss2sd(xmm2, dword[rax + 4]);
+				cvtss2sd(xmm1, dword[rcx + 4]);
+				mulsd(xmm1, xmm2);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm2, dword[rax + 8]);
+				addsd(xmm1, xmm0);
+				pxor(xmm0, xmm0);
+				cvtss2sd(xmm0, dword[rcx + 8]);
+				mulsd(xmm0, xmm2);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm2, dword[rax + 12]);
+				addsd(xmm1, xmm0);
+				pxor(xmm0, xmm0);
+				cvtss2sd(xmm0, dword[rcx + 12]);
+				mulsd(xmm0, xmm2);
+				addsd(xmm0, xmm1);
+				cvtsd2ss(rd, xmm0);
 			}
 			break;