diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp
index 99827e662d..5cebb2b9e8 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@@ -294,7 +294,7 @@ bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op)
 {
 	// TODO: In precise interpreter, SAT and NJ flags must be implemented and warnings removed
 	LOG_WARNING(PPU, "MFVSCR");
-	ppu.vr[op.vd] = v128::from32(0, 0, 0, ppu.sat | (u32{ppu.nj} << 16));
+	ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16));
 	return true;
 }
 
@@ -2777,7 +2777,7 @@ bool ppu_interpreter::MFSPR(ppu_thread& ppu, ppu_opcode_t op)
 	case 0x009: ppu.gpr[op.rd] = ppu.ctr; break;
 	case 0x100: ppu.gpr[op.rd] = ppu.vrsave; break;
 
-	case 0x10C: ppu.gpr[op.rd] = get_timebased_time() & 0xffffffff; break;
+	case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break;
 	case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break;
 	default: fmt::throw_exception("MFSPR 0x%x" HERE, n);
 	}
@@ -2817,9 +2817,9 @@ bool ppu_interpreter::MFTB(ppu_thread& ppu, ppu_opcode_t op)
 
 	switch (n)
 	{
-	case 0x10C: ppu.gpr[op.rd] = get_timebased_time() & 0xffffffff; break;
+	case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break;
 	case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break;
-	default: fmt::throw_exception("MFSPR 0x%x" HERE, n);
+	default: fmt::throw_exception("MFTB 0x%x" HERE, n);
 	}
 
 	return true;
@@ -3326,7 +3326,7 @@ bool ppu_interpreter::EXTSB(ppu_thread& ppu, ppu_opcode_t op)
 bool ppu_interpreter::STFIWX(ppu_thread& ppu, ppu_opcode_t op)
 {
 	const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
-	vm::write32(vm::cast(addr, HERE), (u32&)ppu.fpr[op.frs]);
+	vm::write32(vm::cast(addr, HERE), (u32)(u64&)ppu.fpr[op.frs]);
 	return true;
 }
 
@@ -3602,9 +3602,7 @@ bool ppu_interpreter::FSQRTS(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FRES(ppu_thread& ppu, ppu_opcode_t op)
 {
-	f32 value = f32(ppu.fpr[op.frb]);
-	_mm_store_ss(&value, _mm_rcp_ss(_mm_load_ss(&value)));
-	ppu.fpr[op.frd] = value;
+	ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]);
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
@@ -3708,14 +3706,14 @@ bool ppu_interpreter::FRSP(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
 {
-	(s32&)ppu.fpr[op.frd] = s32(ppu.fpr[op.frb]);
+	(s64&)ppu.fpr[op.frd] = _mm_cvtsd_si32(_mm_load_sd(&ppu.fpr[op.frb]));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
 
 bool ppu_interpreter::FCTIWZ(ppu_thread& ppu, ppu_opcode_t op)
 {
-	(s32&)ppu.fpr[op.frd] = _mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb]));
+	(s64&)ppu.fpr[op.frd] = _mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb]));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
@@ -3764,9 +3762,7 @@ bool ppu_interpreter::FMUL(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FRSQRTE(ppu_thread& ppu, ppu_opcode_t op)
 {
-	f32 value = f32(ppu.fpr[op.frb]);
-	_mm_store_ss(&value, _mm_rsqrt_ss(_mm_load_ss(&value)));
-	ppu.fpr[op.frd] = value;
+	ppu.fpr[op.frd] = 1.0 / std::sqrt(ppu.fpr[op.frb]);
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
@@ -3835,7 +3831,7 @@ bool ppu_interpreter::FABS(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FCTID(ppu_thread& ppu, ppu_opcode_t op)
 {
-	(s64&)ppu.fpr[op.frd] = s64(ppu.fpr[op.frb]);
+	(s64&)ppu.fpr[op.frd] = _mm_cvtsd_si64(_mm_load_sd(&ppu.fpr[op.frb]));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
@@ -3849,7 +3845,7 @@ bool ppu_interpreter::FCTIDZ(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FCFID(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.fpr[op.frd] = static_cast((s64&)ppu.fpr[op.frb]);
+	_mm_store_sd(&ppu.fpr[op.frd], _mm_cvtsi64_sd(_mm_setzero_pd(), (s64&)ppu.fpr[op.frb]));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 1ef028f68d..3f6147514a 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -43,6 +43,8 @@
 #include "Modules/cellMsgDialog.h"
 #endif
 
+#include <cfenv>
+
 extern u64 get_system_time();
 
 namespace vm { using namespace ps3; }
@@ -121,7 +123,7 @@ extern thread_local std::string(*g_tls_log_prefix)();
 
 void ppu_thread::cpu_task()
 {
-	//SetHostRoundingMode(FPSCR_RN_NEAR);
+	std::fesetround(FE_TONEAREST);
 
 	// Execute cmd_queue
 	while (cmd64 cmd = cmd_wait())
@@ -538,7 +540,7 @@ static void ppu_initialize()
 		{ "__trace", (u64)&ppu_trace },
 		{ "__hlecall", (u64)&ppu_execute_function },
 		{ "__syscall", (u64)&ppu_execute_syscall },
-		{ "__get_tbl", (u64)&get_timebased_time },
+		{ "__get_tb", (u64)&get_timebased_time },
 		{ "__lwarx", (u64)&ppu_lwarx },
 		{ "__ldarx", (u64)&ppu_ldarx },
 		{ "__stwcx", (u64)&ppu_stwcx },
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 5f309092e2..210272d652 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -2768,10 +2768,10 @@ void PPUTranslator::MFSPR(ppu_opcode_t op)
 		result = ZExt(m_ir->CreateLoad(m_reg_vrsave));
 		break;
 	case 0x10C: // MFTB
-		result = ZExt(Call(GetType(), m_pure_attr, "__get_tbl"));
+		result = Call(GetType(), m_pure_attr, "__get_tb");
 		break;
 	case 0x10D: // MFTBU
-		result = ZExt(Call(GetType(), m_pure_attr, "__get_tbh"));
+		result = m_ir->CreateLShr(Call(GetType(), m_pure_attr, "__get_tb"), 32);
 		break;
 	default:
 		result = Call(GetType(), fmt::format("__mfspr_%u", n));
@@ -2802,7 +2802,21 @@ void PPUTranslator::LVXL(ppu_opcode_t op)
 
 void PPUTranslator::MFTB(ppu_opcode_t op)
 {
-	return MFSPR(op);
+	Value* result;
+	switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5))
+	{
+	case 0x10C: // MFTB
+		result = Call(GetType(), m_pure_attr, "__get_tb");
+		break;
+	case 0x10D: // MFTBU
+		result = m_ir->CreateLShr(Call(GetType(), m_pure_attr, "__get_tb"), 32);
+		break;
+	default:
+		result = Call(GetType(), fmt::format("__mftb_%u", n));
+		break;
+	}
+
+	SetGpr(op.rd, result);
 }
 
 void PPUTranslator::LWAUX(ppu_opcode_t op)
@@ -3966,8 +3980,8 @@ Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
 void PPUTranslator::SetFpr(u32 r, Value* val)
 {
 	const auto f64_val =
-		val->getType() == GetType() ? m_ir->CreateBitCast(ZExt(val), GetType()) :
-		val->getType() == GetType() ? m_ir->CreateBitCast(val, GetType()) :
+		val->getType() == GetType() ? m_ir->CreateBitCast(SExt(val), GetType()) :
+		val->getType() == GetType() ? m_ir->CreateBitCast(val, GetType()) :
 		val->getType() == GetType() ? m_ir->CreateFPExt(val, GetType()) : val;
 
 	m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);