PPU fixes

This commit is contained in:
Nekotekina 2017-02-07 16:14:44 +03:00
parent ead67d8e67
commit 47b4c649f1
3 changed files with 34 additions and 22 deletions

View File

@ -294,7 +294,7 @@ bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op)
{ {
// TODO: In precise interpreter, SAT and NJ flags must be implemented and warnings removed // TODO: In precise interpreter, SAT and NJ flags must be implemented and warnings removed
LOG_WARNING(PPU, "MFVSCR"); LOG_WARNING(PPU, "MFVSCR");
ppu.vr[op.vd] = v128::from32(0, 0, 0, ppu.sat | (u32{ppu.nj} << 16)); ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16));
return true; return true;
} }
@ -2777,7 +2777,7 @@ bool ppu_interpreter::MFSPR(ppu_thread& ppu, ppu_opcode_t op)
case 0x009: ppu.gpr[op.rd] = ppu.ctr; break; case 0x009: ppu.gpr[op.rd] = ppu.ctr; break;
case 0x100: ppu.gpr[op.rd] = ppu.vrsave; break; case 0x100: ppu.gpr[op.rd] = ppu.vrsave; break;
case 0x10C: ppu.gpr[op.rd] = get_timebased_time() & 0xffffffff; break; case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break;
case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break; case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break;
default: fmt::throw_exception("MFSPR 0x%x" HERE, n); default: fmt::throw_exception("MFSPR 0x%x" HERE, n);
} }
@ -2817,9 +2817,9 @@ bool ppu_interpreter::MFTB(ppu_thread& ppu, ppu_opcode_t op)
switch (n) switch (n)
{ {
case 0x10C: ppu.gpr[op.rd] = get_timebased_time() & 0xffffffff; break; case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break;
case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break; case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break;
default: fmt::throw_exception("MFSPR 0x%x" HERE, n); default: fmt::throw_exception("MFTB 0x%x" HERE, n);
} }
return true; return true;
@ -3326,7 +3326,7 @@ bool ppu_interpreter::EXTSB(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STFIWX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::STFIWX(ppu_thread& ppu, ppu_opcode_t op)
{ {
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
vm::write32(vm::cast(addr, HERE), (u32&)ppu.fpr[op.frs]); vm::write32(vm::cast(addr, HERE), (u32)(u64&)ppu.fpr[op.frs]);
return true; return true;
} }
@ -3602,9 +3602,7 @@ bool ppu_interpreter::FSQRTS(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FRES(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FRES(ppu_thread& ppu, ppu_opcode_t op)
{ {
f32 value = f32(ppu.fpr[op.frb]); ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]);
_mm_store_ss(&value, _mm_rcp_ss(_mm_load_ss(&value)));
ppu.fpr[op.frd] = value;
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }
@ -3708,14 +3706,14 @@ bool ppu_interpreter::FRSP(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
{ {
(s32&)ppu.fpr[op.frd] = s32(ppu.fpr[op.frb]); (s64&)ppu.fpr[op.frd] = _mm_cvtsd_si32(_mm_load_sd(&ppu.fpr[op.frb]));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }
bool ppu_interpreter::FCTIWZ(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTIWZ(ppu_thread& ppu, ppu_opcode_t op)
{ {
(s32&)ppu.fpr[op.frd] = _mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb])); (s64&)ppu.fpr[op.frd] = _mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb]));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }
@ -3764,9 +3762,7 @@ bool ppu_interpreter::FMUL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FRSQRTE(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FRSQRTE(ppu_thread& ppu, ppu_opcode_t op)
{ {
f32 value = f32(ppu.fpr[op.frb]); ppu.fpr[op.frd] = 1.0 / std::sqrt(ppu.fpr[op.frb]);
_mm_store_ss(&value, _mm_rsqrt_ss(_mm_load_ss(&value)));
ppu.fpr[op.frd] = value;
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }
@ -3835,7 +3831,7 @@ bool ppu_interpreter::FABS(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCTID(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTID(ppu_thread& ppu, ppu_opcode_t op)
{ {
(s64&)ppu.fpr[op.frd] = s64(ppu.fpr[op.frb]); (s64&)ppu.fpr[op.frd] = _mm_cvtsd_si64(_mm_load_sd(&ppu.fpr[op.frb]));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }
@ -3849,7 +3845,7 @@ bool ppu_interpreter::FCTIDZ(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCFID(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCFID(ppu_thread& ppu, ppu_opcode_t op)
{ {
ppu.fpr[op.frd] = static_cast<double>((s64&)ppu.fpr[op.frb]); _mm_store_sd(&ppu.fpr[op.frd], _mm_cvtsi64_sd(_mm_setzero_pd(), (s64&)ppu.fpr[op.frb]));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true; return true;
} }

View File

@ -43,6 +43,8 @@
#include "Modules/cellMsgDialog.h" #include "Modules/cellMsgDialog.h"
#endif #endif
#include <cfenv>
extern u64 get_system_time(); extern u64 get_system_time();
namespace vm { using namespace ps3; } namespace vm { using namespace ps3; }
@ -121,7 +123,7 @@ extern thread_local std::string(*g_tls_log_prefix)();
void ppu_thread::cpu_task() void ppu_thread::cpu_task()
{ {
//SetHostRoundingMode(FPSCR_RN_NEAR); std::fesetround(FE_TONEAREST);
// Execute cmd_queue // Execute cmd_queue
while (cmd64 cmd = cmd_wait()) while (cmd64 cmd = cmd_wait())
@ -538,7 +540,7 @@ static void ppu_initialize()
{ "__trace", (u64)&ppu_trace }, { "__trace", (u64)&ppu_trace },
{ "__hlecall", (u64)&ppu_execute_function }, { "__hlecall", (u64)&ppu_execute_function },
{ "__syscall", (u64)&ppu_execute_syscall }, { "__syscall", (u64)&ppu_execute_syscall },
{ "__get_tbl", (u64)&get_timebased_time }, { "__get_tb", (u64)&get_timebased_time },
{ "__lwarx", (u64)&ppu_lwarx }, { "__lwarx", (u64)&ppu_lwarx },
{ "__ldarx", (u64)&ppu_ldarx }, { "__ldarx", (u64)&ppu_ldarx },
{ "__stwcx", (u64)&ppu_stwcx }, { "__stwcx", (u64)&ppu_stwcx },

View File

@ -2768,10 +2768,10 @@ void PPUTranslator::MFSPR(ppu_opcode_t op)
result = ZExt(m_ir->CreateLoad(m_reg_vrsave)); result = ZExt(m_ir->CreateLoad(m_reg_vrsave));
break; break;
case 0x10C: // MFTB case 0x10C: // MFTB
result = ZExt(Call(GetType<u32>(), m_pure_attr, "__get_tbl")); result = Call(GetType<u64>(), m_pure_attr, "__get_tb");
break; break;
case 0x10D: // MFTBU case 0x10D: // MFTBU
result = ZExt(Call(GetType<u32>(), m_pure_attr, "__get_tbh")); result = m_ir->CreateLShr(Call(GetType<u64>(), m_pure_attr, "__get_tb"), 32);
break; break;
default: default:
result = Call(GetType<u64>(), fmt::format("__mfspr_%u", n)); result = Call(GetType<u64>(), fmt::format("__mfspr_%u", n));
@ -2802,7 +2802,21 @@ void PPUTranslator::LVXL(ppu_opcode_t op)
void PPUTranslator::MFTB(ppu_opcode_t op) void PPUTranslator::MFTB(ppu_opcode_t op)
{ {
return MFSPR(op); Value* result;
switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5))
{
case 0x10C: // MFTB
result = Call(GetType<u64>(), m_pure_attr, "__get_tb");
break;
case 0x10D: // MFTBU
result = m_ir->CreateLShr(Call(GetType<u64>(), m_pure_attr, "__get_tb"), 32);
break;
default:
result = Call(GetType<u64>(), fmt::format("__mftb_%u", n));
break;
}
SetGpr(op.rd, result);
} }
void PPUTranslator::LWAUX(ppu_opcode_t op) void PPUTranslator::LWAUX(ppu_opcode_t op)
@ -3966,8 +3980,8 @@ Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
void PPUTranslator::SetFpr(u32 r, Value* val) void PPUTranslator::SetFpr(u32 r, Value* val)
{ {
const auto f64_val = const auto f64_val =
val->getType() == GetType<u32>() ? m_ir->CreateBitCast(ZExt(val), GetType<f64>()) : val->getType() == GetType<s32>() ? m_ir->CreateBitCast(SExt(val), GetType<f64>()) :
val->getType() == GetType<u64>() ? m_ir->CreateBitCast(val, GetType<f64>()) : val->getType() == GetType<s64>() ? m_ir->CreateBitCast(val, GetType<f64>()) :
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val; val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8); m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);