ppu: Improve FCTIW, FCTIWZ, FCTID and FCTIDZ

This commit is contained in:
Eladash 2019-07-31 20:35:08 +03:00 committed by Ivan
parent d48d424b19
commit a560498cd4
2 changed files with 26 additions and 16 deletions

View File

@ -4735,14 +4735,18 @@ bool ppu_interpreter::FRSP(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsd_si32(_mm_load_sd(&ppu.fpr[op.frb])));
const f64 b = ppu.fpr[op.frb];
const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(res);
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}
bool ppu_interpreter::FCTIWZ(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb])));
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_cvttpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_extract_epi32(res, 0));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}
@ -4859,14 +4863,18 @@ bool ppu_interpreter::FABS(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCTID(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsd_si64(_mm_load_sd(&ppu.fpr[op.frb])));
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvtsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
ppu.fpr[op.frd] = std::bit_cast<f64>(_mm_extract_epi64(res, 0));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}
bool ppu_interpreter::FCTIDZ(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvttsd_si64(_mm_load_sd(&ppu.fpr[op.frb])));
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvtsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
ppu.fpr[op.frd] = std::bit_cast<f64>(_mm_extract_epi64(res, 0));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}

View File

@ -4004,11 +4004,8 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
void PPUTranslator::FCTIW(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
//const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType<f64>(), -std::pow(2, 31))); // TODO ???
//const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::pow(2, 31)));
//const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType<s64>());
//SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffff), converted));
SetFpr(op.frd, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, uint64_t{0})));
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));
@ -4021,7 +4018,10 @@ void PPUTranslator::FCTIW(ppu_opcode_t op)
void PPUTranslator::FCTIWZ(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, Call(GetType<s32>(), "llvm.x86.sse2.cvttsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, uint64_t{0})));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());
// fix result saturation (0x80000000 -> 0x7fffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvttsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
}
void PPUTranslator::FDIV(ppu_opcode_t op)
@ -4248,11 +4248,10 @@ void PPUTranslator::FABS(ppu_opcode_t op)
void PPUTranslator::FCTID(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
//const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType<f64>(), -std::pow(2, 63)));
//const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::pow(2, 63)));
//const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType<s64>());
//SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffffffffffff), converted));
SetFpr(op.frd, Call(GetType<s64>(), "llvm.x86.sse2.cvtsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, uint64_t{0})));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s64>());
// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.x86.sse2.cvtsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctid_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctid_get_fi", b));
@ -4265,7 +4264,10 @@ void PPUTranslator::FCTID(ppu_opcode_t op)
void PPUTranslator::FCTIDZ(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, Call(GetType<s64>(), "llvm.x86.sse2.cvttsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, uint64_t{0})));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s64>());
// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.x86.sse2.cvttsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
}
void PPUTranslator::FCFID(ppu_opcode_t op)