PPU Precise/LLVM: Support NJ modes (#8617)

This commit is contained in:
Eladash 2020-07-25 09:41:41 +03:00 committed by GitHub
parent 3354c800d7
commit 917069e31a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 98 additions and 55 deletions

View File

@ -359,6 +359,8 @@ public:
}
const g_ppu_scale_table;
constexpr u32 ppu_inf_u32 = 0x7F800000u;
static const f32 ppu_inf_f32 = std::bit_cast<f32>(ppu_inf_u32);
constexpr u32 ppu_nan_u32 = 0x7FC00000u;
static const f32 ppu_nan_f32 = std::bit_cast<f32>(ppu_nan_u32);
static const v128 ppu_vec_nans = v128::from32p(ppu_nan_u32);
@ -403,6 +405,14 @@ v128 vec_handle_nan(__m128 result, Args... args)
return vec_handle_nan(v128::fromF(result), v128::fromF(args)...);
}
// Flush denormals to zero if NJ is 1
// Each 32-bit lane of `a` is ANDed with ppu.jm_mask. The interpreter's MTVSCR sets that
// mask to ppu_inf_u32 (exponent bits only) when NJ is set and to 0x7fff'ffff otherwise.
// Lanes whose masked bits are all zero are reduced to just their sign bit:
//  - NJ = 1: lanes with a zero exponent field (zeros and denormals) become a same-signed zero;
//  - NJ = 0: only +/-0 matches, and it is left unchanged.
// NOTE(review): assumes v128::from32p broadcasts to all lanes, v128::eq32 yields all-ones
// lanes on equality and v128::andnot(x, y) computes ~x & y — confirm against v128.
inline v128 vec_handle_denormal(ppu_thread& ppu, v128 a)
{
// Per-lane copy of the precomputed mode mask
const auto mask = v128::from32p(ppu.jm_mask);
// Comparison result shifted right by one bit, so the sign bit is excluded from the clear mask
const auto nz = v128::fromV(_mm_srli_epi32(v128::eq32(mask & a, v128{}).vi, 1));
return v128::andnot(nz, a);
}
bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16));
@ -414,6 +424,7 @@ bool ppu_interpreter::MTVSCR(ppu_thread& ppu, ppu_opcode_t op)
const u32 vscr = ppu.vr[op.vb]._u32[3];
ppu.sat = (vscr & 1) != 0;
ppu.nj = (vscr & 0x10000) != 0;
ppu.jm_mask = ppu.nj ? ppu_inf_u32 : 0x7fff'ffff;
return true;
}
@ -427,10 +438,10 @@ bool ppu_interpreter::VADDCUW(ppu_thread& ppu, ppu_opcode_t op)
// VADDFP (interpreter): vd = va + vb via v128::addfs, followed by vec_handle_nan on the result.
// The vec_handle_denormal variants additionally pass both inputs and the final result
// through the NJ-dependent denormal handling.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`a`/`b` are declared twice and vd is assigned twice); the vec_handle_denormal lines look
// like the post-change ones — confirm against the real file.
bool ppu_interpreter::VADDFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = ppu.vr[op.va];
const auto b = ppu.vr[op.vb];
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
const auto result = v128::addfs(a, b);
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
return true;
}
@ -958,26 +969,26 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op)
// VMADDFP (fast interpreter): vd = (va * vc) + vb, computed as a separate SSE multiply and add
// (not fused), then passed through vec_handle_nan with only the result as argument.
// The vec_handle_denormal variants apply NJ-dependent denormal handling to all three inputs
// and to the final result.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`a`/`b`/`c` are declared twice and vd is assigned twice) — confirm against the real file.
bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = ppu.vr[op.va].vf;
const auto b = ppu.vr[op.vb].vf;
const auto c = ppu.vr[op.vc].vf;
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]).vf;
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]).vf;
const auto result = _mm_add_ps(_mm_mul_ps(a, c), b);
ppu.vr[op.vd] = vec_handle_nan(result);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result));
return true;
}
// VMADDFP (precise interpreter): result = v128::fma32f(a, c, b) (presumably a fused a*c + b —
// confirm against v128), then vec_handle_nan with all three operands.
// The vec_handle_denormal variants apply NJ-dependent denormal handling to all three inputs
// and to the final result.
// NOTE(review): the destination is written as ppu.vr[op.rd] rather than op.vd as in the fast
// variant; presumably both name the same opcode field — confirm.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`a`/`b`/`c` are declared twice and the destination is assigned twice) — confirm against the real file.
bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = ppu.vr[op.va];
const auto b = ppu.vr[op.vb];
const auto c = ppu.vr[op.vc];
ppu.vr[op.rd] = vec_handle_nan(v128::fma32f(a, c, b), a, b, c);
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]);
ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(v128::fma32f(a, c, b), a, b, c));
return true;
}
// VMAXFP (interpreter): vd = _mm_max_ps(va, vb), passed through vec_handle_nan; the second
// assignment additionally applies NJ-dependent denormal handling to the result.
// NOTE(review): unlike VADDFP/VSUBFP, the inputs are not passed through vec_handle_denormal
// here — confirm this is intended.
// NOTE(review): this listing appears to interleave the pre- and post-change assignment lines
// of the commit — confirm against the real file.
bool ppu_interpreter::VMAXFP(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd] = vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf));
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf)));
return true;
}
@ -1123,7 +1134,7 @@ bool ppu_interpreter::VMINFP(ppu_thread& ppu, ppu_opcode_t op)
const auto a = ppu.vr[op.va].vf;
const auto b = ppu.vr[op.vb].vf;
const auto result = _mm_or_ps(_mm_min_ps(a, b), _mm_min_ps(b, a));
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
return true;
}
@ -1463,18 +1474,18 @@ bool ppu_interpreter_fast::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op)
const auto a = _mm_sub_ps(_mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf), ppu.vr[op.vb].vf);
const auto b = _mm_set1_ps(-0.0f);
const auto result = _mm_xor_ps(a, b);
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_nan(result);
return true;
}
// VNMSUBFP (precise interpreter): computes -((a * c) - vb) by flipping the sign bit of vb,
// calling v128::fma32f(a, c, b) and flipping the sign bit of the result; NaN handling is then
// applied with a, b, c as operands.
// The vec_handle_denormal variants apply NJ-dependent denormal handling to va, vc and the
// final result.
// NOTE(review): vb is sign-flipped directly from the register and is NOT passed through
// vec_handle_denormal, unlike va/vc — confirm whether that asymmetry is intended.
// NOTE(review): the destination is written as ppu.vr[op.rd]; presumably the same field as op.vd — confirm.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`a`/`c` are declared twice and the destination is assigned twice) — confirm against the real file.
bool ppu_interpreter_precise::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op)
{
// Sign-bit mask (-0.0f in every lane) used to negate via XOR
const auto m = _mm_set1_ps(-0.0f);
const auto a = ppu.vr[op.va];
const auto c = ppu.vr[op.vc];
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]);
const auto b = v128::fromF(_mm_xor_ps(ppu.vr[op.vb].vf, m));
const auto r = v128::fromF(_mm_xor_ps(v128::fma32f(a, c, b).vf, m));
ppu.vr[op.rd] = vec_handle_nan(r, a, b, c);
ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(r, a, b, c));
return true;
}
@ -1874,15 +1885,15 @@ bool ppu_interpreter_precise::VPKUWUS(ppu_thread& ppu, ppu_opcode_t op)
// VREFP (interpreter): vd = 1.0f / vb per lane using a full-precision _mm_div_ps, followed by
// vec_handle_nan. The vec_handle_denormal variants apply NJ-dependent denormal handling to
// the input and to the final result.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`b` is declared twice and vd is assigned twice) — confirm against the real file.
bool ppu_interpreter::VREFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f);
const auto b = ppu.vr[op.vb].vf;
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
const auto result = _mm_div_ps(a, b);
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
return true;
}
bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op)
{
const auto b = ppu.vr[op.vb];
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
v128 d;
for (uint w = 0; w < 4; w++)
@ -1890,7 +1901,7 @@ bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op)
d._f[w] = std::floor(b._f[w]);
}
ppu.vr[op.vd] = vec_handle_nan(d, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
return true;
}
@ -1904,13 +1915,13 @@ bool ppu_interpreter::VRFIN(ppu_thread& ppu, ppu_opcode_t op)
d._f[w] = std::nearbyint(b._f[w]);
}
ppu.vr[op.vd] = vec_handle_nan(d, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
return true;
}
bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto b = ppu.vr[op.vb];
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
v128 d;
for (uint w = 0; w < 4; w++)
@ -1918,7 +1929,7 @@ bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op)
d._f[w] = std::ceil(b._f[w]);
}
ppu.vr[op.vd] = vec_handle_nan(d, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
return true;
}
@ -1932,7 +1943,7 @@ bool ppu_interpreter::VRFIZ(ppu_thread& ppu, ppu_opcode_t op)
d._f[w] = std::truncf(b._f[w]);
}
ppu.vr[op.vd] = vec_handle_nan(d, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
return true;
}
@ -1978,9 +1989,9 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op)
// VRSQRTEFP (interpreter): vd = 1.0f / sqrt(vb) per lane using _mm_sqrt_ps and _mm_div_ps,
// followed by vec_handle_nan. The vec_handle_denormal variants apply NJ-dependent denormal
// handling to the input and to the final result.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`b` is declared twice and vd is assigned twice) — confirm against the real file.
bool ppu_interpreter::VRSQRTEFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f);
const auto b = ppu.vr[op.vb].vf;
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
const auto result = _mm_div_ps(a, _mm_sqrt_ps(b));
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
return true;
}
@ -2277,10 +2288,10 @@ bool ppu_interpreter::VSUBCUW(ppu_thread& ppu, ppu_opcode_t op)
// VSUBFP (interpreter): vd = va - vb via v128::subfs, followed by vec_handle_nan on the result.
// The vec_handle_denormal variants additionally pass both inputs and the final result
// through the NJ-dependent denormal handling.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`a`/`b` are declared twice and vd is assigned twice) — confirm against the real file.
bool ppu_interpreter::VSUBFP(ppu_thread& ppu, ppu_opcode_t op)
{
const auto a = ppu.vr[op.va];
const auto b = ppu.vr[op.vb];
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
const auto result = v128::subfs(a, b);
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
return true;
}

View File

@ -1613,6 +1613,7 @@ extern void ppu_initialize(const ppu_module& info)
non_win32,
accurate_fma,
accurate_ppu_vector_nan,
java_mode_handling,
__bitset_enum_max
};
@ -1630,6 +1631,10 @@ extern void ppu_initialize(const ppu_module& info)
{
settings += ppu_settings::accurate_ppu_vector_nan;
}
if (g_cfg.core.llvm_ppu_jm_handling)
{
settings += ppu_settings::java_mode_handling;
}
// Write version, hash, CPU, settings
fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));

View File

@ -186,7 +186,10 @@ public:
exception, the corresponding element in the target vr is cleared to '0'. In both cases, the '0'
has the same sign as the denormalized or underflowing value.
*/
bool nj = false;
bool nj = true;
// Optimization: precomputed java-mode mask for handling denormals
u32 jm_mask = 0x7f80'0000;
u32 raddr{0}; // Reservation addr
u64 rtime{0};

View File

@ -46,6 +46,8 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
thread_struct.insert(thread_struct.end(), 3, GetType<bool>()); // so, ov, ca
thread_struct.insert(thread_struct.end(), 1, GetType<u8>()); // cnt
thread_struct.insert(thread_struct.end(), 2, GetType<bool>()); // sat, nj
thread_struct.emplace_back(ArrayType::get(GetType<char>(), 2)); // Padding
thread_struct.insert(thread_struct.end(), 1, GetType<u32>()); // jm_mask
m_thread_type = StructType::create(m_context, thread_struct, "context_t");
@ -231,6 +233,25 @@ Value* PPUTranslator::VecHandleNan(Value* val)
return val;
}
// Emits IR that reduces selected 32-bit lanes of `val` to their sign bit, based on the
// runtime value of the m_jm_mask register (written by MTVSCR as 0x7f80'0000 when NJ is set,
// 0x7fff'ffff otherwise). A lane is selected when (lane & jm_mask) == 0, i.e. with the
// exponent-only mask every zero/denormal lane becomes a same-signed zero.
// `val` is bitcast to u32[4] when it has another type, and the result is cast back to the
// original type before returning.
Value* PPUTranslator::VecHandleDenormal(Value* val)
{
const auto type = val->getType();
// Work on the integer representation of the lanes
const auto value = type == GetType<u32[4]>() ? val : m_ir->CreateBitCast(val, GetType<u32[4]>());
// Sign-extended compare result: all-ones in lanes whose masked bits are zero
const auto mask = SExt(m_ir->CreateICmpEQ(m_ir->CreateAnd(value, Broadcast(RegLoad(m_jm_mask), 4)), ConstantVector::getSplat(4, m_ir->getInt32(0))), GetType<s32[4]>());
// Logical shift drops the top bit so the sign bit survives the clear below
const auto nz = m_ir->CreateLShr(mask, 1);
const auto result = m_ir->CreateAnd(m_ir->CreateNot(nz), value);
return type == GetType<u32[4]>() ? result : m_ir->CreateBitCast(result, type);
}
// Post-processes a vector float result according to the configuration read while emitting
// code: first VecHandleNan if llvm_ppu_accurate_vector_nan is enabled, then VecHandleDenormal
// if llvm_ppu_jm_handling is enabled. With both options off, `val` is returned unchanged.
Value* PPUTranslator::VecHandleResult(Value* val)
{
val = g_cfg.core.llvm_ppu_accurate_vector_nan ? VecHandleNan(val) : val;
val = g_cfg.core.llvm_ppu_jm_handling ? VecHandleDenormal(val) : val;
return val;
}
Value* PPUTranslator::GetAddr(u64 _add)
{
if (m_reloc)
@ -609,7 +630,9 @@ void PPUTranslator::MFVSCR(ppu_opcode_t op)
// MTVSCR (LLVM): reads one 32-bit element of vb (index 3 when m_is_be, otherwise 0) as the
// VSCR value, stores bit 16 into m_nj and bit 0 into m_sat. When llvm_ppu_jm_handling is
// enabled, it also stores the denormal-handling mask into m_jm_mask: 0x7f80'0000 if NJ is
// set, 0x7fff'ffff otherwise.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (m_nj is stored both directly and via the `nj` temporary) — confirm against the real file.
void PPUTranslator::MTVSCR(ppu_opcode_t op)
{
const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0));
RegStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>()), m_nj);
const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>());
RegStore(nj, m_nj);
if (g_cfg.core.llvm_ppu_jm_handling) RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask);
RegStore(Trunc(vscr, GetType<bool>()), m_sat);
}
@ -625,7 +648,7 @@ void PPUTranslator::VADDFP(ppu_opcode_t op)
const auto a = get_vr<f32[4]>(op.va);
const auto b = get_vr<f32[4]>(op.vb);
set_vr(op.vd, vec_handle_nan(a + b));
set_vr(op.vd, vec_handle_result(a + b));
}
void PPUTranslator::VADDSBS(ppu_opcode_t op)
@ -930,7 +953,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
if (data == v128{})
{
set_vr(op.vd, vec_handle_nan(a * c));
set_vr(op.vd, vec_handle_result(a * c));
ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
@ -938,7 +961,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
if (m_use_fma)
{
SetVr(op.vd, VecHandleNan(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, b.value })));
SetVr(op.vd, VecHandleResult(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, b.value })));
return;
}
@ -948,13 +971,13 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xc, xb});
SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
}
// VMAXFP (LLVM): selects va where va > vb (ordered compare), otherwise vb, then
// post-processes the result (VecHandleNan in one variant, VecHandleResult in the other).
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VMAXFP(ppu_opcode_t op)
{
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
}
void PPUTranslator::VMAXSB(ppu_opcode_t op)
@ -1026,7 +1049,7 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
// VMINFP (LLVM): selects va where va < vb (ordered compare), otherwise vb, then
// post-processes the result (VecHandleNan in one variant, VecHandleResult in the other).
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VMINFP(ppu_opcode_t op)
{
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
}
void PPUTranslator::VMINSB(ppu_opcode_t op)
@ -1236,7 +1259,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
if (data == v128{})
{
set_vr(op.vd, vec_handle_nan(-a * c));
set_vr(op.vd, vec_handle_result(-a * c));
ppu_log.notice("LLVM: VNMSUBFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
@ -1245,7 +1268,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
// Differs from the emulated path with regards to negative zero
if (m_use_fma)
{
SetVr(op.vd, VecHandleNan(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) }))));
SetVr(op.vd, VecHandleResult(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) }))));
return;
}
@ -1255,7 +1278,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
const auto xr = m_ir->CreateFNeg(m_ir->CreateFSub(m_ir->CreateFMul(xa, xc), xb));
SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
}
void PPUTranslator::VNOR(ppu_opcode_t op)
@ -1361,28 +1384,28 @@ void PPUTranslator::VPKUWUS(ppu_opcode_t op)
// VREFP (LLVM): emits a per-lane 1.0 / vb floating-point division and stores the
// post-processed result (VecHandleNan in one variant, VecHandleResult in the other) into vd.
// NOTE(review): this listing appears to interleave pre- and post-change lines of the commit
// (`result` is declared twice) — confirm against the real file.
void PPUTranslator::VREFP(ppu_opcode_t op)
{
const auto result = VecHandleNan(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), GetVr(op.vb, VrType::vf)));
const auto result = VecHandleResult(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), GetVr(op.vb, VrType::vf)));
SetVr(op.vd, result);
}
// VRFIM (LLVM): vd = llvm.floor.v4f32(vb), post-processed by VecHandleNan in one variant and
// VecHandleResult in the other.
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VRFIM(ppu_opcode_t op)
{
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf))));
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf))));
}
// VRFIN (LLVM): vd = llvm.nearbyint.v4f32(vb), post-processed by VecHandleNan in one variant
// and VecHandleResult in the other.
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VRFIN(ppu_opcode_t op)
{
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf))));
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf))));
}
// VRFIP (LLVM): vd = llvm.ceil.v4f32(vb), post-processed by VecHandleNan in one variant and
// VecHandleResult in the other.
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VRFIP(ppu_opcode_t op)
{
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf))));
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf))));
}
// VRFIZ (LLVM): vd = llvm.trunc.v4f32(vb), post-processed by VecHandleNan in one variant and
// VecHandleResult in the other.
// NOTE(review): this listing appears to interleave the pre- and post-change SetVr lines of
// the commit — confirm against the real file.
void PPUTranslator::VRFIZ(ppu_opcode_t op)
{
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf))));
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf))));
}
void PPUTranslator::VRLB(ppu_opcode_t op)
@ -1407,7 +1430,7 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)
{
const auto result = m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), Call(GetType<f32[4]>(), "llvm.sqrt.v4f32", GetVr(op.vb, VrType::vf)));
SetVr(op.vd, VecHandleNan(result));
SetVr(op.vd, VecHandleResult(result));
}
void PPUTranslator::VSEL(ppu_opcode_t op)
@ -1565,7 +1588,7 @@ void PPUTranslator::VSUBFP(ppu_opcode_t op)
{
const auto a = get_vr<f32[4]>(op.va);
const auto b = get_vr<f32[4]>(op.vb);
SetVr(op.vd, VecHandleNan(eval(a - b).eval(m_ir)));
SetVr(op.vd, VecHandleResult(eval(a - b).eval(m_ir)));
}
void PPUTranslator::VSUBSBS(ppu_opcode_t op)

View File

@ -52,9 +52,9 @@ class PPUTranslator final : public cpu_translator
llvm::Value* m_mtocr_table{};
llvm::Value* m_globals[173];
llvm::Value* m_globals[175];
llvm::Value** const m_g_cr = m_globals + 99;
llvm::Value* m_locals[173];
llvm::Value* m_locals[175];
llvm::Value** const m_gpr = m_locals + 3;
llvm::Value** const m_fpr = m_locals + 35;
llvm::Value** const m_vr = m_locals + 67;
@ -77,6 +77,7 @@ class PPUTranslator final : public cpu_translator
DEF_VALUE(m_cnt, m_g_cnt, 170) // XER.CNT
DEF_VALUE(m_sat, m_g_sat, 171) // VSCR.SAT bit, sticky saturation flag
DEF_VALUE(m_nj, m_g_nj, 172) // VSCR.NJ bit, non-Java mode
DEF_VALUE(m_jm_mask, m_g_jm_mask, 174) // Java-Mode helper mask
#undef DEF_VALUE
public:
@ -102,15 +103,14 @@ public:
}
llvm::Value* VecHandleNan(llvm::Value* val);
llvm::Value* VecHandleDenormal(llvm::Value* val);
llvm::Value* VecHandleResult(llvm::Value* val);
// Expression-template wrapper: evaluates `expr` with m_ir and returns the value wrapped in
// value_t<typename T::type>. The vec_handle_result form always routes the value through
// VecHandleResult; the vec_handle_nan form applies VecHandleNan only when
// llvm_ppu_accurate_vector_nan is enabled.
// NOTE(review): this listing appears to interleave the removed vec_handle_nan template with
// the added vec_handle_result template (two signature lines, two ways of setting
// result.value) — confirm against the real file.
template <typename T>
auto vec_handle_nan(T&& expr)
auto vec_handle_result(T&& expr)
{
value_t<typename T::type> result;
if (g_cfg.core.llvm_ppu_accurate_vector_nan)
result.value = VecHandleNan(expr.eval(m_ir));
else
result.value = expr.eval(m_ir);
result.value = VecHandleResult(expr.eval(m_ir));
return result;
}

View File

@ -52,6 +52,7 @@ struct cfg_root : cfg::node
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
cfg::_bool llvm_ppu_jm_handling{ this, "PPU LLVM Java Mode Handling", false }; // Make PPU LLVM honour the current VSCR.NJ (Java / non-Java mode) setting for AltiVec float results by emitting denormal handling; disabled by default
cfg::_bool llvm_ppu_accurate_vector_nan{ this, "PPU LLVM Accurate Vector NaN values", false };
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)