Merge xfloat options

This commit is contained in:
Megamouse 2023-07-23 09:09:24 +02:00 committed by Elad Ashkenazi
parent e6009794ee
commit 343ba8733b
7 changed files with 70 additions and 62 deletions

View File

@ -5316,7 +5316,7 @@ public:
if (src > 0x40000)
{
// Use the xfloat hint to create 256-bit (4x double) PHI
llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i));
m_block->phi[i] = _phi;
@ -8876,7 +8876,7 @@ public:
void FREST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto a = get_vr<f32[4]>(op.ra);
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
@ -8885,7 +8885,7 @@ public:
return;
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
{
@ -8918,13 +8918,13 @@ public:
void FRSQEST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
return;
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
{
@ -8956,7 +8956,7 @@ public:
void FCGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
return;
@ -9003,7 +9003,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));
@ -9034,7 +9034,7 @@ public:
void FCMGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
return;
@ -9080,7 +9080,7 @@ public:
return eval(sext<s32[4]>(mai > mbi));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
}
@ -9101,7 +9101,7 @@ public:
void FA(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
return;
@ -9126,7 +9126,7 @@ public:
void FS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
return;
@ -9137,7 +9137,7 @@ public:
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto bc = clamp_smax(b); // for #4478
return eval(a - bc);
@ -9159,7 +9159,7 @@ public:
void FM(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
return;
@ -9170,7 +9170,7 @@ public:
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
if (a.value == b.value)
{
@ -9206,7 +9206,7 @@ public:
void FESD(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
const auto d = bitcast<s64[2]>(r);
@ -9236,7 +9236,7 @@ public:
void FRDS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = get_vr<f64[2]>(op.ra);
const auto d = bitcast<s64[2]>(r);
@ -9267,7 +9267,7 @@ public:
void FCEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
return;
@ -9320,7 +9320,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}
@ -9341,7 +9341,7 @@ public:
void FCMEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
return;
@ -9397,7 +9397,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}
@ -9490,7 +9490,7 @@ public:
void FNMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(-a, b, c));
@ -9503,7 +9503,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
}
@ -9525,7 +9525,7 @@ public:
void FMA(spu_opcode_t op)
{
// Hardware FMA produces the same result as multiply + add on the limited double range (xfloat).
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, c));
@ -9538,7 +9538,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
@ -9599,7 +9599,7 @@ public:
void FMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, -c));
@ -9612,7 +9612,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
}
@ -9646,7 +9646,7 @@ public:
void FI(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.rb));
// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
@ -9674,7 +9674,7 @@ public:
return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
});
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_re", [&](llvm::CallInst* ci)
{
@ -9733,7 +9733,7 @@ public:
void CFLTS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
@ -9807,7 +9807,7 @@ public:
void CFLTU(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
@ -9890,7 +9890,7 @@ public:
void CSFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;
@ -9930,7 +9930,7 @@ public:
void CUFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;

View File

@ -66,9 +66,7 @@ struct cfg_root : cfg::node
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool spu_relaxed_xfloat{ this, "Relaxed xfloat", true }; // Approximate accuracy for only the "FCGT", "FNMS", "FREST" AND "FRSQEST" instructions
cfg::_enum<xfloat_accuracy> spu_xfloat_accuracy{ this, "XFloat Accuracy", xfloat_accuracy::approximate, false };
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
cfg::_bool full_width_avx512{ this, "Full Width AVX-512", true };

View File

@ -664,3 +664,20 @@ void fmt_class_string<output_scaling_mode>::format(std::string& out, u64 arg)
return unknown;
});
}
// String conversion for xfloat_accuracy: appends the textual name of the
// enumerator encoded in `arg` to `out` by delegating to format_enum.
template <>
void fmt_class_string<xfloat_accuracy>::format(std::string& out, u64 arg)
{
	// Enumerator -> name lookup handed to format_enum.
	const auto to_name = [](xfloat_accuracy mode)
	{
		switch (mode)
		{
		case xfloat_accuracy::accurate:
			return "Accurate";
		case xfloat_accuracy::approximate:
			return "Approximate";
		case xfloat_accuracy::relaxed:
			return "Relaxed";
		case xfloat_accuracy::inaccurate:
			return "Inaccurate";
		}

		// Value does not match any enumerator listed above
		return unknown;
	};

	format_enum(out, arg, to_name);
}

View File

@ -320,3 +320,11 @@ enum class stereo_render_mode_options
side_by_side,
over_under
};
// Accuracy level used by the SPU LLVM code generation for SPU float ("xfloat") instructions.
// Stored in the config as "XFloat Accuracy" (cfg_root::core.spu_xfloat_accuracy).
// NOTE: the enumerator order is significant. GUI code converts between integer
// indices and these values with static_cast, so do not reorder or insert entries
// without updating those call sites.
enum class xfloat_accuracy
{
// Float instructions operate on f64[4] values instead of f32[4]
accurate,
// f32 code paths with additional fix-ups (e.g. operand clamping, extra integer compares)
approximate,
relaxed, // Approximate accuracy for only the "FCGT", "FNMS", "FREST" and "FRSQEST" instructions
// NOTE(review): none of the instruction handlers reviewed here test for this value explicitly;
// presumably it selects the plain fallback path - confirm
inaccurate
};

View File

@ -1283,6 +1283,14 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
case midi_device_type::keyboard: return tr("Keyboard", "Midi Device Type");
}
break;
case emu_settings_type::XFloatAccuracy:
switch (static_cast<xfloat_accuracy>(index))
{
case xfloat_accuracy::accurate: return tr("Accurate XFloat");
case xfloat_accuracy::approximate: return tr("Approximate XFloat");
case xfloat_accuracy::relaxed: return tr("Relaxed XFloat");
case xfloat_accuracy::inaccurate: return tr("Inaccurate XFloat");
}
default:
break;
}

View File

@ -26,8 +26,7 @@ enum class emu_settings_type
AccurateClineStores,
AccurateRSXAccess,
FIFOAccuracy,
AccurateXFloat,
ApproximateXFloat,
XFloatAccuracy,
AccuratePPU128Loop,
MFCCommandsShuffling,
NumPPUThreads,
@ -212,8 +211,7 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::FIFOAccuracy, { "Core", "RSX FIFO Accuracy"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::ApproximateXFloat, { "Core", "Approximate xfloat"}},
{ emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}},
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}},

View File

@ -265,30 +265,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
SubscribeTooltip(ui->spuLoopDetection, tooltips.settings.spu_loop_detection);
// Comboboxes
m_emu_settings->EnhanceComboBox(ui->xfloatAccuracy, emu_settings_type::XFloatAccuracy);
SubscribeTooltip(ui->gb_xfloat_accuracy, tooltips.settings.xfloat);
ui->xfloatAccuracy->addItem(tr("Accurate XFloat"));
ui->xfloatAccuracy->addItem(tr("Approximate XFloat"));
ui->xfloatAccuracy->addItem(tr("Relaxed XFloat"));
connect(ui->xfloatAccuracy, QOverload<int>::of(&QComboBox::currentIndexChanged), this, [this](int index)
{
if (index < 0) return;
m_emu_settings->SetSetting(emu_settings_type::AccurateXFloat, index == 0 ? "true" : "false");
m_emu_settings->SetSetting(emu_settings_type::ApproximateXFloat, index == 1 ? "true" : "false");
});
connect(m_emu_settings.get(), &emu_settings::RestoreDefaultsSignal, this, [this]()
{
ui->xfloatAccuracy->setCurrentIndex(1);
});
if (m_emu_settings->GetSetting(emu_settings_type::AccurateXFloat) == "true")
ui->xfloatAccuracy->setCurrentIndex(0);
else if (m_emu_settings->GetSetting(emu_settings_type::ApproximateXFloat) == "true")
ui->xfloatAccuracy->setCurrentIndex(1);
else
ui->xfloatAccuracy->setCurrentIndex(2);
remove_item(ui->xfloatAccuracy, static_cast<int>(xfloat_accuracy::inaccurate), static_cast<int>(g_cfg.core.spu_xfloat_accuracy.def));
m_emu_settings->EnhanceComboBox(ui->spuBlockSize, emu_settings_type::SPUBlockSize);
SubscribeTooltip(ui->gb_spuBlockSize, tooltips.settings.spu_block_size);