rsx: Add floor workaround for GPUs with rounding issues

- Mainly affects NVIDIA GPUs, where (x / w) * w can sometimes return a value slightly smaller than x.
  In such conditions, floor(x) returns x - 1 when x is an integer, which is badly wrong and magnifies a minor precision drift into a large visible error.
This commit is contained in:
kd-11 2021-06-09 01:37:59 +03:00 committed by kd-11
parent 2d3fe7ce1c
commit 20bd723e7c
4 changed files with 20 additions and 3 deletions

View File

@ -357,6 +357,7 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog)
{
const auto driver_caps = gl::get_driver_caps();
decompiler.device_props.has_native_half_support = driver_caps.NV_gpu_shader5_supported || driver_caps.AMD_gpu_shader_half_float_supported;
decompiler.device_props.has_low_precision_rounding = driver_caps.vendor_NVIDIA;
}
decompiler.Task();

View File

@ -828,7 +828,21 @@ std::string FragmentProgramDecompiler::BuildCode()
"#define _builtin_sqrt(x) sqrt(abs(x))\n"
"#define _builtin_rcp(x) (1. / x)\n"
"#define _builtin_rsq(x) (1. / _builtin_sqrt(x))\n"
"#define _builtin_div(x, y) (x / y)\n\n";
"#define _builtin_div(x, y) (x / y)\n";
if (device_props.has_low_precision_rounding)
{
// NVIDIA has terrible rounding errors interpolating constant values across vertices with different w
// PS3 games blindly rely on interpolating a constant to not change the values
// Calling floor/equality will fail randomly causing a moire pattern
OS <<
"#define _builtin_floor(x) floor(x + 0.000001)\n\n";
}
else
{
OS <<
"#define _builtin_floor floor\n\n";
}
if (properties.has_pkg)
{
@ -954,7 +968,7 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode)
case RSX_FP_OPCODE_DST: SetDst("$Ty(1.0, $0.y * $1.y, $0.z, $1.w)", OPFLAGS::op_extern); return true;
case RSX_FP_OPCODE_REFL: SetDst(getFunction(FUNCTION::FUNCTION_REFL), OPFLAGS::op_extern); return true;
case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true;
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
case RSX_FP_OPCODE_FLR: SetDst("_builtin_floor($0)"); return true;
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
case RSX_FP_OPCODE_LIT:
SetDst("_builtin_lit($0)");

View File

@ -294,6 +294,7 @@ public:
{
bool has_native_half_support = false;
bool emulate_depth_compare = false;
bool has_low_precision_rounding = false;
}
device_props;

View File

@ -248,7 +248,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
m_shader_props.low_precision_tests = device_props.has_low_precision_rounding;
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
@ -402,6 +402,7 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
}
decompiler.device_props.emulate_depth_compare = !pdev->get_formats_support().d24_unorm_s8;
decompiler.device_props.has_low_precision_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
decompiler.Task();
shader.create(::glsl::program_domain::glsl_fragment_program, source);