diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index cc2f25a2cb..5d54317402 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -313,5 +313,15 @@ namespace rsx } } } + + void sort(std::predicate auto predicate) /* In-place std::sort over [begin(), end()) with the given comparator; arrays holding fewer than two elements are left untouched */ + { + if (_size < 2) + { + return; + } + + std::sort(begin(), end(), predicate); + } }; } diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 9098a1356e..b92e90af4c 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -48,48 +48,17 @@ void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) void GLFragmentDecompilerThread::insertInputs(std::stringstream & OS) { - for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) - { - for (const ParamItem& PI : PT.items) + glsl::insert_fragment_shader_inputs_block( /* the per-backend declaration loop is replaced by the generator shared with the Vulkan backend */ + OS, + glsl::extension_type::NV, /* passed as the generator's bary_coords_extenstion_type argument */ + m_prog, + m_parr.params[PF_PARAM_IN], { - //ssa is defined in the program body and is not a varying type - if (PI.name == "ssa") continue; - - const auto reg_location = gl::get_varying_register_location(PI.name); - std::string var_name = PI.name; - - if (var_name == "fogc") - { - var_name = "fog_c"; - } - else if (m_prog.two_sided_lighting) - { - if (var_name == "diff_color") - { - var_name = "diff_color0"; - } - else if (var_name == "spec_color") - { - var_name = "spec_color0"; - } - } - - OS << "layout(location=" << reg_location << ") in vec4 " << var_name << ";\n"; - } - } - - if (m_prog.two_sided_lighting) - { - if (properties.in_register_mask & in_diff_color) - { - OS << "layout(location=" << gl::get_varying_register_location("diff_color1") << ") in vec4 diff_color1;\n"; - } - - if (properties.in_register_mask & in_spec_color) - { - OS << "layout(location=" << gl::get_varying_register_location("spec_color1") << ") in vec4 spec_color1;\n"; - } - } + .two_sided_color = !!(properties.in_register_mask & in_diff_color), + 
.two_sided_specular = !!(properties.in_register_mask & in_spec_color) + }, + gl::get_varying_register_location + ); } void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS) @@ -211,7 +180,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_linear_to_srgb = properties.has_pkg; m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; - m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; + m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION); /* the NVIDIA low-precision tests are switched off whenever the attribute interpolation bit is set on the program */ m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; m_shader_props.ROP_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 1477321ea6..d33c35b471 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -145,6 +145,14 @@ void GLGSRender::on_init_thread() } } + if (gl_caps.NV_fragment_shader_barycentric_supported && /* cleared only when all three hold: extension reported, NVIDIA driver, shader precision not set to low */ + gl_caps.vendor_NVIDIA && + g_cfg.video.shader_precision != gpu_preset_level::low) + { + // NVIDIA's attribute interpolation requires some workarounds + backend_config.supports_normalized_barycentrics = false; + } + // Use industry standard resource alignment values as defaults m_uniform_buffer_offset_align = 256; m_min_texbuffer_alignment = 256; diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp index 4f5d7bc05a..df92d7b920 100644 --- a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp @@ -33,7 +33,7 @@ namespace gl void capabilities::initialize() { - int find_count = 15; + int find_count = 16; /* raised from 15 for the extra extension check added further down, which decrements it on a match */ int ext_count = 0; 
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); @@ -157,6 +157,13 @@ namespace gl find_count--; continue; } + + if (check(ext_name, "GL_NV_fragment_shader_barycentric")) + { + NV_fragment_shader_barycentric_supported = true; + find_count--; + continue; + } } // Set GLSL version diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.h b/rpcs3/Emu/RSX/GL/glutils/capabilities.h index 4e8a3a0183..5ef9eb0260 100644 --- a/rpcs3/Emu/RSX/GL/glutils/capabilities.h +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.h @@ -38,6 +38,7 @@ namespace gl bool AMD_gpu_shader_half_float_supported = false; bool ARB_compute_shader_supported = false; bool NV_depth_buffer_float_supported = false; + bool NV_fragment_shader_barycentric_supported = false; bool vendor_INTEL = false; // has broken GLSL compiler bool vendor_AMD = false; // has broken ARB_multidraw diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index e71f5111d1..1f6231abcc 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -1,7 +1,9 @@ #include "stdafx.h" #include "Utilities/StrFmt.h" +#include "../Common/simple_array.hpp" #include "GLSLCommon.h" +#include "RSXFragmentProgram.h" namespace program_common { @@ -1140,4 +1142,112 @@ namespace glsl " uint flags;\n" "};\n\n"; } + + // Emits the fragment shader input (varying) declarations into OS, ordered by layout location. + // bary_coords_extenstion_type is the GLSL suffix ("EXT", "KHR" or "NV") appended to 'pervertex' and 'gl_BaryCoord'. + // varying_location resolves a varying name to its layout location. + // If prog.ctrl has RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION set, inputs are declared as per-vertex arrays named <name>_raw[3] + // and every varying is rebuilt manually from the barycentric weights; otherwise plain 'in' declarations are written. + void insert_fragment_shader_inputs_block( + std::stringstream& OS, + const std::string_view bary_coords_extenstion_type, + const RSXFragmentProgram& prog, + const std::vector& params, + const two_sided_lighting_config& _2sided_lighting, + std::function varying_location) + { + struct _varying_register_config + { + int location; + std::string name; + std::string type; + }; + + // NOTE(review): elements own std::string members - confirm rsx::simple_array is safe for non-trivial types (otherwise use std::vector) + rsx::simple_array<_varying_register_config> varying_list; + + for (const ParamType& PT : params) + { + for (const ParamItem& PI : PT.items) + { + // ssa is defined in the program body and is not a varying type + if (PI.name == "ssa") continue; + + const auto reg_location = varying_location(PI.name); + std::string var_name = PI.name; + + if (var_name == "fogc") + { + 
var_name = "fog_c"; + } + else if (prog.two_sided_lighting) + { + if (var_name == "diff_color") + { + var_name = "diff_color0"; + } + else if (var_name == "spec_color") + { + var_name = "spec_color0"; + } + } + + varying_list.push_back({ reg_location, var_name, PT.type }); + } + } + + if (prog.two_sided_lighting) + { + if (_2sided_lighting.two_sided_color) + { + varying_list.push_back({ varying_location("diff_color1"), "diff_color1", "vec4" }); + } + + if (_2sided_lighting.two_sided_specular) + { + varying_list.push_back({ varying_location("spec_color1"), "spec_color1", "vec4" }); + } + } + + if (varying_list.empty()) + { + return; + } + + // Make the output a little nicer + varying_list.sort(FN(x.location < y.location)); + + if (!(prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION)) + { + for (const auto& reg : varying_list) + { + OS << "layout(location=" << reg.location << ") in " << reg.type << " " << reg.name << ";\n"; + } + + return; + } + + for (const auto& reg : varying_list) + { + OS << "layout(location=" << reg.location << ") pervertex" << bary_coords_extenstion_type << " in " << reg.type << " " << reg.name << "_raw[3];\n"; + } + + // Interpolate the input attributes manually. + // Matches AMD behavior where gl_BaryCoordSmoothAMD only provides x and y with z being autogenerated. 
+ const std::string bary_coord = std::string("gl_BaryCoord").append(bary_coords_extenstion_type); + OS << + "vec4 _interpolate_varying3(const in vec4[3] v)\n" + "{\n" + " const float BaryCoord_z = 1.0 - (" << bary_coord << ".x + " << bary_coord << ".y);\n" + " return " << bary_coord << ".x * v[0] + " << bary_coord << ".y * v[1] + BaryCoord_z * v[2];\n" + "}\n\n"; + + // NOTE(review): the interpolated copies are always declared vec4 while the raw arrays use reg.type; this presumes every varying is a vec4 + for (const auto& reg : varying_list) + { + OS << "vec4 " << reg.name << " = _interpolate_varying3(" << reg.name << "_raw);\n"; + } + + OS << "\n"; + } } diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.h b/rpcs3/Emu/RSX/Program/GLSLCommon.h index 7c6ff28e5a..b257ca5c3d 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.h @@ -4,6 +4,8 @@ #include "GLSLTypes.h" #include "ShaderParam.h" +class RSXFragmentProgram; + namespace rsx { // TODO: Move this somewhere else once more compilers are supported other than glsl @@ -82,6 +84,20 @@ namespace program_common namespace glsl { + struct two_sided_lighting_config + { + bool two_sided_color; + bool two_sided_specular; + }; + + struct extension_type + { + static constexpr std::string_view + EXT = "EXT", + KHR = "KHR", + NV = "NV"; + }; + std::string getFloatTypeNameImpl(usz elementCount); std::string getHalfTypeNameImpl(usz elementCount); std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar = false); @@ -92,4 +108,12 @@ namespace glsl void insert_fog_declaration(std::ostream& OS); std::string getFunctionImpl(FUNCTION f); void insert_subheader_block(std::ostream& OS); + + void insert_fragment_shader_inputs_block( + std::stringstream& OS, + const std::string_view bary_coords_extenstion_type, + const RSXFragmentProgram& prog, + const std::vector& params, + const two_sided_lighting_config& _2sided_lighting, + std::function varying_location); } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index f470b7997f..f5e9ac9423 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1947,7 +1947,7 @@ namespace rsx ensure(!(m_graphics_state & 
rsx::pipeline_state::vertex_program_ucode_dirty)); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); - current_vertex_program.ctrl = rsx::method_registers.current_draw_clause.classify_mode() == primitive_class::polygon ? RSX_SHADER_CONTROL_POLYGON_RASTER : 0; + current_vertex_program.ctrl = 0; // Reserved for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { @@ -2157,7 +2157,7 @@ namespace rsx if (method_registers.current_draw_clause.classify_mode() == primitive_class::polygon) { - current_fragment_program.ctrl |= RSX_SHADER_CONTROL_POLYGON_RASTER; + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION; /* NOTE(review): raised for every polygon draw in this hunk - confirm a backend without barycentric support never generates shaders with this bit set */ } else if (method_registers.point_sprite_enabled() && method_registers.current_draw_clause.primitive == primitive_type::points) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index eba99f6d59..aeffffda4b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -44,6 +44,12 @@ void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) required_extensions.emplace_back("GL_ARB_shader_texture_image_samples"); } + if (m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION) /* programs with the interpolation bit set get a GLSL version of at least 450 and the barycentric extension added to the required list */ + { + version = std::max(version, 450); + required_extensions.emplace_back("GL_EXT_fragment_shader_barycentric"); + } + OS << "#version " << version << "\n"; for (const auto ext : required_extensions) { @@ -57,48 +63,17 @@ void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) void VKFragmentDecompilerThread::insertInputs(std::stringstream & OS) { - for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) - { - for (const ParamItem& PI : PT.items) + glsl::insert_fragment_shader_inputs_block( /* the per-backend declaration loop is replaced by the generator shared with the GL backend */ + OS, + glsl::extension_type::EXT, /* same suffix as the GL_EXT_fragment_shader_barycentric extension requested in insertHeader */ + m_prog, + m_parr.params[PF_PARAM_IN], { - //ssa is defined in the program body and is not a varying type - if (PI.name == "ssa") continue; - 
- const auto reg_location = vk::get_varying_register_location(PI.name); - std::string var_name = PI.name; - - if (var_name == "fogc") - { - var_name = "fog_c"; - } - else if (m_prog.two_sided_lighting) - { - if (var_name == "diff_color") - { - var_name = "diff_color0"; - } - else if (var_name == "spec_color") - { - var_name = "spec_color0"; - } - } - - OS << "layout(location=" << reg_location << ") in " << PT.type << " " << var_name << ";\n"; - } - } - - if (m_prog.two_sided_lighting) - { - if (properties.in_register_mask & in_diff_color) - { - OS << "layout(location=" << vk::get_varying_register_location("diff_color1") << ") in vec4 diff_color1;\n"; - } - - if (properties.in_register_mask & in_spec_color) - { - OS << "layout(location=" << vk::get_varying_register_location("spec_color1") << ") in vec4 spec_color1;\n"; - } - } + .two_sided_color = !!(properties.in_register_mask & in_diff_color), + .two_sided_specular = !!(properties.in_register_mask & in_spec_color) + }, + vk::get_varying_register_location + ); } void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) @@ -270,7 +245,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_linear_to_srgb = properties.has_pkg; m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; - m_shader_props.low_precision_tests = device_props.has_low_precision_rounding; + m_shader_props.low_precision_tests = device_props.has_low_precision_rounding && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION); /* the low-precision tests are switched off whenever the attribute interpolation bit is set on the program */ m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; m_shader_props.ROP_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 
b495b66c09..06ca2e75bb 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -539,8 +539,11 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) backend_config.supports_multidraw = true; - // NVIDIA has broken barycentric interpolation - backend_config.supports_normalized_barycentrics = (vk::get_driver_vendor() != vk::driver_vendor::NVIDIA); + // NVIDIA has broken attribute interpolation. Like the GL backend, only report it when the driver is NVIDIA, barycoords are supported and shader precision is not low + backend_config.supports_normalized_barycentrics = ( + vk::get_driver_vendor() != vk::driver_vendor::NVIDIA || + !m_device->get_barycoords_support() || + g_cfg.video.shader_precision == gpu_preset_level::low); // NOTE: We do not actually need multiple sample support for A2C to work // This is here for visual consistency - will be removed when AA problems due to mipmaps are fixed diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 3b3f872907..3446dfbd85 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -36,6 +36,7 @@ namespace vk VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{}; VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_info{}; VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loops_info{}; + VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{}; if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { @@ -65,6 +66,13 @@ namespace vk features2.pNext = &fbo_loops_info; } + if (device_extensions.is_supported(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME)) /* chain the barycentric feature struct into the features2 query only when the extension is listed */ + { + shader_barycentric_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR; + shader_barycentric_info.pNext = features2.pNext; + features2.pNext = &shader_barycentric_info; + } + auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR")); ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddress 
failed to find entry point!" _vkGetPhysicalDeviceFeatures2KHR(dev, &features2); @@ -73,6 +81,7 @@ namespace vk shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16; shader_types_support.allow_int8 = !!shader_support_info.shaderInt8; framebuffer_loops_support = !!fbo_loops_info.attachmentFeedbackLoopLayout; + barycoords_support = !!shader_barycentric_info.fragmentShaderBarycentric; /* stays false when the struct was not chained above (it is value-initialized). NOTE(review): this only records the reported feature - confirm the extension and feature are also enabled at logical device creation */ features = features2.features; if (descriptor_indexing_support) diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index f4b5bac8d4..09ca7e6c63 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -63,6 +63,7 @@ namespace vk bool sampler_mirror_clamped_support : 1 = false; bool descriptor_indexing_support : 1 = false; bool framebuffer_loops_support : 1 = false; + bool barycoords_support : 1 = false; u32 descriptor_max_draw_calls = DESCRIPTOR_MAX_DRAW_CALLS; u64 descriptor_update_after_bind_mask = 0; @@ -151,6 +152,7 @@ namespace vk bool get_debug_utils_support() const { return g_cfg.video.renderdoc_compatiblity && pgpu->debug_utils_support; } bool get_descriptor_indexing_support() const { return pgpu->descriptor_indexing_support; } bool get_framebuffer_loops_support() const { return pgpu->framebuffer_loops_support; } + bool get_barycoords_support() const { return pgpu->barycoords_support; } u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_update_after_bind_mask; } u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; } diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index dfdb342ece..7cc313703d 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -781,7 +781,7 @@ enum RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used?? 
// Custom - RSX_SHADER_CONTROL_POLYGON_RASTER = 0x10000 // Rasterizing triangles and not lines or points + RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000 // Raised only while rasterizing polygons (not lines or points); makes the fragment shader inputs block interpolate varyings manually from barycentric coordinates }; // GCM Reports