From 705820c430fc5ed0df61406dd2889cf825945a12 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 23 Feb 2018 11:30:13 +0300 Subject: [PATCH] rsx: Nvidia driver compatibility workarounds - Sanitize NaN values before they reach the driver. On nvidia (X * NaN = X) --- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 8 +++++++- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 5 ++++- rpcs3/Emu/RSX/GL/GLHelpers.h | 6 +++--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 5 ++++- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 18 ++++++++++++++++-- rpcs3/Emu/RSX/VK/VKHelpers.h | 1 + 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 9cb4077272..3fb7689384 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -305,7 +305,7 @@ public: return 0; } - void fill_fragment_constants_buffer(gsl::span dst_buffer, const RSXFragmentProgram &fragment_program) const + void fill_fragment_constants_buffer(gsl::span dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize = false) const { const auto I = m_fragment_shader_cache.find(fragment_program); if (I == m_fragment_shader_cache.end()) @@ -345,6 +345,12 @@ public: } } } + else if (sanitize) + { + //Lower NaNs to 0 + const auto mask = _mm_cmpunord_ps((__m128&)shuffled_vector, _mm_set1_ps(1.f)); + _mm_stream_si128((__m128i*)dst, (__m128i&)_mm_andnot_ps(mask, (__m128&)shuffled_vector)); + } else { _mm_stream_si128((__m128i*)dst, shuffled_vector); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index d151e9ffc7..2c27924e02 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1112,7 +1112,10 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) buf = static_cast(mapping.first); fragment_constants_offset = mapping.second; if (fragment_constants_size) - m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, fragment_program); + { + m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, + fragment_program, gl::get_driver_caps().vendor_NVIDIA); + } // Fragment state fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index d06d11528e..ce356ebb66 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -85,9 +85,9 @@ namespace gl bool ARB_texture_barrier_supported = false; bool NV_texture_barrier_supported = false; bool initialized = false; - bool vendor_INTEL = false; - bool vendor_AMD = false; - bool vendor_NVIDIA = false; + bool vendor_INTEL = false; //has broken GLSL compiler + bool vendor_AMD = false; //has broken ARB_multidraw + bool vendor_NVIDIA = false; //has NaN poisoning issues void initialize() { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ea153b694d..9483bc2c5e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2346,7 +2346,10 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) //Fragment constants buf = buf + 8192; if (fragment_constants_sz) - m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); + { + m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, + fragment_program, vk::sanitize_fp_values()); + } fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 902b19891d..18e465dec2 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -14,8 +14,11 @@ namespace vk VkSampler g_null_sampler = nullptr; atomic_t g_cb_no_interrupt_flag { false }; - atomic_t g_drv_no_primitive_restart_flag { false }; - atomic_t g_drv_force_32bit_indices{ false }; + + //Driver compatibility workarounds + bool g_drv_no_primitive_restart_flag = false; + bool g_drv_force_32bit_indices = false; + bool g_drv_sanitize_fp_values = false; u64 g_num_processed_frames = 0; u64 g_num_total_frames = 0; @@ -312,6 +315,12 @@ namespace vk g_drv_force_32bit_indices = true; } #endif + + //Nvidia cards are easily susceptible to NaN poisoning + if (gpu_name.find("NVIDIA") != std::string::npos || gpu_name.find("GeForce") != std::string::npos) + { + g_drv_sanitize_fp_values = true; + } } bool emulate_primitive_restart() @@ -324,6 +333,11 @@ namespace vk return g_drv_force_32bit_indices; } + bool sanitize_fp_values() + { + return g_drv_sanitize_fp_values; + } + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range) { //Prepare an image to match the new layout.. diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 69a67d3f50..36ef6b69fa 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -72,6 +72,7 @@ namespace vk //Compatibility workarounds bool emulate_primitive_restart(); bool force_32bit_index_buffer(); + bool sanitize_fp_values(); VkComponentMapping default_component_map(); VkImageSubresource default_image_subresource();