rsx: Nvidia driver compatibility workarounds

- Sanitize NaN values before they reach the driver. On NVIDIA, X * NaN evaluates to X rather than NaN.
This commit is contained in:
kd-11 2018-02-23 11:30:13 +03:00
parent 6b23e733d0
commit 705820c430
6 changed files with 35 additions and 8 deletions

View File

@ -305,7 +305,7 @@ public:
return 0; return 0;
} }
void fill_fragment_constants_buffer(gsl::span<f32, gsl::dynamic_range> dst_buffer, const RSXFragmentProgram &fragment_program) const void fill_fragment_constants_buffer(gsl::span<f32, gsl::dynamic_range> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize = false) const
{ {
const auto I = m_fragment_shader_cache.find(fragment_program); const auto I = m_fragment_shader_cache.find(fragment_program);
if (I == m_fragment_shader_cache.end()) if (I == m_fragment_shader_cache.end())
@ -345,6 +345,12 @@ public:
} }
} }
} }
else if (sanitize)
{
	//Lower NaNs to 0
	//A lane compares as "unordered" against 1.f only when it holds a NaN, so the mask is
	//all-ones in exactly the NaN lanes and andnot (~mask & value) clears those lanes to +0.0f
	const __m128 values = _mm_castsi128_ps(shuffled_vector);
	const __m128 nan_mask = _mm_cmpunord_ps(values, _mm_set1_ps(1.f));
	//Reinterpret through the cast intrinsics; casting the temporary returned by _mm_andnot_ps
	//to an lvalue reference is a non-standard extension and does not build on GCC/Clang
	_mm_stream_si128((__m128i*)dst, _mm_castps_si128(_mm_andnot_ps(nan_mask, values)));
}
else else
{ {
_mm_stream_si128((__m128i*)dst, shuffled_vector); _mm_stream_si128((__m128i*)dst, shuffled_vector);

View File

@ -1112,7 +1112,10 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
buf = static_cast<u8*>(mapping.first); buf = static_cast<u8*>(mapping.first);
fragment_constants_offset = mapping.second; fragment_constants_offset = mapping.second;
if (fragment_constants_size) if (fragment_constants_size)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program); {
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
fragment_program, gl::get_driver_caps().vendor_NVIDIA);
}
// Fragment state // Fragment state
fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program);

View File

@ -85,9 +85,9 @@ namespace gl
bool ARB_texture_barrier_supported = false; bool ARB_texture_barrier_supported = false;
bool NV_texture_barrier_supported = false; bool NV_texture_barrier_supported = false;
bool initialized = false; bool initialized = false;
bool vendor_INTEL = false; bool vendor_INTEL = false; //has broken GLSL compiler
bool vendor_AMD = false; bool vendor_AMD = false; //has broken ARB_multidraw
bool vendor_NVIDIA = false; bool vendor_NVIDIA = false; //has NaN poisoning issues
void initialize() void initialize()
{ {

View File

@ -2346,7 +2346,10 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
//Fragment constants //Fragment constants
buf = buf + 8192; buf = buf + 8192;
if (fragment_constants_sz) if (fragment_constants_sz)
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program); {
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) },
fragment_program, vk::sanitize_fp_values());
}
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);

View File

@ -14,8 +14,11 @@ namespace vk
VkSampler g_null_sampler = nullptr; VkSampler g_null_sampler = nullptr;
atomic_t<bool> g_cb_no_interrupt_flag { false }; atomic_t<bool> g_cb_no_interrupt_flag { false };
atomic_t<bool> g_drv_no_primitive_restart_flag { false };
atomic_t<bool> g_drv_force_32bit_indices{ false }; //Driver compatibility workarounds
bool g_drv_no_primitive_restart_flag = false;
bool g_drv_force_32bit_indices = false;
bool g_drv_sanitize_fp_values = false;
u64 g_num_processed_frames = 0; u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0; u64 g_num_total_frames = 0;
@ -312,6 +315,12 @@ namespace vk
g_drv_force_32bit_indices = true; g_drv_force_32bit_indices = true;
} }
#endif #endif
//Nvidia cards are easily susceptible to NaN poisoning
if (gpu_name.find("NVIDIA") != std::string::npos || gpu_name.find("GeForce") != std::string::npos)
{
g_drv_sanitize_fp_values = true;
}
} }
bool emulate_primitive_restart() bool emulate_primitive_restart()
@ -324,6 +333,11 @@ namespace vk
return g_drv_force_32bit_indices; return g_drv_force_32bit_indices;
} }
//Driver workaround query: true when floating point values should have NaNs
//flushed before upload. The backing flag is raised during driver detection
//when the GPU name contains "NVIDIA" or "GeForce".
bool sanitize_fp_values()
{
	const bool sanitize_enabled = g_drv_sanitize_fp_values;
	return sanitize_enabled;
}
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range) void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range)
{ {
//Prepare an image to match the new layout.. //Prepare an image to match the new layout..

View File

@ -72,6 +72,7 @@ namespace vk
//Compatibility workarounds //Compatibility workarounds
bool emulate_primitive_restart(); bool emulate_primitive_restart();
bool force_32bit_index_buffer(); bool force_32bit_index_buffer();
bool sanitize_fp_values();
VkComponentMapping default_component_map(); VkComponentMapping default_component_map();
VkImageSubresource default_image_subresource(); VkImageSubresource default_image_subresource();