forked from ShuriZma/suyu
1
0
Fork 0

emit_spirv: Work around VK_KHR_shader_float_controls on fp16 Nvidia

Fix regression on Fire Emblem: Three Houses when using native fp16.
This commit is contained in:
ReinUsesLisp 2021-06-29 18:42:17 -03:00 committed by ameerj
parent 1b27a2b597
commit 8722668b3c
4 changed files with 12 additions and 5 deletions

View File

@ -319,7 +319,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
Id main_func) { Id main_func) {
const Info& info{program.info}; const Info& info{program.info};
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
LOG_WARNING(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
} else if (info.uses_fp32_denorms_flush) { } else if (info.uses_fp32_denorms_flush) {
if (profile.support_fp32_denorm_flush) { if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero); ctx.AddCapability(spv::Capability::DenormFlushToZero);
@ -332,15 +332,15 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
} else { } else {
LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
} }
} }
if (!profile.support_separate_denorm_behavior) { if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
// No separate denorm behavior // No separate denorm behavior
return; return;
} }
if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
LOG_WARNING(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
} else if (info.uses_fp16_denorms_flush) { } else if (info.uses_fp16_denorms_flush) {
if (profile.support_fp16_denorm_flush) { if (profile.support_fp16_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero); ctx.AddCapability(spv::Capability::DenormFlushToZero);
@ -353,13 +353,16 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
} else { } else {
LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
} }
} }
} }
void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
EmitContext& ctx, Id main_func) { EmitContext& ctx, Id main_func) {
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
return;
}
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);

View File

@ -58,6 +58,8 @@ struct Profile {
bool has_broken_unsigned_image_offsets{}; bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted /// Signed instructions with unsigned data types are misinterpreted
bool has_broken_signed_operations{}; bool has_broken_signed_operations{};
/// Float controls break when fp16 is enabled
bool has_broken_fp16_float_controls{};
/// Dynamic vec4 indexing is broken on some OpenGL drivers /// Dynamic vec4 indexing is broken on some OpenGL drivers
bool has_gl_component_indexing_bug{}; bool has_gl_component_indexing_bug{};
/// The precise type qualifier is broken in the fragment stage of some drivers /// The precise type qualifier is broken in the fragment stage of some drivers

View File

@ -206,6 +206,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.has_broken_spirv_clamp = true, .has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true, .has_broken_unsigned_image_offsets = true,
.has_broken_signed_operations = true, .has_broken_signed_operations = true,
.has_broken_fp16_float_controls = false,
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
.has_gl_precise_bug = device.HasPreciseBug(), .has_gl_precise_bug = device.HasPreciseBug(),
.ignore_nan_fp_comparisons = true, .ignore_nan_fp_comparisons = true,

View File

@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
.has_broken_unsigned_image_offsets = false, .has_broken_unsigned_image_offsets = false,
.has_broken_signed_operations = false, .has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
.ignore_nan_fp_comparisons = false, .ignore_nan_fp_comparisons = false,
}; };
host_info = Shader::HostTranslateInfo{ host_info = Shader::HostTranslateInfo{