From 6ab24e6c176bfd288296bf8284a30b0835fa45c3 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 11 Jun 2022 21:03:09 -0500 Subject: [PATCH] VideoCommon: Better driver bug handling Adds a pass to process driver deficiencies between UID caching and use, allowing a full view of the whole pipeline, since some bugs/workarounds involve interactions between blend modes and the pixel shader --- Source/Core/VideoBackends/OGL/OGLRender.cpp | 80 +++++------ .../VideoBackends/Software/Rasterizer.cpp | 2 +- Source/Core/VideoBackends/Software/Tev.cpp | 2 +- .../Core/VideoBackends/Vulkan/VKPipeline.cpp | 82 +++++------- .../VideoBackends/Vulkan/VulkanContext.cpp | 7 - Source/Core/VideoCommon/BPMemory.h | 21 ++- Source/Core/VideoCommon/DriverDetails.h | 3 +- Source/Core/VideoCommon/GXPipelineTypes.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 102 ++++---------- Source/Core/VideoCommon/PixelShaderGen.h | 7 +- .../Core/VideoCommon/PixelShaderManager.cpp | 2 +- Source/Core/VideoCommon/RenderState.cpp | 28 ++++ Source/Core/VideoCommon/RenderState.h | 2 + Source/Core/VideoCommon/ShaderCache.cpp | 126 ++++++++++++++++-- Source/Core/VideoCommon/UberShaderPixel.cpp | 19 ++- Source/Core/VideoCommon/UberShaderPixel.h | 9 +- 16 files changed, 292 insertions(+), 202 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 4c686941fe..93df1ccc29 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -1168,55 +1168,43 @@ void Renderer::ApplyBlendingState(const BlendingState state) if (m_current_blend_state == state) return; - bool useDualSource = - state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend && - (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha); - // Only use shader blend if we need to and we don't support dual-source blending directly - bool useShaderBlend = !useDualSource && state.usedualsrc 
&& state.dstalpha && - g_ActiveConfig.backend_info.bSupportsFramebufferFetch; + bool useDualSource = state.usedualsrc; - if (useShaderBlend) - { - glDisable(GL_BLEND); - } + const GLenum src_factors[8] = {GL_ZERO, + GL_ONE, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, + useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : + (GLenum)GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA}; + const GLenum dst_factors[8] = {GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, + useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : + (GLenum)GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA}; + + if (state.blendenable) + glEnable(GL_BLEND); else - { - const GLenum src_factors[8] = {GL_ZERO, - GL_ONE, - GL_DST_COLOR, - GL_ONE_MINUS_DST_COLOR, - useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, - useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : - (GLenum)GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA}; - const GLenum dst_factors[8] = {GL_ZERO, - GL_ONE, - GL_SRC_COLOR, - GL_ONE_MINUS_SRC_COLOR, - useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, - useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : - (GLenum)GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA}; + glDisable(GL_BLEND); - if (state.blendenable) - glEnable(GL_BLEND); - else - glDisable(GL_BLEND); - - // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when - // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics - // driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic - // Adventure 2 Battle: graphics crash when loading first Dark level" - GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; - GLenum equationAlpha = state.subtractAlpha ? 
GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; - glBlendEquationSeparate(equation, equationAlpha); - glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())], - dst_factors[u32(state.dstfactor.Value())], - src_factors[u32(state.srcfactoralpha.Value())], - dst_factors[u32(state.dstfactoralpha.Value())]); - } + // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when + // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics + // driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic + // Adventure 2 Battle: graphics crash when loading first Dark level" + GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; + GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; + glBlendEquationSeparate(equation, equationAlpha); + glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())], + dst_factors[u32(state.dstfactor.Value())], + src_factors[u32(state.srcfactoralpha.Value())], + dst_factors[u32(state.dstfactoralpha.Value())]); const GLenum logic_op_codes[16] = { GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP, diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index 4b3f02758d..b7170527d5 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -153,7 +153,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) s32 z = (s32)std::clamp(ZSlope.GetValue(x, y), 0.0f, 16777215.0f); - if (bpmem.UseEarlyDepthTest()) + if (bpmem.GetEmulatedZ() == EmulatedZ::Early) { // TODO: Test if perf regs are incremented even if test is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 64e0f7774b..c2039df3ea 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -840,7 
+840,7 @@ void Tev::Draw() output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; } - if (bpmem.UseLateDepthTest()) + if (bpmem.GetEmulatedZ() == EmulatedZ::Late) { // TODO: Check against hw if these values get incremented even if depth testing is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); diff --git a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp index 61c51b5d34..f21fb9bf5e 100644 --- a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp @@ -137,60 +137,48 @@ GetVulkanAttachmentBlendState(const BlendingState& state, AbstractPipelineUsage { VkPipelineColorBlendAttachmentState vk_state = {}; - bool use_dual_source = - state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend && - (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha); - bool use_shader_blend = !use_dual_source && state.usedualsrc && state.dstalpha && - g_ActiveConfig.backend_info.bSupportsFramebufferFetch; + bool use_dual_source = state.usedualsrc; - if (use_shader_blend || (usage == AbstractPipelineUsage::GX && - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))) + vk_state.blendEnable = static_cast<VkBool32>(state.blendenable); + vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + + if (use_dual_source) { - vk_state.blendEnable = VK_FALSE; + static constexpr std::array<VkBlendFactor, 8> src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + static constexpr std::array<VkBlendFactor, 8> dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())]; + vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())]; + vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())]; + vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())]; } else { - vk_state.blendEnable = static_cast<VkBool32>(state.blendenable); - vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; - vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + static constexpr std::array<VkBlendFactor, 8> src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - if (use_dual_source) - { - static constexpr std::array<VkBlendFactor, 8> src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - static constexpr std::array<VkBlendFactor, 8> dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + static constexpr std::array<VkBlendFactor, 8> dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())]; - vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())]; - vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())]; - vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())]; - } - else - { - static constexpr std::array<VkBlendFactor, 8> src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - static constexpr std::array<VkBlendFactor, 8> dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - 
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())]; - vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())]; - vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())]; - vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())]; - } + vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())]; + vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())]; + vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())]; + vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())]; } if (state.colorupdate) diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 1dc02e689a..a6cb201d72 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -371,13 +371,6 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE)) config->backend_info.bSupportsReversedDepthRange = false; - // Calling discard when early depth test is enabled can break on some Apple Silicon GPU drivers. - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) - { - // We will use shader blending, so disable hardware dual source blending. 
- config->backend_info.bSupportsDualSourceBlend = false; - } - // Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING)) config->backend_info.bSupportsDynamicSamplerIndexing = false; diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 28155a2cd6..2caeef62e1 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -2336,6 +2336,16 @@ struct BPCmd int newvalue; }; +enum class EmulatedZ : u32 +{ + Disabled = 0, + Early = 1, + Late = 2, + ForcedEarly = 3, + EarlyWithFBFetch = 4, + EarlyWithZComplocHack = 5, +}; + struct BPMemory { GenMode genMode; @@ -2403,8 +2413,15 @@ struct BPMemory u32 bpMask; // 0xFE u32 unknown18; // ff - bool UseEarlyDepthTest() const { return zcontrol.early_ztest && zmode.testenable; } - bool UseLateDepthTest() const { return !zcontrol.early_ztest && zmode.testenable; } + EmulatedZ GetEmulatedZ() const + { + if (!zmode.testenable) + return EmulatedZ::Disabled; + if (zcontrol.early_ztest) + return EmulatedZ::Early; + else + return EmulatedZ::Late; + } }; #pragma pack() diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index c1dbdb0a4a..646fa00f74 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -237,7 +237,8 @@ enum Bug // crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD. // These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can // cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth - // buffer whenever a fragment is discarded. Disable dual-source blending support on these drivers. + // buffer if dual source blending is output in the shader but not subsequently used in blending. + // Compile separate shaders for DSB on vs off for these drivers. 
BUG_BROKEN_DUAL_SOURCE_BLENDING, // BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h index b448dc107f..c226739b7d 100644 --- a/Source/Core/VideoCommon/GXPipelineTypes.h +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -19,7 +19,7 @@ namespace VideoCommon // As pipelines encompass both shader UIDs and render states, changes to either of these should // also increment the pipeline UID version. Incrementing the UID version will cause all UID // caches to be invalidated. -constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 10215 +constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747 struct GXPipelineUid { diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 3304c74658..6177e2ad3e 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -167,9 +167,6 @@ constexpr Common::EnumMap tev_a_output_table{ "c2.a", }; -// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support) -// leak into this UID; This is really unhelpful if these UIDs ever move from one machine to -// another. PixelShaderUid GetPixelShaderUid() { PixelShaderUid out; @@ -189,20 +186,25 @@ PixelShaderUid GetPixelShaderUid() u32 numStages = uid_data->genMode_numtevstages + 1; - const bool forced_early_z = - bpmem.UseEarlyDepthTest() && + uid_data->Pretest = bpmem.alpha_test.TestResult(); + uid_data->ztest = bpmem.GetEmulatedZ(); + if (uid_data->ztest == EmulatedZ::Early && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. 
- && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); + && !bpmem.genMode.zfreeze) + { + uid_data->ztest = EmulatedZ::ForcedEarly; + } + + const bool forced_early_z = uid_data->ztest == EmulatedZ::ForcedEarly; const bool per_pixel_depth = - (bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) || + (bpmem.ztex2.op != ZTexOp::Disabled && uid_data->ztest == EmulatedZ::Late) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || (bpmem.zmode.testenable && bpmem.genMode.zfreeze); uid_data->per_pixel_depth = per_pixel_depth; - uid_data->forced_early_z = forced_early_z; if (g_ActiveConfig.bEnablePixelLighting) { @@ -285,59 +287,24 @@ PixelShaderUid GetPixelShaderUid() sizeof(*uid_data) : MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); - uid_data->Pretest = bpmem.alpha_test.TestResult(); - uid_data->late_ztest = bpmem.UseLateDepthTest(); - // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha // testing result) if (uid_data->Pretest == AlphaTestResult::Undetermined || - (uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest)) + (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late)) { uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; uid_data->alpha_test_logic = bpmem.alpha_test.logic; - - // ZCOMPLOC HACK: - // The only way to emulate alpha test + early-z is to force early-z in the shader. - // As this isn't available on all drivers and as we can't emulate this feature otherwise, - // we are only able to choose which one we want to respect more. - // Tests seem to have proven that writing depth even when the alpha test fails is more - // important that a reliable alpha test, so we just force the alpha test to always succeed. - // At least this seems to be less buggy. 
- uid_data->alpha_test_use_zcomploc_hack = - bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && - !g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze; } uid_data->zfreeze = bpmem.genMode.zfreeze; uid_data->ztex_op = bpmem.ztex2.op; - uid_data->early_ztest = bpmem.UseEarlyDepthTest(); uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; - BlendingState state = {}; - state.Generate(bpmem); - - if (((state.usedualsrc && state.dstalpha) || - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) && - g_ActiveConfig.backend_info.bSupportsFramebufferFetch && - !g_ActiveConfig.backend_info.bSupportsDualSourceBlend) - { - uid_data->blend_enable = state.blendenable; - uid_data->blend_src_factor = state.srcfactor; - uid_data->blend_src_factor_alpha = state.srcfactoralpha; - uid_data->blend_dst_factor = state.dstfactor; - uid_data->blend_dst_factor_alpha = state.dstfactoralpha; - uid_data->blend_subtract = state.subtract; - uid_data->blend_subtract_alpha = state.subtractAlpha; - } - - uid_data->logic_op_enable = state.logicopenable; - uid_data->logic_op_mode = u32(state.logicmode.Value()); - return out; } @@ -798,7 +765,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " "sampleTexture(texmap, samp[texmap], uv, layer)\n"); - if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) + if (uid_data->ztest == EmulatedZ::ForcedEarly) { // Zcomploc (aka early_ztest) is a way to control whether depth test is done before // or after texturing and alpha test. 
PC graphics APIs used to provide no way to emulate @@ -837,28 +804,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("FORCE_EARLY_Z; \n"); } - // Only use dual-source blending when required on drivers that don't support it very well. - const bool use_dual_source = - host_config.backend_dual_source_blend && - (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || - uid_data->useDstAlpha); - const bool use_shader_blend = - !use_dual_source && - (uid_data->useDstAlpha || - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) && - host_config.backend_shader_framebuffer_fetch; - const bool use_shader_logic_op = !host_config.backend_logic_op && uid_data->logic_op_enable && - host_config.backend_shader_framebuffer_fetch; - const bool use_framebuffer_fetch = - use_shader_blend || use_shader_logic_op || - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z); + const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable || + uid_data->ztest == EmulatedZ::EarlyWithFBFetch; #ifdef __APPLE__ // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // if we want to use it. if (api_type == APIType::Vulkan) { - if (use_dual_source) + if (!uid_data->no_dual_src) { out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", @@ -891,7 +845,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos uid_data->uint_output ? "uvec4" : "vec4", use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - if (use_dual_source) + if (!uid_data->no_dual_src) { out.Write("{} out {} ocol1;\n", has_broken_decoration ? 
"FRAGMENT_OUTPUT_LOCATION(1)" : @@ -960,7 +914,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("\tfloat4 ocol0;\n"); } - if (use_shader_blend) + if (uid_data->blend_enable) { out.Write("\tfloat4 ocol1;\n"); } @@ -1086,10 +1040,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // (in this case we need to write a depth value if depth test passes regardless of the alpha // testing result) if (uid_data->Pretest == AlphaTestResult::Undetermined || - (uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest)) + (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late)) { WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth, - use_dual_source || use_shader_blend); + !uid_data->no_dual_src || uid_data->blend_enable); } // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the @@ -1144,7 +1098,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off; // Note: z-textures are not written to depth buffer if early depth test is used - if (uid_data->per_pixel_depth && uid_data->early_ztest) + const bool early_ztest = uid_data->ztest == EmulatedZ::Early || + uid_data->ztest == EmulatedZ::EarlyWithFBFetch || + uid_data->ztest == EmulatedZ::EarlyWithZComplocHack; + if (uid_data->per_pixel_depth && early_ztest) { if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); @@ -1165,7 +1122,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); } - if (uid_data->per_pixel_depth && uid_data->late_ztest) + if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late) { if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 
16777216.0;\n"); @@ -1184,14 +1141,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos WriteFog(out, uid_data); - if (use_shader_logic_op) + if (uid_data->logic_op_enable) WriteLogicOp(out, uid_data); // Write the color and alpha values to the framebuffer // If using shader blend, we still use the separate alpha - WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend); + WriteColor(out, api_type, uid_data, !uid_data->no_dual_src || uid_data->blend_enable); - if (use_shader_blend) + if (uid_data->blend_enable) WriteBlend(out, uid_data); else if (use_framebuffer_fetch) out.Write("\treal_ocol0 = ocol0;\n"); @@ -1728,11 +1685,10 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat } // ZCOMPLOC HACK: - if (!uid_data->alpha_test_use_zcomploc_hack) + if (uid_data->ztest != EmulatedZ::EarlyWithZComplocHack) { #ifdef __APPLE__ - if (uid_data->forced_early_z && - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) + if (uid_data->ztest == EmulatedZ::EarlyWithFBFetch) { // Instead of using discard, fetch the framebuffer's color value and use it as the output // for this fragment. 
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 6a7a638ac3..34e0239ddc 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -12,6 +12,7 @@ enum class AlphaTestOp : u32; enum class AlphaTestResult; enum class CompareMode : u32; enum class DstBlendFactor : u32; +enum class EmulatedZ : u32; enum class FogProjection : u32; enum class FogType : u32; enum class KonstSel : u32; @@ -28,6 +29,7 @@ struct pixel_shader_uid_data u32 NumValues() const { return num_values; } u32 pad0 : 4; u32 useDstAlpha : 1; + u32 no_dual_src : 1; AlphaTestResult Pretest : 2; u32 nIndirectStagesUsed : 4; u32 genMode_numtexgens : 4; @@ -36,16 +38,13 @@ struct pixel_shader_uid_data CompareMode alpha_test_comp0 : 3; CompareMode alpha_test_comp1 : 3; AlphaTestOp alpha_test_logic : 2; - u32 alpha_test_use_zcomploc_hack : 1; FogProjection fog_proj : 1; FogType fog_fsel : 3; u32 fog_RangeBaseEnabled : 1; ZTexOp ztex_op : 2; u32 per_pixel_depth : 1; - u32 forced_early_z : 1; - u32 early_ztest : 1; - u32 late_ztest : 1; + EmulatedZ ztest : 3; u32 bounding_box : 1; u32 zfreeze : 1; u32 numColorChans : 2; diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 4e6ce9ff6f..b0319e1f0e 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -448,7 +448,7 @@ void PixelShaderManager::SetGenModeChanged() void PixelShaderManager::SetZModeControl() { - u32 late_ztest = bpmem.UseLateDepthTest(); + u32 late_ztest = bpmem.GetEmulatedZ() == EmulatedZ::Late; u32 rgba6_format = (bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ? 
1 : diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index 3b3554b4fe..a997c654f2 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -25,6 +25,34 @@ void DepthState::Generate(const BPMemory& bp) func = bp.zmode.func.Value(); } +static bool IsDualSrc(SrcBlendFactor factor) +{ + return factor == SrcBlendFactor::SrcAlpha || factor == SrcBlendFactor::InvSrcAlpha; +} + +static bool IsDualSrc(DstBlendFactor factor) +{ + switch (factor) + { + case DstBlendFactor::SrcClr: + case DstBlendFactor::SrcAlpha: + case DstBlendFactor::InvSrcClr: + case DstBlendFactor::InvSrcAlpha: + return true; + default: + return false; + } +} + +bool BlendingState::RequiresDualSrc() const +{ + bool requires_dual_src = false; + requires_dual_src |= IsDualSrc(srcfactor) || IsDualSrc(srcfactoralpha); + requires_dual_src |= IsDualSrc(dstfactor) || IsDualSrc(dstfactoralpha); + requires_dual_src &= blendenable && usedualsrc; + return requires_dual_src; +} + // If the framebuffer format has no alpha channel, it is assumed to // ONE on blending. 
As the backends may emulate this framebuffer // configuration with an alpha channel, we just drop all references diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 6fca2eff15..992c70fb04 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -142,6 +142,8 @@ union BlendingState BitField<17, 3, SrcBlendFactor> srcfactoralpha; BitField<20, 4, LogicOp> logicmode; + bool RequiresDualSrc() const; + u32 hex; }; diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 65e8c22882..e4d2115c3b 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -10,6 +10,7 @@ #include "Common/MsgHandler.h" #include "Core/ConfigManager.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/RenderBase.h" @@ -612,8 +613,95 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( return config; } -std::optional ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config) +/// Edits the UID based on driver bugs and other special configurations +static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in) { + GXPipelineUid out; + memcpy(&out, &in, sizeof(out)); // copy padding + pixel_shader_uid_data* ps = out.ps_uid.GetUidData(); + BlendingState& blend = out.blending_state; + + if (ps->ztest == EmulatedZ::ForcedEarly && !out.depth_state.updateenable) + { + // No need to force early depth test if you're not writing z + ps->ztest = EmulatedZ::Early; + } + + const bool benefits_from_ps_dual_source_off = + (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend && + g_ActiveConfig.backend_info.bSupportsFramebufferFetch) || + DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING); + if (benefits_from_ps_dual_source_off && !blend.RequiresDualSrc()) + { + // Only use dual-source blending when required on 
drivers that don't support it very well. + ps->no_dual_src = true; + blend.usedualsrc = false; + } + + if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch) + { + bool fbfetch_blend = false; + if ((DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z) || + !g_ActiveConfig.backend_info.bSupportsEarlyZ) && + ps->ztest == EmulatedZ::ForcedEarly) + { + ps->ztest = EmulatedZ::EarlyWithFBFetch; + fbfetch_blend |= static_cast<bool>(out.blending_state.blendenable); + ps->no_dual_src = true; + } + fbfetch_blend |= blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp; + fbfetch_blend |= blend.usedualsrc && !g_ActiveConfig.backend_info.bSupportsDualSourceBlend; + if (fbfetch_blend) + { + ps->no_dual_src = true; + if (blend.logicopenable) + { + ps->logic_op_enable = true; + ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value()); + blend.logicopenable = false; + } + if (blend.blendenable) + { + ps->blend_enable = true; + ps->blend_src_factor = blend.srcfactor; + ps->blend_src_factor_alpha = blend.srcfactoralpha; + ps->blend_dst_factor = blend.dstfactor; + ps->blend_dst_factor_alpha = blend.dstfactoralpha; + ps->blend_subtract = blend.subtract; + ps->blend_subtract_alpha = blend.subtractAlpha; + blend.blendenable = false; + } + } + } + + // force dual src off if we can't support it + if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend) + { + ps->no_dual_src = true; + blend.usedualsrc = false; + } + + if (ps->ztest == EmulatedZ::ForcedEarly && !g_ActiveConfig.backend_info.bSupportsEarlyZ) + { + // These things should be false + ASSERT(!ps->zfreeze); + // ZCOMPLOC HACK: + // The only way to emulate alpha test + early-z is to force early-z in the shader. + // As this isn't available on all drivers and as we can't emulate this feature otherwise, + // we are only able to choose which one we want to respect more. 
+ // Tests seem to have proven that writing depth even when the alpha test fails is more + // important than a reliable alpha test, so we just force the alpha test to always succeed. + // At least this seems to be less buggy. + ps->ztest = EmulatedZ::EarlyWithZComplocHack; + } + + return out; +} + +std::optional<AbstractPipelineConfig> +ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in) +{ + GXPipelineUid config = ApplyDriverBugs(config_in); const AbstractShader* vs; auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid); if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending) @@ -650,9 +738,25 @@ std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXP config.depth_state, config.blending_state); } -std::optional<AbstractPipelineConfig> -ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config) +/// Edits the UID based on driver bugs and other special configurations +static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in) { + GXUberPipelineUid out; + memcpy(&out, &in, sizeof(out)); // Copy padding + if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend || + (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) && + !out.blending_state.RequiresDualSrc())) + { + out.blending_state.usedualsrc = false; + out.ps_uid.GetUidData()->no_dual_src = true; + } + return out; +} + +std::optional<AbstractPipelineConfig> +ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in) +{ + GXUberPipelineUid config = ApplyDriverBugs(config_in); const AbstractShader* vs; auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid); if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending) @@ -981,12 +1085,14 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid, u32 priority) { stages_ready = true; - auto vs_it = shader_cache->m_vs_cache.shader_map.find(uid.vs_uid); + GXPipelineUid actual_uid = ApplyDriverBugs(uid); + + auto vs_it = shader_cache->m_vs_cache.shader_map.find(actual_uid.vs_uid); stages_ready &= vs_it != 
shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending; if (vs_it == shader_cache->m_vs_cache.shader_map.end()) - shader_cache->QueueVertexShaderCompile(uid.vs_uid, priority); + shader_cache->QueueVertexShaderCompile(actual_uid.vs_uid, priority); - PixelShaderUid ps_uid = uid.ps_uid; + PixelShaderUid ps_uid = actual_uid.ps_uid; ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid); auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid); @@ -1051,13 +1157,15 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 pri { stages_ready = true; - auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(uid.vs_uid); + GXUberPipelineUid actual_uid = ApplyDriverBugs(uid); + + auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(actual_uid.vs_uid); stages_ready &= vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending; if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end()) - shader_cache->QueueVertexUberShaderCompile(uid.vs_uid, priority); + shader_cache->QueueVertexUberShaderCompile(actual_uid.vs_uid, priority); - UberShader::PixelShaderUid ps_uid = uid.ps_uid; + UberShader::PixelShaderUid ps_uid = actual_uid.ps_uid; UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid); diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 024b49ddfa..8b89b045e8 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -21,12 +21,12 @@ PixelShaderUid GetPixelShaderUid() pixel_ubershader_uid_data* const uid_data = out.GetUidData(); uid_data->num_texgens = xfmem.numTexGen.numTexGens; - uid_data->early_depth = bpmem.UseEarlyDepthTest() && + uid_data->early_depth = bpmem.GetEmulatedZ() == EmulatedZ::Early && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) && 
!(bpmem.zmode.testenable && bpmem.genMode.zfreeze); uid_data->per_pixel_depth = - (bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) || + (bpmem.ztex2.op != ZTexOp::Disabled && bpmem.GetEmulatedZ() == EmulatedZ::Late) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) || (bpmem.zmode.testenable && bpmem.genMode.zfreeze); uid_data->uint_output = bpmem.blendmode.UseLogicOp(); @@ -39,6 +39,10 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos { pixel_ubershader_uid_data* const uid_data = uid->GetUidData(); + // Dual source is always enabled in the shader if this bug is not present + if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING)) + uid_data->no_dual_src = 0; + // OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation. // Therefore, it is not necessary to use a uint output on these backends. We also disable the // uint output when logic op is not supported (i.e. driver/device does not support D3D11.1). 
@@ -53,8 +57,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, const bool msaa = host_config.msaa; const bool ssaa = host_config.ssaa; const bool stereo = host_config.stereo; - const bool use_dual_source = host_config.backend_dual_source_blend; - const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; + const bool use_dual_source = host_config.backend_dual_source_blend && !uid_data->no_dual_src; + const bool use_shader_blend = !host_config.backend_dual_source_blend && + host_config.backend_shader_framebuffer_fetch; const bool use_shader_logic_op = !host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch; const bool use_framebuffer_fetch = @@ -1273,7 +1278,11 @@ void EnumeratePixelShaderUids(const std::function& for (u32 uint_output = 0; uint_output < 2; uint_output++) { puid->uint_output = uint_output; - callback(uid); + for (u32 no_dual_src = 0; no_dual_src < 2; no_dual_src++) + { + puid->no_dual_src = no_dual_src; + callback(uid); + } } } } diff --git a/Source/Core/VideoCommon/UberShaderPixel.h b/Source/Core/VideoCommon/UberShaderPixel.h index 9b2be898a2..aa3d6c7625 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.h +++ b/Source/Core/VideoCommon/UberShaderPixel.h @@ -18,6 +18,7 @@ struct pixel_ubershader_uid_data u32 early_depth : 1; u32 per_pixel_depth : 1; u32 uint_output : 1; + u32 no_dual_src : 1; u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); } }; @@ -42,9 +43,9 @@ struct fmt::formatter template auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const { - return fmt::format_to(ctx.out(), "Pixel UberShader for {} texgens{}{}{}", uid.num_texgens, - uid.early_depth ? ", early-depth" : "", - uid.per_pixel_depth ? ", per-pixel depth" : "", - uid.uint_output ? ", uint output" : ""); + return fmt::format_to( + ctx.out(), "Pixel UberShader for {} texgens{}{}{}{}", uid.num_texgens, + uid.early_depth ? 
", early-depth" : "", uid.per_pixel_depth ? ", per-pixel depth" : "", + uid.uint_output ? ", uint output" : "", uid.no_dual_src ? ", no dual-source blending" : ""); } };