From bbb64ff9930e191389a4ba7b696bb82174c30c1b Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 27 Feb 2022 16:48:28 +0100 Subject: [PATCH] Shadergen: Use real_ocol0 workaround for shader logic ops Previously we were using this workaround when using framebuffer fetch to emulate dual source blending, but it seems like we also need to use it when using framebuffer fetch to emulate logic ops, otherwise some Adreno devices get a crash when compiling OpenGL ES ubershaders. Using the workaround in specialized shaders doesn't seem to be necessary, but I've made the same change there for consistency. This gets us closer to fixing https://bugs.dolphin-emu.org/issues/12791 but doesn't actually fix it. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 43 ++++++++++----------- Source/Core/VideoCommon/UberShaderPixel.cpp | 42 ++++++++++---------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 18834339f4..ee95d25187 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -947,11 +947,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos !use_dual_source && uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch; const bool use_shader_logic_op = !host_config.backend_logic_op && uid_data->logic_op_enable && host_config.backend_shader_framebuffer_fetch; + const bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op; if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op; - #ifdef __APPLE__ // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // if we want to use it. @@ -959,18 +958,16 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos { if (use_dual_source) { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); - } - else if (use_shader_blend) - { - // Metal doesn't support a single unified variable for both input and output, so we declare - // the output separately. The input will be defined later below. - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); } else { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + // Metal doesn't support a single unified variable for both input and output, + // so when using framebuffer fetch, we declare the input separately below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {}};\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); } if (use_framebuffer_fetch) @@ -989,7 +986,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", - use_shader_blend ? "real_ocol0" : "ocol0"); + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); if (use_dual_source) { @@ -1048,23 +1045,23 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // Store off a copy of the initial framebuffer value. // // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the - // framebuffer), we read from real_ocol0 or ocol0, depending if shader blending is enabled. + // framebuffer), we read from real_ocol0. out.Write("#ifdef FB_FETCH_VALUE\n" "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" "#else\n" - "\tfloat4 initial_ocol0 = {};\n" - "#endif\n", - use_shader_blend ? "real_ocol0" : "ocol0"); + "\tfloat4 initial_ocol0 = real_ocol0;\n" + "#endif\n"); + + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader. + out.Write("\tfloat4 ocol0;\n"); } if (use_shader_blend) { - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so we pull out the "real" output - // value above and use a temporary for calculations, then set the output value once at the - // end of the shader if we are using shader blending. - out.Write("\tfloat4 ocol0;\n" - "\tfloat4 ocol1;\n"); + out.Write("\tfloat4 ocol1;\n"); } } else // D3D @@ -1335,6 +1332,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (use_shader_blend) WriteBlend(out, uid_data); + else if (use_framebuffer_fetch) + out.Write("\treal_ocol0 = ocol0;\n"); if (uid_data->bounding_box) out.Write("\tUpdateBoundingBox(rawpos.xy);\n"); diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index fa6ad7bc75..abac319afa 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -81,18 +81,16 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, { if (use_dual_source) { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); - } - else if (use_shader_blend) - { - // Metal doesn't support a single unified variable for both input and output, so we declare - // the output separately. The input will be defined later below. - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); } else { - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + // Metal doesn't support a single unified variable for both input and output, + // so when using framebuffer fetch, we declare the input separately below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {}};\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); } if (use_framebuffer_fetch) @@ -111,7 +109,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", - use_shader_blend ? "real_ocol0" : "ocol0"); + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); if (use_dual_source) { @@ -536,23 +534,23 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // Store off a copy of the initial framebuffer value. // // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the - // framebuffer), we read from real_ocol0 or ocol0, depending if shader blending is enabled. + // framebuffer), we read from real_ocol0. out.Write("#ifdef FB_FETCH_VALUE\n" " float4 initial_ocol0 = FB_FETCH_VALUE;\n" "#else\n" - " float4 initial_ocol0 = {};\n" - "#endif\n", - use_shader_blend ? "real_ocol0" : "ocol0"); + " float4 initial_ocol0 = real_ocol0;\n" + "#endif\n"); + + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader. + out.Write(" float4 ocol0;\n"); } if (use_shader_blend) { - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so we pull out the "real" output - // value above and use a temporary for calculations, then set the output value once at the - // end of the shader if we are using shader blending. - out.Write(" float4 ocol0;\n" - " float4 ocol1;\n"); + out.Write(" float4 ocol1;\n"); } } else // D3D @@ -1260,6 +1258,10 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " real_ocol0 = ocol0;\n" " }}\n"); } + else if (use_framebuffer_fetch) + { + out.Write(" real_ocol0 = ocol0;\n"); + } out.Write("}}\n" "\n"