From ceb1f8c8cbc41aed49c5ae8d66499057d979b51a Mon Sep 17 00:00:00 2001 From: Jonathan Hamilton Date: Tue, 26 Dec 2017 12:30:22 -0800 Subject: [PATCH] Enable shader_framebuffer_fetch blend path on ubershaders Tested on Linux with Intel Skylake integrated graphics and blend_func_extended force-disabled, as it's the only platform I have that supports fb_fetch and doesn't crash with ubershaders. --- Source/Core/VideoCommon/BPStructs.cpp | 4 +- Source/Core/VideoCommon/ConstantManager.h | 8 ++ Source/Core/VideoCommon/PixelShaderGen.cpp | 7 ++ .../Core/VideoCommon/PixelShaderManager.cpp | 37 ++++++ Source/Core/VideoCommon/UberShaderPixel.cpp | 118 +++++++++++++++++- 5 files changed, 170 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 7d027c7b72..8909823b7a 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -153,9 +153,7 @@ static void BPWritten(const BPCmd& bp) SetBlendMode(); - // Dither - if (bp.changes & 0x04) - PixelShaderManager::SetBlendModeChanged(); + PixelShaderManager::SetBlendModeChanged(); } return; case BPMEM_CONSTANTALPHA: // Set Destination Alpha diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 85918d02d9..46de3c2231 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -42,6 +42,14 @@ struct PixelShaderConstants std::array pack1; // .xy - combiners, .z - tevind, .w - iref std::array pack2; // .x - tevorder, .y - tevksel std::array konst; // .rgba + // The following are used in ubershaders when using shader_framebuffer_fetch blending + u32 blend_enable; + u32 blend_src_factor; + u32 blend_src_factor_alpha; + u32 blend_dst_factor; + u32 blend_dst_factor_alpha; + u32 blend_subtract; + u32 blend_subtract_alpha; }; struct VertexShaderConstants diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp
b/Source/Core/VideoCommon/PixelShaderGen.cpp index f8c79af91e..4cd961c1dd 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -413,6 +413,13 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg "\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind "\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel "\tint4 konstLookup[32];\n" + "\tbool blend_enable;\n" + "\tuint blend_src_factor;\n" + "\tuint blend_src_factor_alpha;\n" + "\tuint blend_dst_factor;\n" + "\tuint blend_dst_factor_alpha;\n" + "\tbool blend_subtract;\n" + "\tbool blend_subtract_alpha;\n" "};\n\n"); out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n" "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n" diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 0fa3614bdb..8ada00ef3b 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -473,6 +473,43 @@ void PixelShaderManager::SetBlendModeChanged() constants.dither = dither; dirty = true; } + BlendingState state = {}; + state.Generate(bpmem); + if (constants.blend_enable != state.blendenable) + { + constants.blend_enable = state.blendenable; + dirty = true; + } + if (constants.blend_src_factor != state.srcfactor) + { + constants.blend_src_factor = state.srcfactor; + dirty = true; + } + if (constants.blend_src_factor_alpha != state.srcfactoralpha) + { + constants.blend_src_factor_alpha = state.srcfactoralpha; + dirty = true; + } + if (constants.blend_dst_factor != state.dstfactor) + { + constants.blend_dst_factor = state.dstfactor; + dirty = true; + } + if (constants.blend_dst_factor_alpha != state.dstfactoralpha) + { + constants.blend_dst_factor_alpha = state.dstfactoralpha; + dirty = true; + } + if (constants.blend_subtract != state.subtract) + { + constants.blend_subtract = state.subtract; + dirty = true; + } + if 
(constants.blend_subtract_alpha != state.subtractAlpha) + { + constants.blend_subtract_alpha = state.subtractAlpha; + dirty = true; + } s_bDestAlphaDirty = true; } diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 550e522099..a0e01febe8 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -47,6 +47,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, const bool ssaa = host_config.ssaa; const bool stereo = host_config.stereo; const bool use_dual_source = host_config.backend_dual_source_blend; + const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; const bool early_depth = uid_data->early_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0; const bool bounding_box = @@ -77,6 +78,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); } } + else if (use_shader_blend) + { + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so pull out the "real" output value + // and use a temporary for calculations, then set the output value once at the end of the + // shader + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + } + } else { out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); @@ -658,6 +674,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write("void main()\n{\n"); out.Write(" float4 rawpos = gl_FragCoord;\n"); + if (use_shader_blend) + { + // Store off a copy of the initial fb value for blending + 
out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n"); + out.Write(" float4 ocol0;\n"); + out.Write(" float4 ocol1;\n"); + } } else // D3D { @@ -1203,7 +1226,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " ocol0.a = float(TevResult.a >> 2) / 63.0;\n" " \n"); - if (use_dual_source) + if (use_dual_source || use_shader_blend) { out.Write(" // Dest alpha override (dual source blending)\n" " // Colors will be blended against the alpha from ocol1 and\n" @@ -1228,6 +1251,99 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(" }\n"); } + if (use_shader_blend) + { + static const std::array blendSrcFactor = { + "float3(0,0,0);", // ZERO + "float3(1,1,1);", // ONE + "initial_ocol0.rgb;", // DSTCLR + "float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR + "ocol1.aaa;", // SRCALPHA + "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "initial_ocol0.aaa;", // DSTALPHA + "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static const std::array blendSrcFactorAlpha = { + "0.0;", // ZERO + "1.0;", // ONE + "initial_ocol0.a;", // DSTCLR + "1.0 - initial_ocol0.a;", // INVDSTCLR + "ocol1.a;", // SRCALPHA + "1.0 - ocol1.a;", // INVSRCALPHA + "initial_ocol0.a;", // DSTALPHA + "1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + static const std::array blendDstFactor = { + "float3(0,0,0);", // ZERO + "float3(1,1,1);", // ONE + "ocol0.rgb;", // SRCCLR + "float3(1,1,1) - ocol0.rgb;", // INVSRCCLR + "ocol1.aaa;", // SRCALHA + "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "initial_ocol0.aaa;", // DSTALPHA + "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static const std::array blendDstFactorAlpha = { + "0.0;", // ZERO + "1.0;", // ONE + "ocol0.a;", // SRCCLR + "1.0 - ocol0.a;", // INVSRCCLR + "ocol1.a;", // SRCALPHA + "1.0 - ocol1.a;", // INVSRCALPHA + "initial_ocol0.a;", // DSTALPHA + "1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + + out.Write(" if (blend_enable) {\n" + " float4 blend_src;\n" + " 
switch (blend_src_factor) {\n"); + for (unsigned i = 0; i < blendSrcFactor.size(); i++) + { + out.Write(" case %uu: blend_src.rgb = %s; break;\n", i, blendSrcFactor[i]); + } + + out.Write(" }\n" + " switch (blend_src_factor_alpha) {\n"); + for (unsigned i = 0; i < blendSrcFactorAlpha.size(); i++) + { + out.Write(" case %uu: blend_src.a = %s; break;\n", i, blendSrcFactorAlpha[i]); + } + + out.Write(" }\n" + " float4 blend_dst;\n" + " switch (blend_dst_factor) {\n"); + for (unsigned i = 0; i < blendDstFactor.size(); i++) + { + out.Write(" case %uu: blend_dst.rgb = %s; break;\n", i, blendDstFactor[i]); + } + out.Write(" }\n" + " switch (blend_dst_factor_alpha) {\n"); + for (unsigned i = 0; i < blendDstFactorAlpha.size(); i++) + { + out.Write(" case %uu: blend_dst.a = %s; break;\n", i, blendDstFactorAlpha[i]); + } + + out.Write( + " }\n" + " float4 blend_result;\n" + " if (blend_subtract)\n" + " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n" + " else\n" + " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * " + "blend_src.rgb;\n"); + + out.Write(" if (blend_subtract_alpha)\n" + " blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n" + " else\n" + " blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + + out.Write(" real_ocol0 = blend_result;\n"); + + out.Write(" } else {\n" + " real_ocol0 = ocol0;\n" + " }\n"); + } + out.Write("}\n" "\n" "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n"