diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index f29e079d61..26a8c4dcfd 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -82,6 +82,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsBitfield = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsBPTCTextures = false; + g_Config.backend_info.bSupportsFramebufferFetch = false; IDXGIFactory2* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index d6c9638763..4ea460e440 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsGPUTextureDecoding = false; g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; + g_Config.backend_info.bSupportsFramebufferFetch = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index a5133023af..54b5c87100 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -885,6 +885,24 @@ void ProgramShaderCache::CreateHeader() } } + std::string framebuffer_fetch_string; + switch (g_ogl_config.SupportedFramebufferFetch) + { + case ES_FB_FETCH_TYPE::FB_FETCH_EXT: + framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n" + "#define FB_FETCH_VALUE ocol0\n" + "#define FRAGMENT_INOUT inout"; + break; + case ES_FB_FETCH_TYPE::FB_FETCH_ARM: + framebuffer_fetch_string = "#extension GL_ARM_shader_framebuffer_fetch: enable\n" + "#define FB_FETCH_VALUE gl_LastFragColorARM\n" + "#define FRAGMENT_INOUT out"; + break; + case ES_FB_FETCH_TYPE::FB_FETCH_NONE: + framebuffer_fetch_string = ""; + break; + } + s_glsl_header = StringFromFormat( "%s\n" "%s\n" // ubo @@ -902,6 +920,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // ES texture buffer "%s\n" // ES dual source blend "%s\n" // shader image load store + "%s\n" // shader framebuffer fetch // Precision defines for GLSL ES "%s\n" @@ -976,8 +995,8 @@ void ProgramShaderCache::CreateHeader() ((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ? "#extension GL_ARB_shader_image_load_store : enable" : "", - is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", - is_glsles ? "precision highp sampler2DArray;" : "", + framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "", + is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? "precision highp usamplerBuffer;" : "", diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 428492c72a..7d3fa22f5d 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -522,6 +522,21 @@ Renderer::Renderer() // depth clamping. g_Config.backend_info.bSupportsDepthClamp = false; + if (GLExtensions::Supports("GL_EXT_shader_framebuffer_fetch")) + { + g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_EXT; + } + else if (GLExtensions::Supports("GL_ARM_shader_framebuffer_fetch")) + { + g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_ARM; + } + else + { + g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_NONE; + } + g_Config.backend_info.bSupportsFramebufferFetch = + g_ogl_config.SupportedFramebufferFetch != ES_FB_FETCH_TYPE::FB_FETCH_NONE; + if (GLExtensions::Version() == 300) { g_ogl_config.eSupportedGLSLVersion = GLSLES_300; @@ -1275,44 +1290,54 @@ void Renderer::SetBlendingState(const BlendingState& state) bool useDualSource = state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend && (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha); + // Only use shader blend if we need to and we don't support dual-source blending directly + bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha && + g_ActiveConfig.backend_info.bSupportsFramebufferFetch; - const GLenum src_factors[8] = { - GL_ZERO, - GL_ONE, - GL_DST_COLOR, - GL_ONE_MINUS_DST_COLOR, - useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, - useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA}; - const GLenum dst_factors[8] = { - GL_ZERO, - GL_ONE, - GL_SRC_COLOR, - GL_ONE_MINUS_SRC_COLOR, - useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, - useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA}; - - if (state.blendenable) - { - glEnable(GL_BLEND); - } - else + if (useShaderBlend) { glDisable(GL_BLEND); } + else + { + const GLenum src_factors[8] = { + GL_ZERO, + GL_ONE, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, + useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA}; + const GLenum dst_factors[8] = { + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA, + useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA}; - // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when - // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics - // driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic - // Adventure 2 Battle: graphics crash when loading first Dark level" - GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; - GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; - glBlendEquationSeparate(equation, equationAlpha); - glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor], - src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]); + if (state.blendenable) + { + glEnable(GL_BLEND); + } + else + { + glDisable(GL_BLEND); + } + + // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when + // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics + // driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic + // Adventure 2 Battle: graphics crash when loading first Dark level" + GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; + GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; + glBlendEquationSeparate(equation, equationAlpha); + glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor], + src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]); + } const GLenum logic_op_codes[16] = { GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP, diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index e787828d53..099b63254f 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -35,6 +35,13 @@ enum class ES_TEXBUF_TYPE TEXBUF_EXT }; +enum class ES_FB_FETCH_TYPE +{ + FB_FETCH_NONE, + FB_FETCH_EXT, + FB_FETCH_ARM, +}; + // ogl-only config, so not in VideoConfig.h struct VideoConfig { @@ -59,6 +66,7 @@ struct VideoConfig bool bSupportsAniso; bool bSupportsBitfield; bool bSupportsTextureSubImage; + ES_FB_FETCH_TYPE SupportedFramebufferFetch; const char* gl_vendor; const char* gl_renderer; diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 57c5096dfa..115f9a9509 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCopyToVram = false; g_Config.backend_info.bForceCopyToRam = true; + g_Config.backend_info.bSupportsFramebufferFetch = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 5447e7eea3..c55273a224 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -247,6 +247,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. config->backend_info.bSupportsCopyToVram = true; // Assumed support. config->backend_info.bForceCopyToRam = false; + config->backend_info.bSupportsFramebufferFetch = false; } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 0ccfb09679..1a427b70d0 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -16,6 +16,7 @@ #include "VideoCommon/DriverDetails.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/RenderState.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -319,6 +320,21 @@ PixelShaderUid GetPixelShaderUid() uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; + BlendingState state = {}; + state.Generate(bpmem); + + if (state.usedualsrc && state.dstalpha && g_ActiveConfig.backend_info.bSupportsFramebufferFetch && + !g_ActiveConfig.backend_info.bSupportsDualSourceBlend) + { + uid_data->blend_enable = state.blendenable; + uid_data->blend_src_factor = state.srcfactor; + uid_data->blend_src_factor_alpha = state.srcfactoralpha; + uid_data->blend_dst_factor = state.dstfactor; + uid_data->blend_dst_factor_alpha = state.dstfactoralpha; + uid_data->blend_subtract = state.subtract; + uid_data->blend_subtract_alpha = state.subtractAlpha; + } + return out; } @@ -447,6 +463,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, bool use_dual_source); +static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data); ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config, const pixel_shader_uid_data* uid_data) @@ -519,6 +536,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host host_config.backend_dual_source_blend && (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || uid_data->useDstAlpha); + const bool use_shader_blend = + !use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { @@ -535,6 +554,17 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); } } + else if (use_shader_blend) + { + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 ocol0;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 ocol0;\n"); + } + } else { out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); @@ -575,6 +605,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host out.Write("void main()\n{\n"); out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); + if (use_shader_blend) + { + // Store off a copy of the initial fb value for blending + out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"); + } } else // D3D { @@ -710,7 +745,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host // testing result) if (uid_data->Pretest == AlphaTest::UNDETERMINED || (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) - WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth, use_dual_source); + WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth, + use_dual_source || use_shader_blend); if (uid_data->zfreeze) { @@ -793,7 +829,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host WriteFog(out, uid_data); // Write the color and alpha values to the framebuffer - WriteColor(out, ApiType, uid_data, use_dual_source); + // If using shader blend, we still use the separate alpha + WriteColor(out, ApiType, uid_data, use_dual_source || use_shader_blend); + + if (use_shader_blend) + WriteBlend(out, uid_data); if (uid_data->bounding_box) { @@ -1358,3 +1398,79 @@ static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid } } } + +static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) +{ + if (uid_data->blend_enable) + { + static const std::array blendSrcFactor = { + "float3(0,0,0);", // ZERO + "float3(1,1,1);", // ONE + "initial_ocol0.rgb;", // DSTCLR + "float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR + "ocol1.aaa;", // SRCALPHA + "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "initial_ocol0.aaa;", // DSTALPHA + "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static const std::array blendSrcFactorAlpha = { + "0.0;", // ZERO + "1.0;", // ONE + "initial_ocol0.a;", // DSTCLR + "1.0 - initial_ocol0.a;", // INVDSTCLR + "ocol1.a;", // SRCALPHA + "1.0 - ocol1.a;", // INVSRCALPHA + "initial_ocol0.a;", // DSTALPHA + "1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + static const std::array blendDstFactor = { + "float3(0,0,0);", // ZERO + "float3(1,1,1);", // ONE + "ocol0.rgb;", // SRCCLR + "float3(1,1,1) - ocol0.rgb;", // INVSRCCLR + "ocol1.aaa;", // SRCALHA + "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "initial_ocol0.aaa;", // DSTALPHA + "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static const std::array blendDstFactorAlpha = { + "0.0;", // ZERO + "1.0;", // ONE + "ocol0.a;", // SRCCLR + "1.0 - ocol0.a;", // INVSRCCLR + "ocol1.a;", // SRCALPHA + "1.0 - ocol1.a;", // INVSRCALPHA + "initial_ocol0.a;", // DSTALPHA + "1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + out.Write("\tfloat4 blend_src;\n"); + out.Write("\tblend_src.rgb = %s\n", blendSrcFactor[uid_data->blend_src_factor]); + out.Write("\tblend_src.a = %s\n", blendSrcFactorAlpha[uid_data->blend_src_factor_alpha]); + out.Write("\tfloat4 blend_dst;\n"); + out.Write("\tblend_dst.rgb = %s\n", blendDstFactor[uid_data->blend_dst_factor]); + out.Write("\tblend_dst.a = %s\n", blendDstFactorAlpha[uid_data->blend_dst_factor_alpha]); + + out.Write("\tfloat4 blend_result;\n"); + if (uid_data->blend_subtract) + { + out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * " + "blend_src.rgb;\n"); + } + else + { + out.Write( + "\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n"); + } + + if (uid_data->blend_subtract_alpha) + out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"); + else + out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + } + else + { + out.Write("\tfloat4 blend_result = ocol0;\n"); + } + + out.Write("\tocol0 = blend_result;\n"); +} diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index b480777c41..2e630e2e49 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -44,7 +44,13 @@ struct pixel_shader_uid_data u32 rgba6_format : 1; u32 dither : 1; u32 uint_output : 1; - u32 pad : 15; + u32 blend_enable : 1; // Only used with shader_framebuffer_fetch blend + u32 blend_src_factor : 3; // Only used with shader_framebuffer_fetch blend + u32 blend_src_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend + u32 blend_dst_factor : 3; // Only used with shader_framebuffer_fetch blend + u32 blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend + u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend + u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index c6b29c8088..8cc7e54f5c 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -32,6 +32,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield; bits.backend_dynamic_sampler_indexing = g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing; + bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch; return bits; } @@ -68,9 +69,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool if (include_host_config) { - // We're using 20 bits, so 5 hex characters. + // We're using 21 bits, so 6 hex characters. ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); - filename += StringFromFormat("-%05X", host_config.bits); + filename += StringFromFormat("-%06X", host_config.bits); } filename += ".cache"; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 3e3ef2eb13..98a30faa88 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -178,7 +178,8 @@ union ShaderHostConfig u32 backend_reversed_depth_range : 1; u32 backend_bitfield : 1; u32 backend_dynamic_sampler_indexing : 1; - u32 pad : 12; + u32 backend_shader_framebuffer_fetch : 1; + u32 pad : 11; }; static ShaderHostConfig GetCurrent(); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index f7ece85a64..b472a2ab57 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -227,6 +227,7 @@ struct VideoConfig final bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon bool bSupportsBPTCTextures; + bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES } backend_info; // Utility