From a8b9df39520da80967bb41300e695db50b051aa0 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 8 Mar 2022 00:36:05 +1000 Subject: [PATCH] GS: Utilize GL_EXT_framebuffer_fetch where available --- bin/resources/shaders/opengl/tfx_fs.glsl | 67 ++++++++++++++++- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 1 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 7 ++ pcsx2/Config.h | 1 + pcsx2/Frontend/VulkanHostDisplay.cpp | 2 +- pcsx2/GS/GS.cpp | 1 + pcsx2/GS/Renderers/Common/GSDevice.cpp | 2 - pcsx2/GS/Renderers/Common/GSDevice.h | 11 ++- pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 2 + pcsx2/GS/Renderers/HW/GSRendererNew.cpp | 77 ++++++++++++++------ pcsx2/GS/Renderers/OpenGL/GLLoader.cpp | 10 +++ pcsx2/GS/Renderers/OpenGL/GLLoader.h | 2 + pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 29 +++++++- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h | 2 +- pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp | 24 ++++-- pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h | 11 ++- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 2 + pcsx2/GS/Window/GSwxDialog.cpp | 9 ++- pcsx2/Pcsx2Config.cpp | 3 + 19 files changed, 217 insertions(+), 46 deletions(-) diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 212526bcd5..b6c813900a 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -20,6 +20,7 @@ #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) +#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1)) #ifdef FRAGMENT_SHADER @@ -38,12 +39,33 @@ in SHADER #endif } PSin; -// Same buffer but 2 colors for dual source blending -layout(location = 0, index = 0) out vec4 SV_Target0; -layout(location = 0, index = 1) out vec4 SV_Target1; +#define TARGET_0_QUALIFIER out + +// Only enable framebuffer fetch when we actually need it. 
+#if HAS_FRAMEBUFFER_FETCH && (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || PS_DATE != 0) + #if defined(GL_EXT_shader_framebuffer_fetch) + #undef TARGET_0_QUALIFIER + #define TARGET_0_QUALIFIER inout + #define LAST_FRAG_COLOR SV_Target0 + #elif defined(GL_ARM_shader_framebuffer_fetch) + // The ARM extension exposes the previous colour through a read-only built-in, so SV_Target0 stays a plain output. + #define LAST_FRAG_COLOR gl_LastFragColorARM + #endif +#endif + +#ifndef DISABLE_DUAL_SOURCE + // Same buffer but 2 colors for dual source blending + layout(location = 0, index = 0) TARGET_0_QUALIFIER vec4 SV_Target0; + layout(location = 0, index = 1) out vec4 SV_Target1; +#else + layout(location = 0) TARGET_0_QUALIFIER vec4 SV_Target0; +#endif layout(binding = 1) uniform sampler2D PaletteSampler; + +#if !HAS_FRAMEBUFFER_FETCH layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the image below +#endif #ifndef DISABLE_GL42_image #if PS_DATE > 0 @@ -79,7 +98,11 @@ layout(early_fragment_tests) in; vec4 sample_c(vec2 uv) { #if PS_TEX_IS_FB == 1 +#if HAS_FRAMEBUFFER_FETCH + return LAST_FRAG_COLOR; +#else return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); +#endif #else #if PS_POINT_SAMPLER @@ -234,7 +257,11 @@ mat4 sample_4p(vec4 u) int fetch_raw_depth() { #if PS_TEX_IS_FB == 1 +#if HAS_FRAMEBUFFER_FETCH + return int(LAST_FRAG_COLOR.r * exp2(32.0f)); +#else return int(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f)); +#endif #else return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f)); #endif @@ -243,7 +270,11 @@ int fetch_raw_depth() vec4 fetch_raw_color() { #if PS_TEX_IS_FB == 1 +#if HAS_FRAMEBUFFER_FETCH + return LAST_FRAG_COLOR; +#else return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); +#endif #else return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0); #endif @@ -603,7 +634,11 @@ void ps_fbmask(inout vec4 C) { // FIXME do I need special case for 16 bits #if PS_FBMASK +#if 
HAS_FRAMEBUFFER_FETCH + vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f); +#else vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); +#endif C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & 
FbMask)); #endif } @@ -659,7 +694,14 @@ void ps_blend(inout vec4 Color, float As) return; #endif + vec3 Cs = Color.rgb; + +#if SW_BLEND_NEEDS_RT +#if HAS_FRAMEBUFFER_FETCH + vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f); +#else vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); +#endif #if PS_DFMT == FMT_24 float Ad = 1.0f; @@ -671,7 +713,7 @@ void ps_blend(inout vec4 Color, float As) // Let the compiler do its jobs ! vec3 Cd = RT.rgb; - vec3 Cs = Color.rgb; +#endif #if PS_BLEND_A == 0 vec3 A = Cs; @@ -748,14 +790,24 @@ void ps_main() if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1)) discard; #endif + +#if PS_DATE != 0 #if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2) #if PS_WRITE_RG == 1 // Pseudo 16 bits access. +#if HAS_FRAMEBUFFER_FETCH + float rt_a = LAST_FRAG_COLOR.g; +#else float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g; +#endif +#else +#if HAS_FRAMEBUFFER_FETCH + float rt_a = LAST_FRAG_COLOR.a; #else float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; #endif +#endif #if (PS_DATE & 3) == 1 // DATM == 0: Pixel with alpha equal to 1 will failed @@ -784,6 +836,7 @@ void ps_main() if (gl_PrimitiveID > stencil_ceil) { discard; } +#endif #endif vec4 C = ps_color(); @@ -845,7 +898,11 @@ void ps_main() // Must be done before alpha correction #if (PS_BLEND_C == 1 && PS_CLR_HW > 3) +#if HAS_FRAMEBUFFER_FETCH + vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f); +#else vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); +#endif float alpha_blend = (PS_DFMT == FMT_24) ? 
1.0f : RT.a / 128.0f; #else float alpha_blend = C.a / 128.0f; @@ -886,7 +943,9 @@ void ps_main() ps_fbmask(C); SV_Target0 = C / 255.0f; +#ifndef DISABLE_DUAL_SOURCE SV_Target1 = vec4(alpha_blend); +#endif #if PS_ZCLAMP gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index ea877c4582..f66ef20521 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -181,6 +181,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useDebugDevice, "EmuCore/GS", "UseDebugDevice", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false); ////////////////////////////////////////////////////////////////////////// // SW Settings diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index cf63ac6942..c6f37701fb 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -1032,6 +1032,13 @@ + + + + Disable Framebuffer Fetch + + + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 5696df51be..a51ad31d97 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -423,6 +423,7 @@ struct Pcsx2Config UseDebugDevice : 1, UseBlitSwapChain : 1, DisableShaderCache : 1, + DisableFramebufferFetch : 1, ThreadedPresentation : 1, OsdShowMessages : 1, OsdShowSpeed : 1, diff --git a/pcsx2/Frontend/VulkanHostDisplay.cpp b/pcsx2/Frontend/VulkanHostDisplay.cpp index 9696618821..98e09a9990 100644 --- a/pcsx2/Frontend/VulkanHostDisplay.cpp +++ 
b/pcsx2/Frontend/VulkanHostDisplay.cpp @@ -14,7 +14,7 @@ #include "imgui_impl_vulkan.h" #include -static constexpr u32 SHADER_CACHE_VERSION = 2; +static constexpr u32 SHADER_CACHE_VERSION = 3; class VulkanHostDisplayTexture : public HostDisplayTexture { diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index 32ec1467f7..bd664ca150 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -1308,6 +1308,7 @@ void GSApp::Init() m_default_configuration["CrcHacksExclusions"] = ""; m_default_configuration["disable_hw_gl_draw"] = "0"; m_default_configuration["disable_shader_cache"] = "0"; + m_default_configuration["DisableFramebufferFetch"] = "0"; m_default_configuration["dithering_ps2"] = "2"; m_default_configuration["dump"] = "0"; m_default_configuration["DumpReplaceableTextures"] = "0"; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index c6e66ecd12..9ee411b183 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -507,8 +507,6 @@ HWBlend GSDevice::GetBlend(size_t index) return blend; } -u16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; } - // clang-format off std::array GSDevice::m_blendMap = diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 7b3c48cc32..7ce60dfe9b 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -544,6 +544,8 @@ public: bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts. bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3. bool bptc_textures : 1; ///< Supports BC6/7 texture compression. + bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers. + bool dual_source_blend : 1; ///< Can use alpha output as a blend factor. 
FeatureSupport() { memset(this, 0, sizeof(*this)); @@ -607,6 +609,12 @@ public: __fi HostDisplay* GetDisplay() const { return m_display; } __fi unsigned int GetFrameNumber() const { return m_frame; } + __fi static constexpr bool IsDualSourceBlendFactor(u16 factor) + { + return (factor == GSDevice::SRC1_ALPHA || factor == GSDevice::INV_SRC1_ALPHA + /*|| factor == GSDevice::SRC1_COLOR || factor == GSDevice::INV_SRC1_COLOR*/); // not used + } + void Recycle(GSTexture* t); enum @@ -700,7 +708,8 @@ public: // Convert the GS blend equations to HW specific blend factors/ops // Index is computed as ((((A * 3 + B) * 3) + C) * 3) + D. A, B, C, D taken from ALPHA register. HWBlend GetBlend(size_t index); - u16 GetBlendFlags(size_t index); + __fi HWBlend GetUnconvertedBlend(size_t index) { return m_blendMap[index]; } + __fi u16 GetBlendFlags(size_t index) const { return m_blendMap[index].flags; } }; struct GSAdapter diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 6920167119..a5c16b262a 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -53,6 +53,8 @@ GSDevice11::GSDevice11() m_features.prefer_new_textures = false; m_features.dxt_textures = false; m_features.bptc_textures = false; + m_features.framebuffer_fetch = false; + m_features.dual_source_blend = true; } bool GSDevice11::Create(HostDisplay* display) diff --git a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp index 02fc5f76ea..45e57f9cde 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp @@ -183,7 +183,8 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask() // m_texture_shuffle = false; bool enable_fbmask_emulation = false; - if (g_gs_device->Features().texture_barrier) + const GSDevice::FeatureSupport features = g_gs_device->Features(); + if (features.texture_barrier) { enable_fbmask_emulation = GSConfig.AccurateBlendingUnit != 
AccBlendLevel::Minimum; } @@ -224,7 +225,7 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask() // If date is enabled you need to test the green channel instead of the // alpha channel. Only enable this code in DATE mode to reduce the number // of shader. - m_conf.ps.write_rg = !write_ba && g_gs_device->Features().texture_barrier && m_context->TEST.DATE; + m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_context->TEST.DATE; m_conf.ps.read_ba = read_ba; @@ -278,15 +279,15 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask() m_conf.cb_ps.FbMask.a = ba_mask; // No blending so hit unsafe path. - if (!PRIM->ABE || !g_gs_device->Features().texture_barrier) + if (!PRIM->ABE || !features.texture_barrier) { GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask); - m_conf.require_one_barrier = true; + m_conf.require_one_barrier = features.texture_barrier; } else { GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); - m_conf.require_full_barrier = true; + m_conf.require_full_barrier = features.texture_barrier; } } else @@ -339,14 +340,14 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask() { GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, (m_conf.ps.dfmt == 2) ? 16 : 32); - m_conf.require_one_barrier = true; + m_conf.require_one_barrier = features.texture_barrier; } else { // The safe and accurate path (but slow) GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, (m_conf.ps.dfmt == 2) ? 
16 : 32); - m_conf.require_full_barrier = true; + m_conf.require_full_barrier = features.texture_barrier; } } } @@ -492,7 +493,7 @@ void GSRendererNew::EmulateChannelShuffle(const GSTextureCache::Source* tex) // sample from fb instead m_conf.tex = nullptr; m_conf.ps.tex_is_fb = true; - m_conf.require_one_barrier = true; + m_conf.require_one_barrier = !g_gs_device->Features().framebuffer_fetch; } else if (m_conf.tex == m_conf.ds) { @@ -539,6 +540,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) } // Compute the blending equation to detect special case + const GSDevice::FeatureSupport features(g_gs_device->Features()); const GIFRegALPHA& ALPHA = m_context->ALPHA; // Set blending to shader bits @@ -627,20 +629,31 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) && (m_env.COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1. && !(m_conf.require_one_barrier || m_conf.require_full_barrier); // Also don't run if there are barriers present. + bool sw_blending = false; + if (!features.dual_source_blend) + { + const HWBlend unconverted_blend = g_gs_device->GetUnconvertedBlend(blend_index); + if (GSDevice::IsDualSourceBlendFactor(unconverted_blend.dst) || + GSDevice::IsDualSourceBlendFactor(unconverted_blend.src)) + { + sw_blending = true; + } + } + // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. - bool sw_blending = false; - if (g_gs_device->Features().texture_barrier) + if (features.texture_barrier) { // Condition 1: Require full sw blend for full barrier. // Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead. const bool prefer_sw_blend = m_conf.require_full_barrier || (m_conf.require_one_barrier && m_prim_overlap == PRIM_OVERLAP_NO); // SW Blend is (nearly) free. Let's use it. + const bool no_prim_overlap = features.framebuffer_fetch ? 
(m_vt.m_primclass == GS_SPRITE_CLASS) : (m_prim_overlap == PRIM_OVERLAP_NO); const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending || blend_non_recursive // Free sw blending, doesn't require barriers or reading fb || accumulation_blend // Mix of hw/sw blending - || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw + || no_prim_overlap // Blend can be done in a single draw || (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier switch (GSConfig.AccurateBlendingUnit) @@ -731,7 +744,9 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) if (m_env.COLCLAMP.CLAMP == 0) { bool free_colclip = false; - if (g_gs_device->Features().texture_barrier) + if (features.framebuffer_fetch) + free_colclip = true; + else if (features.texture_barrier) free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive; else free_colclip = blend_non_recursive; @@ -780,7 +795,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) if (sw_blending) { GL_INS("PABE mode ENABLED"); - if (g_gs_device->Features().texture_barrier) + if (features.texture_barrier) { // Disable hw/sw blend and do pure sw blend with reading the framebuffer. color_dest_blend = false; @@ -889,7 +904,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) const bool blend_non_recursive_one_barrier = blend_non_recursive && blend_ad_alpha_masked; if (blend_non_recursive_one_barrier) m_conf.require_one_barrier |= true; - else if (g_gs_device->Features().texture_barrier) + else if (features.texture_barrier) m_conf.require_full_barrier |= !blend_non_recursive; else m_conf.require_one_barrier |= !blend_non_recursive; @@ -1294,6 +1309,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize(); const GSVector2& rtscale = ds ? 
ds->GetScale() : rt->GetScale(); + const GSDevice::FeatureSupport features(g_gs_device->Features()); const bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24; bool DATE_PRIMID = false; @@ -1324,10 +1340,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Upscaling hack to avoid various line/grid issues MergeSprite(tex); - m_prim_overlap = PrimitiveOverlap(); + if (!features.framebuffer_fetch) + m_prim_overlap = PrimitiveOverlap(); + else + m_prim_overlap = PRIM_OVERLAP_UNKNOW; // Detect framebuffer read that will need special handling - if (g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum) + if (features.texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum) { const u32 fb_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; if (((m_context->FRAME.FBMSK & fb_mask) == (fb_mask & 0x00FFFFFF)) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) @@ -1337,7 +1356,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 GL_DBG("Source and Target are the same! Let's sample the framebuffer"); m_conf.ps.tex_is_fb = 1; - m_conf.require_full_barrier = true; + m_conf.require_full_barrier = !features.framebuffer_fetch; } else if (m_prim_overlap != PRIM_OVERLAP_NO) { @@ -1354,11 +1373,16 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour { // It is way too complex to emulate texture shuffle with DATE, so use accurate path. // No overlap should be triggered on gl/vk only as they support DATE_BARRIER. 
- const bool no_overlap = (g_gs_device->Features().texture_barrier) && (m_prim_overlap == PRIM_OVERLAP_NO); - if (no_overlap || m_texture_shuffle) + if (features.framebuffer_fetch) { - GL_PERF("DATE: Accurate with %s", no_overlap ? "no overlap" : "texture shuffle"); - if (g_gs_device->Features().texture_barrier) + // Full DATE is "free" with framebuffer fetch. The barrier gets cleared below. + DATE_BARRIER = true; + m_conf.require_full_barrier = true; + } + else if ((features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) || m_texture_shuffle) + { + GL_PERF("DATE: Accurate with %s", (features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) ? "no overlap" : "texture shuffle"); + if (features.texture_barrier) { m_conf.require_full_barrier = true; DATE_BARRIER = true; @@ -1446,6 +1470,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour m_conf.blend = {}; // No blending please } + if (features.framebuffer_fetch) + { + // barriers aren't needed with fbfetch + m_conf.require_one_barrier = false; + m_conf.require_full_barrier = false; + } + if (m_conf.ps.scanmsk & 2) DATE_PRIMID = false; // to have discard in the shader work correctly @@ -1509,7 +1540,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } else if (DATE_one) { - if (g_gs_device->Features().texture_barrier) + if (features.texture_barrier) { m_conf.require_one_barrier = true; m_conf.ps.date = 5 + m_context->TEST.DATM; @@ -1615,7 +1646,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // // Use an HLE shader to sample depth directly as the alpha channel GL_INS("ICO sample depth as alpha"); - m_conf.require_full_barrier = true; + m_conf.require_full_barrier = !features.framebuffer_fetch; // Extract the depth as palette index m_conf.ps.depth_fmt = 1; m_conf.ps.channel = ChannelFetch_BLUE; diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp index 
e45ec5b482..8a7bbc5941 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp @@ -154,6 +154,8 @@ namespace GLLoader bool mesa_driver = false; bool in_replayer = false; + bool has_dual_source_blend = false; + bool found_framebuffer_fetch = false; bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default bool found_GL_ARB_clear_texture = false; // DX11 GPU @@ -208,6 +210,7 @@ namespace GLLoader vendor_id_amd = true; else if (strstr(vendor, "NVIDIA Corporation")) vendor_id_nvidia = true; + #ifdef _WIN32 else if (strstr(vendor, "Intel")) vendor_id_intel = true; @@ -287,6 +290,15 @@ namespace GLLoader // Mandatory for the advance HW renderer effect. Unfortunately Mesa LLVMPIPE/SWR renderers doesn't support this extension. // Rendering might be corrupted but it could be good enough for test/virtual machine. optional("GL_ARB_texture_barrier"); + + // Dual-source blending is core as of GL 3.3; left unset, the flag stays false and every SRC1_ALPHA blend is forced onto the SW path. + has_dual_source_blend = (GLAD_GL_VERSION_3_3 != 0); + found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch; + if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch) + { + Console.Warning("Framebuffer fetch was found but is disabled. 
This will reduce performance."); + found_framebuffer_fetch = false; + } } if (vendor_id_amd) diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.h b/pcsx2/GS/Renderers/OpenGL/GLLoader.h index 64c726c133..827c870f9a 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLLoader.h +++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.h @@ -39,6 +39,8 @@ namespace GLLoader extern bool in_replayer; // GL + extern bool has_dual_source_blend; + extern bool found_framebuffer_fetch; extern bool found_geometry_shader; extern bool found_GL_ARB_gpu_shader5; extern bool found_GL_ARB_shader_image_load_store; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index db9880be5e..4ab026b91b 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -22,6 +22,7 @@ #include "GS/GSUtil.h" #include "Host.h" #include "HostDisplay.h" +#include #include #include @@ -217,11 +218,13 @@ bool GSDeviceOGL::Create(HostDisplay* display) m_features.broken_point_sampler = GLLoader::vendor_id_amd; m_features.geometry_shader = GLLoader::found_geometry_shader; m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture; - m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0; + m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0 || GLLoader::found_framebuffer_fetch; m_features.provoking_vertex_last = true; - m_features.prefer_new_textures = false; m_features.dxt_textures = GL_EXT_texture_compression_s3tc; m_features.bptc_textures = GL_VERSION_4_2 || GL_ARB_texture_compression_bptc || GL_EXT_texture_compression_bptc; + m_features.prefer_new_textures = false; + m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch; + m_features.dual_source_blend = GLLoader::has_dual_source_blend; GLint point_range[2] = {}; GLint line_range[2] = {}; @@ -978,6 +981,14 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ header += 
"#extension GL_ARB_shading_language_420pack: require\n"; // Need GL version 410 header += "#extension GL_ARB_separate_shader_objects: require\n"; + if (m_features.framebuffer_fetch) + { + if (GLAD_GL_EXT_shader_framebuffer_fetch) + header += "#extension GL_EXT_shader_framebuffer_fetch : require\n"; + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + header += "#extension GL_ARM_shader_framebuffer_fetch : require\n"; + } + if (GLLoader::found_GL_ARB_shader_image_load_store) { // Need GL version 420 @@ -988,6 +999,11 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ header += "#define DISABLE_GL42_image\n"; } + if (m_features.framebuffer_fetch) + header += "#define HAS_FRAMEBUFFER_FETCH 1\n"; + else + header += "#define HAS_FRAMEBUFFER_FETCH 0\n"; + if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel) header += "#define BROKEN_DRIVER as_usual\n"; @@ -1632,7 +1648,14 @@ void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) if (GLState::ds != id) { GLState::ds = id; - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); + if (ds && ds->IsDss()) + { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); + } + else + { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, id, 0); + } } } diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index e5c86b0ff4..b7f41a8d4c 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -212,7 +212,7 @@ public: private: // Increment this constant whenever shaders change, to invalidate user's program binary cache. 
- static constexpr u32 SHADER_VERSION = 1; + static constexpr u32 SHADER_VERSION = 2; static FILE* m_debug_gl_file; diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index af97489d5a..cc49691d63 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -234,11 +234,23 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format // Depth buffer case Format::DepthStencil: - gl_fmt = GL_DEPTH32F_STENCIL8; - m_int_format = GL_DEPTH_STENCIL; - m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; - m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels - break; + { + if (!GLLoader::found_framebuffer_fetch) + { + gl_fmt = GL_DEPTH32F_STENCIL8; + m_int_format = GL_DEPTH_STENCIL; + m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels + } + else + { + gl_fmt = GL_DEPTH_COMPONENT32F; + m_int_format = GL_DEPTH_COMPONENT; + m_int_type = GL_FLOAT; + m_int_shift = 2; + } + } + break; case Format::BC1: gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; @@ -622,7 +634,7 @@ bool GSTextureOGL::Save(const std::string& fn) GSPng::Format fmt = GSPng::RGB_PNG; #endif - if (IsDss()) + if (IsDepth()) { glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h index 63df1fdf59..b05226075e 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h @@ -71,7 +71,16 @@ public: void Swap(GSTexture* tex) final; GSMap Read(const GSVector4i& r, AlignedBuffer& buffer); - bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); } + bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil) && !GLLoader::found_framebuffer_fetch; } + bool IsDepth() { return (m_type == Type::DepthStencil || m_type == 
Type::SparseDepthStencil); } + bool IsIntegerFormat() const + { + return (m_int_format == GL_RED_INTEGER || m_int_format == GL_RGBA_INTEGER); + } + bool IsUnsignedFormat() const + { + return (m_int_type == GL_UNSIGNED_BYTE || m_int_type == GL_UNSIGNED_SHORT || m_int_type == GL_UNSIGNED_INT); + } u32 GetID() final { return m_texture_id; } bool HasBeenCleaned() { return m_clean; } diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 3d478f8f8f..02baac3112 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -243,6 +243,8 @@ bool GSDeviceVK::CheckFeatures() m_features.image_load_store = features.fragmentStoresAndAtomics && m_features.texture_barrier; m_features.prefer_new_textures = true; m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex; + m_features.framebuffer_fetch = false; + m_features.dual_source_blend = features.dualSrcBlend; if (!features.dualSrcBlend) Console.Warning("Vulkan driver is missing dual-source blending. 
This will have an impact on performance."); diff --git a/pcsx2/GS/Window/GSwxDialog.cpp b/pcsx2/GS/Window/GSwxDialog.cpp index 1700aa675b..3040c292d3 100644 --- a/pcsx2/GS/Window/GSwxDialog.cpp +++ b/pcsx2/GS/Window/GSwxDialog.cpp @@ -541,10 +541,11 @@ DebugTab::DebugTab(wxWindow* parent) { PaddedBoxSizer debug_box(wxVERTICAL, this, "Debug"); auto* debug_check_box = new wxWrapSizer(wxHORIZONTAL); - m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain"); - m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache"); - m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice"); - m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump"); + m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain"); + m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache"); + m_ui.addCheckBox(debug_check_box, "Disable Framebuffer Fetch", "DisableFramebufferFetch"); + m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice"); + m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump"); auto* debug_save_check_box = new wxWrapSizer(wxHORIZONTAL); m_ui.addCheckBox(debug_save_check_box, "Save RT", "save"); diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index f6fa863160..4e93cbf0da 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -295,6 +295,7 @@ Pcsx2Config::GSOptions::GSOptions() UseDebugDevice = false; UseBlitSwapChain = false; DisableShaderCache = false; + DisableFramebufferFetch = false; ThreadedPresentation = false; OsdShowMessages = true; OsdShowSpeed = false; @@ -421,6 +422,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons OpEqu(UseDebugDevice) && OpEqu(UseBlitSwapChain) && OpEqu(DisableShaderCache) && + OpEqu(DisableFramebufferFetch) && OpEqu(ThreadedPresentation) && OpEqu(OverrideTextureBarriers) && OpEqu(OverrideGeometryShaders); @@ -501,6 +503,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() 
GSSettingBool(UseDebugDevice); GSSettingBool(UseBlitSwapChain); GSSettingBoolEx(DisableShaderCache, "disable_shader_cache"); + GSSettingBool(DisableFramebufferFetch); GSSettingBool(ThreadedPresentation); GSSettingBool(OsdShowMessages); GSSettingBool(OsdShowSpeed);