diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl
index 212526bcd5..b6c813900a 100644
--- a/bin/resources/shaders/opengl/tfx_fs.glsl
+++ b/bin/resources/shaders/opengl/tfx_fs.glsl
@@ -20,6 +20,7 @@
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
+#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#ifdef FRAGMENT_SHADER
@@ -38,12 +39,36 @@ in SHADER
#endif
} PSin;
-// Same buffer but 2 colors for dual source blending
-layout(location = 0, index = 0) out vec4 SV_Target0;
-layout(location = 0, index = 1) out vec4 SV_Target1;
+// Qualifier of SV_Target0: "out" by default, "inout" when EXT framebuffer fetch reads it back.
+#define TARGET_0_QUALIFIER out
+
+// Only enable framebuffer fetch when we actually need it.
+// LAST_FRAG_COLOR is what the render-target read paths use in place of RtSampler.
+#if HAS_FRAMEBUFFER_FETCH && (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || PS_DATE != 0)
+ #if defined(GL_EXT_shader_framebuffer_fetch)
+ // EXT flavour: the current framebuffer value is read through the inout output.
+ #undef TARGET_0_QUALIFIER
+ #define TARGET_0_QUALIFIER inout
+ #define LAST_FRAG_COLOR SV_Target0
+ #elif defined(GL_ARM_shader_framebuffer_fetch)
+ // ARM flavour: the output stays "out"; the current value comes from a built-in.
+ #define LAST_FRAG_COLOR gl_LastFragColorARM
+ #endif
+#endif
+
+#ifndef DISABLE_DUAL_SOURCE
+ // Same buffer but 2 colors for dual source blending
+ layout(location = 0, index = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
+ layout(location = 0, index = 1) out vec4 SV_Target1;
+#else
+ layout(location = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
+#endif
layout(binding = 1) uniform sampler2D PaletteSampler;
+
+#if !HAS_FRAMEBUFFER_FETCH
layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the image below
+#endif
#ifndef DISABLE_GL42_image
#if PS_DATE > 0
@@ -79,7 +98,11 @@ layout(early_fragment_tests) in;
vec4 sample_c(vec2 uv)
{
#if PS_TEX_IS_FB == 1
+#if HAS_FRAMEBUFFER_FETCH
+ return LAST_FRAG_COLOR;
+#else
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
+#endif
#else
#if PS_POINT_SAMPLER
@@ -234,7 +257,11 @@ mat4 sample_4p(vec4 u)
int fetch_raw_depth()
{
#if PS_TEX_IS_FB == 1
+#if HAS_FRAMEBUFFER_FETCH
+ return int(LAST_FRAG_COLOR.r * exp2(32.0f));
+#else
return int(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
+#endif
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#endif
@@ -243,7 +270,11 @@ int fetch_raw_depth()
vec4 fetch_raw_color()
{
#if PS_TEX_IS_FB == 1
+#if HAS_FRAMEBUFFER_FETCH
+ return LAST_FRAG_COLOR;
+#else
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
+#endif
#else
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
#endif
@@ -603,7 +634,11 @@ void ps_fbmask(inout vec4 C)
{
// FIXME do I need special case for 16 bits
#if PS_FBMASK
+#if HAS_FRAMEBUFFER_FETCH
+ vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
+#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
+#endif
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
#endif
}
@@ -659,7 +694,14 @@ void ps_blend(inout vec4 Color, float As)
return;
#endif
+ vec3 Cs = Color.rgb;
+
+#if SW_BLEND_NEEDS_RT
+#if HAS_FRAMEBUFFER_FETCH
+ vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
+#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
+#endif
#if PS_DFMT == FMT_24
float Ad = 1.0f;
@@ -671,7 +713,7 @@ void ps_blend(inout vec4 Color, float As)
// Let the compiler do its jobs !
vec3 Cd = RT.rgb;
- vec3 Cs = Color.rgb;
+#endif
#if PS_BLEND_A == 0
vec3 A = Cs;
@@ -748,14 +790,24 @@ void ps_main()
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
discard;
#endif
+
+#if PS_DATE != 0
#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)
#if PS_WRITE_RG == 1
// Pseudo 16 bits access.
+#if HAS_FRAMEBUFFER_FETCH
+ float rt_a = LAST_FRAG_COLOR.g;
+#else
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;
+#endif
+#else
+#if HAS_FRAMEBUFFER_FETCH
+ float rt_a = LAST_FRAG_COLOR.a;
#else
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
#endif
+#endif
#if (PS_DATE & 3) == 1
// DATM == 0: Pixel with alpha equal to 1 will failed
@@ -784,6 +836,7 @@ void ps_main()
if (gl_PrimitiveID > stencil_ceil) {
discard;
}
+#endif
#endif
vec4 C = ps_color();
@@ -845,7 +898,11 @@ void ps_main()
// Must be done before alpha correction
#if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
+#if HAS_FRAMEBUFFER_FETCH
+ vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
+#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
+#endif
float alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
#else
float alpha_blend = C.a / 128.0f;
@@ -886,7 +943,9 @@ void ps_main()
ps_fbmask(C);
SV_Target0 = C / 255.0f;
+#ifndef DISABLE_DUAL_SOURCE
SV_Target1 = vec4(alpha_blend);
+#endif
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp
index ea877c4582..f66ef20521 100644
--- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp
+++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp
@@ -181,6 +181,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useDebugDevice, "EmuCore/GS", "UseDebugDevice", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1);
+ SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false);
//////////////////////////////////////////////////////////////////////////
// SW Settings
diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui
index cf63ac6942..c6f37701fb 100644
--- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui
+++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui
@@ -1032,6 +1032,13 @@
+ -
+
+
+ Disable Framebuffer Fetch
+
+
+
diff --git a/pcsx2/Config.h b/pcsx2/Config.h
index 5696df51be..a51ad31d97 100644
--- a/pcsx2/Config.h
+++ b/pcsx2/Config.h
@@ -423,6 +423,7 @@ struct Pcsx2Config
UseDebugDevice : 1,
UseBlitSwapChain : 1,
DisableShaderCache : 1,
+ DisableFramebufferFetch : 1,
ThreadedPresentation : 1,
OsdShowMessages : 1,
OsdShowSpeed : 1,
diff --git a/pcsx2/Frontend/VulkanHostDisplay.cpp b/pcsx2/Frontend/VulkanHostDisplay.cpp
index 9696618821..98e09a9990 100644
--- a/pcsx2/Frontend/VulkanHostDisplay.cpp
+++ b/pcsx2/Frontend/VulkanHostDisplay.cpp
@@ -14,7 +14,7 @@
#include "imgui_impl_vulkan.h"
#include
-static constexpr u32 SHADER_CACHE_VERSION = 2;
+static constexpr u32 SHADER_CACHE_VERSION = 3;
class VulkanHostDisplayTexture : public HostDisplayTexture
{
diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp
index 32ec1467f7..bd664ca150 100644
--- a/pcsx2/GS/GS.cpp
+++ b/pcsx2/GS/GS.cpp
@@ -1308,6 +1308,7 @@ void GSApp::Init()
m_default_configuration["CrcHacksExclusions"] = "";
m_default_configuration["disable_hw_gl_draw"] = "0";
m_default_configuration["disable_shader_cache"] = "0";
+ m_default_configuration["DisableFramebufferFetch"] = "0";
m_default_configuration["dithering_ps2"] = "2";
m_default_configuration["dump"] = "0";
m_default_configuration["DumpReplaceableTextures"] = "0";
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp
index c6e66ecd12..9ee411b183 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.cpp
+++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp
@@ -507,8 +507,6 @@ HWBlend GSDevice::GetBlend(size_t index)
return blend;
}
-u16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; }
-
// clang-format off
std::array GSDevice::m_blendMap =
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h
index 7b3c48cc32..7ce60dfe9b 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.h
+++ b/pcsx2/GS/Renderers/Common/GSDevice.h
@@ -544,6 +544,8 @@ public:
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
+ bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
+ bool dual_source_blend : 1; ///< Can use alpha output as a blend factor.
FeatureSupport()
{
memset(this, 0, sizeof(*this));
@@ -607,6 +609,12 @@ public:
__fi HostDisplay* GetDisplay() const { return m_display; }
__fi unsigned int GetFrameNumber() const { return m_frame; }
+ __fi static constexpr bool IsDualSourceBlendFactor(u16 factor) // true when `factor` is SRC1_ALPHA or INV_SRC1_ALPHA
+ {
+ return (factor == GSDevice::SRC1_ALPHA || factor == GSDevice::INV_SRC1_ALPHA
+ /*|| factor == GSDevice::SRC1_COLOR || factor == GSDevice::INV_SRC1_COLOR*/); // not used
+ }
+
void Recycle(GSTexture* t);
enum
@@ -700,7 +708,8 @@ public:
// Convert the GS blend equations to HW specific blend factors/ops
// Index is computed as ((((A * 3 + B) * 3) + C) * 3) + D. A, B, C, D taken from ALPHA register.
HWBlend GetBlend(size_t index);
- u16 GetBlendFlags(size_t index);
+ __fi HWBlend GetUnconvertedBlend(size_t index) { return m_blendMap[index]; } // m_blendMap entry for `index`, returned by value exactly as stored
+ __fi u16 GetBlendFlags(size_t index) const { return m_blendMap[index].flags; } // only the `flags` field of the m_blendMap entry for `index`
};
struct GSAdapter
diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp
index 6920167119..a5c16b262a 100644
--- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp
+++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp
@@ -53,6 +53,8 @@ GSDevice11::GSDevice11()
m_features.prefer_new_textures = false;
m_features.dxt_textures = false;
m_features.bptc_textures = false;
+ m_features.framebuffer_fetch = false;
+ m_features.dual_source_blend = true;
}
bool GSDevice11::Create(HostDisplay* display)
diff --git a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp
index 02fc5f76ea..45e57f9cde 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp
@@ -183,7 +183,8 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
// m_texture_shuffle = false;
bool enable_fbmask_emulation = false;
- if (g_gs_device->Features().texture_barrier)
+ const GSDevice::FeatureSupport features = g_gs_device->Features();
+ if (features.texture_barrier)
{
enable_fbmask_emulation = GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum;
}
@@ -224,7 +225,7 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
// If date is enabled you need to test the green channel instead of the
// alpha channel. Only enable this code in DATE mode to reduce the number
// of shader.
- m_conf.ps.write_rg = !write_ba && g_gs_device->Features().texture_barrier && m_context->TEST.DATE;
+ m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_context->TEST.DATE;
m_conf.ps.read_ba = read_ba;
@@ -278,15 +279,17 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
m_conf.cb_ps.FbMask.a = ba_mask;
// No blending so hit unsafe path.
- if (!PRIM->ABE || !g_gs_device->Features().texture_barrier)
+ if (!PRIM->ABE || !features.texture_barrier)
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask);
+ // Keep this unconditional: the branch is also taken when texture barriers are unsupported,
+ // and the fbmask shader still reads the render target there. Framebuffer fetch clears it later.
m_conf.require_one_barrier = true;
}
else
{
GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask);
- m_conf.require_full_barrier = true;
+ m_conf.require_full_barrier = features.texture_barrier;
}
}
else
@@ -339,14 +340,16 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(m_conf.ps.dfmt == 2) ? 16 : 32);
+ // Keep this unconditional so devices without texture barriers still request render-target
+ // access for the fbmask shader. Framebuffer fetch clears the flag later in DrawPrims.
m_conf.require_one_barrier = true;
}
else
{
// The safe and accurate path (but slow)
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(m_conf.ps.dfmt == 2) ? 16 : 32);
- m_conf.require_full_barrier = true;
+ m_conf.require_full_barrier = features.texture_barrier;
}
}
}
@@ -492,7 +493,7 @@ void GSRendererNew::EmulateChannelShuffle(const GSTextureCache::Source* tex)
// sample from fb instead
m_conf.tex = nullptr;
m_conf.ps.tex_is_fb = true;
- m_conf.require_one_barrier = true;
+ m_conf.require_one_barrier = !g_gs_device->Features().framebuffer_fetch;
}
else if (m_conf.tex == m_conf.ds)
{
@@ -539,6 +540,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
}
// Compute the blending equation to detect special case
+ const GSDevice::FeatureSupport features(g_gs_device->Features());
const GIFRegALPHA& ALPHA = m_context->ALPHA;
// Set blending to shader bits
@@ -627,20 +629,31 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
&& (m_env.COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1.
&& !(m_conf.require_one_barrier || m_conf.require_full_barrier); // Also don't run if there are barriers present.
+ bool sw_blending = false;
+ if (!features.dual_source_blend)
+ {
+ const HWBlend unconverted_blend = g_gs_device->GetUnconvertedBlend(blend_index);
+ if (GSDevice::IsDualSourceBlendFactor(unconverted_blend.dst) ||
+ GSDevice::IsDualSourceBlendFactor(unconverted_blend.src))
+ {
+ sw_blending = true;
+ }
+ }
+
// Warning no break on purpose
// Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks.
- bool sw_blending = false;
- if (g_gs_device->Features().texture_barrier)
+ if (features.texture_barrier)
{
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
const bool prefer_sw_blend = m_conf.require_full_barrier || (m_conf.require_one_barrier && m_prim_overlap == PRIM_OVERLAP_NO);
// SW Blend is (nearly) free. Let's use it.
+ const bool no_prim_overlap = features.framebuffer_fetch ? (m_vt.m_primclass == GS_SPRITE_CLASS) : (m_prim_overlap == PRIM_OVERLAP_NO);
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
- || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
+ || no_prim_overlap // Blend can be done in a single draw
|| (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
switch (GSConfig.AccurateBlendingUnit)
@@ -731,7 +744,9 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
if (m_env.COLCLAMP.CLAMP == 0)
{
bool free_colclip = false;
- if (g_gs_device->Features().texture_barrier)
+ if (features.framebuffer_fetch)
+ free_colclip = true;
+ else if (features.texture_barrier)
free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive;
else
free_colclip = blend_non_recursive;
@@ -780,7 +795,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
if (sw_blending)
{
GL_INS("PABE mode ENABLED");
- if (g_gs_device->Features().texture_barrier)
+ if (features.texture_barrier)
{
// Disable hw/sw blend and do pure sw blend with reading the framebuffer.
color_dest_blend = false;
@@ -889,7 +904,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
const bool blend_non_recursive_one_barrier = blend_non_recursive && blend_ad_alpha_masked;
if (blend_non_recursive_one_barrier)
m_conf.require_one_barrier |= true;
- else if (g_gs_device->Features().texture_barrier)
+ else if (features.texture_barrier)
m_conf.require_full_barrier |= !blend_non_recursive;
else
m_conf.require_one_barrier |= !blend_non_recursive;
@@ -1294,6 +1309,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize();
const GSVector2& rtscale = ds ? ds->GetScale() : rt->GetScale();
+ const GSDevice::FeatureSupport features(g_gs_device->Features());
const bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24;
bool DATE_PRIMID = false;
@@ -1324,10 +1340,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Upscaling hack to avoid various line/grid issues
MergeSprite(tex);
- m_prim_overlap = PrimitiveOverlap();
+ if (!features.framebuffer_fetch)
+ m_prim_overlap = PrimitiveOverlap();
+ else
+ m_prim_overlap = PRIM_OVERLAP_UNKNOW;
// Detect framebuffer read that will need special handling
- if (g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum)
+ if (features.texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum)
{
const u32 fb_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
if (((m_context->FRAME.FBMSK & fb_mask) == (fb_mask & 0x00FFFFFF)) && (m_vt.m_primclass == GS_TRIANGLE_CLASS))
@@ -1337,7 +1356,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1
GL_DBG("Source and Target are the same! Let's sample the framebuffer");
m_conf.ps.tex_is_fb = 1;
- m_conf.require_full_barrier = true;
+ m_conf.require_full_barrier = !features.framebuffer_fetch;
}
else if (m_prim_overlap != PRIM_OVERLAP_NO)
{
@@ -1354,11 +1373,16 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
{
// It is way too complex to emulate texture shuffle with DATE, so use accurate path.
// No overlap should be triggered on gl/vk only as they support DATE_BARRIER.
- const bool no_overlap = (g_gs_device->Features().texture_barrier) && (m_prim_overlap == PRIM_OVERLAP_NO);
- if (no_overlap || m_texture_shuffle)
+ if (features.framebuffer_fetch)
{
- GL_PERF("DATE: Accurate with %s", no_overlap ? "no overlap" : "texture shuffle");
- if (g_gs_device->Features().texture_barrier)
+ // Full DATE is "free" with framebuffer fetch. The barrier gets cleared below.
+ DATE_BARRIER = true;
+ m_conf.require_full_barrier = true;
+ }
+ else if ((features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) || m_texture_shuffle)
+ {
+ GL_PERF("DATE: Accurate with %s", (features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) ? "no overlap" : "texture shuffle");
+ if (features.texture_barrier)
{
m_conf.require_full_barrier = true;
DATE_BARRIER = true;
@@ -1446,6 +1470,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
m_conf.blend = {}; // No blending please
}
+ if (features.framebuffer_fetch)
+ {
+ // barriers aren't needed with fbfetch
+ m_conf.require_one_barrier = false;
+ m_conf.require_full_barrier = false;
+ }
+
if (m_conf.ps.scanmsk & 2)
DATE_PRIMID = false; // to have discard in the shader work correctly
@@ -1509,7 +1540,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
else if (DATE_one)
{
- if (g_gs_device->Features().texture_barrier)
+ if (features.texture_barrier)
{
m_conf.require_one_barrier = true;
m_conf.ps.date = 5 + m_context->TEST.DATM;
@@ -1615,7 +1646,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
//
// Use an HLE shader to sample depth directly as the alpha channel
GL_INS("ICO sample depth as alpha");
- m_conf.require_full_barrier = true;
+ m_conf.require_full_barrier = !features.framebuffer_fetch;
// Extract the depth as palette index
m_conf.ps.depth_fmt = 1;
m_conf.ps.channel = ChannelFetch_BLUE;
diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp
index e45ec5b482..8a7bbc5941 100644
--- a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp
+++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp
@@ -154,6 +154,8 @@ namespace GLLoader
bool mesa_driver = false;
bool in_replayer = false;
+ bool has_dual_source_blend = false;
+ bool found_framebuffer_fetch = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
bool found_GL_ARB_clear_texture = false;
// DX11 GPU
@@ -208,6 +210,7 @@ namespace GLLoader
vendor_id_amd = true;
else if (strstr(vendor, "NVIDIA Corporation"))
vendor_id_nvidia = true;
+
#ifdef _WIN32
else if (strstr(vendor, "Intel"))
vendor_id_intel = true;
@@ -287,6 +290,18 @@ namespace GLLoader
// Mandatory for the advance HW renderer effect. Unfortunately Mesa LLVMPIPE/SWR renderers doesn't support this extension.
// Rendering might be corrupted but it could be good enough for test/virtual machine.
optional("GL_ARB_texture_barrier");
+
+ // Dual-source blending comes from ARB_blend_func_extended, which is core as of GL 3.3.
+ // Without this assignment the flag keeps its initial value of false, and the renderer
+ // would treat dual-source blending as unavailable.
+ has_dual_source_blend = GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended;
+
+ found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
+ if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
+ {
+ Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
+ found_framebuffer_fetch = false;
+ }
}
if (vendor_id_amd)
diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.h b/pcsx2/GS/Renderers/OpenGL/GLLoader.h
index 64c726c133..827c870f9a 100644
--- a/pcsx2/GS/Renderers/OpenGL/GLLoader.h
+++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.h
@@ -39,6 +39,8 @@ namespace GLLoader
extern bool in_replayer;
// GL
+ extern bool has_dual_source_blend;
+ extern bool found_framebuffer_fetch;
extern bool found_geometry_shader;
extern bool found_GL_ARB_gpu_shader5;
extern bool found_GL_ARB_shader_image_load_store;
diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
index db9880be5e..4ab026b91b 100644
--- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
+++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
@@ -22,6 +22,7 @@
#include "GS/GSUtil.h"
#include "Host.h"
#include "HostDisplay.h"
+#include
#include
#include
@@ -217,11 +218,13 @@ bool GSDeviceOGL::Create(HostDisplay* display)
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture;
- m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
+ m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0 || GLLoader::found_framebuffer_fetch;
m_features.provoking_vertex_last = true;
- m_features.prefer_new_textures = false;
m_features.dxt_textures = GL_EXT_texture_compression_s3tc;
m_features.bptc_textures = GL_VERSION_4_2 || GL_ARB_texture_compression_bptc || GL_EXT_texture_compression_bptc;
+ m_features.prefer_new_textures = false;
+ m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
+ m_features.dual_source_blend = GLLoader::has_dual_source_blend;
GLint point_range[2] = {};
GLint line_range[2] = {};
@@ -978,6 +981,14 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
+ if (m_features.framebuffer_fetch)
+ {
+ if (GLAD_GL_EXT_shader_framebuffer_fetch)
+ header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
+ else if (GLAD_GL_ARM_shader_framebuffer_fetch)
+ header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
+ }
+
if (GLLoader::found_GL_ARB_shader_image_load_store)
{
// Need GL version 420
@@ -988,6 +999,11 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
header += "#define DISABLE_GL42_image\n";
}
+ if (m_features.framebuffer_fetch)
+ header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
+ else
+ header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
+
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
header += "#define BROKEN_DRIVER as_usual\n";
@@ -1632,7 +1648,12 @@ void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds)
if (GLState::ds != id)
{
GLState::ds = id;
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0);
+ // Depth textures are created without a stencil component when framebuffer fetch is in use,
+ // so the attachment point follows the device mode rather than the texture being bound.
+ // Using the same point when ds is null means a detach clears what was attached before,
+ // instead of leaving the stencil half of a depth-stencil attachment behind.
+ const GLenum attachment = GLLoader::found_framebuffer_fetch ? GL_DEPTH_ATTACHMENT : GL_DEPTH_STENCIL_ATTACHMENT;
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, id, 0);
}
}
diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h
index e5c86b0ff4..b7f41a8d4c 100644
--- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h
+++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h
@@ -212,7 +212,7 @@ public:
private:
// Increment this constant whenever shaders change, to invalidate user's program binary cache.
- static constexpr u32 SHADER_VERSION = 1;
+ static constexpr u32 SHADER_VERSION = 2;
static FILE* m_debug_gl_file;
diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp
index af97489d5a..cc49691d63 100644
--- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp
+++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp
@@ -234,11 +234,23 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
// Depth buffer
case Format::DepthStencil:
- gl_fmt = GL_DEPTH32F_STENCIL8;
- m_int_format = GL_DEPTH_STENCIL;
- m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
- m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels
- break;
+ {
+ if (!GLLoader::found_framebuffer_fetch)
+ {
+ gl_fmt = GL_DEPTH32F_STENCIL8;
+ m_int_format = GL_DEPTH_STENCIL;
+ m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
+ m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels
+ }
+ else
+ {
+ gl_fmt = GL_DEPTH_COMPONENT32F;
+ m_int_format = GL_DEPTH_COMPONENT;
+ m_int_type = GL_FLOAT;
+ m_int_shift = 2;
+ }
+ }
+ break;
case Format::BC1:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
@@ -622,7 +634,7 @@ bool GSTextureOGL::Save(const std::string& fn)
GSPng::Format fmt = GSPng::RGB_PNG;
#endif
- if (IsDss())
+ if (IsDepth())
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h
index 63df1fdf59..b05226075e 100644
--- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h
+++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h
@@ -71,7 +71,16 @@ public:
void Swap(GSTexture* tex) final;
GSMap Read(const GSVector4i& r, AlignedBuffer& buffer);
- bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); }
+ bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil) && !GLLoader::found_framebuffer_fetch; } // depth-type texture AND framebuffer fetch not in use
+ bool IsDepth() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); } // depth-type texture, regardless of framebuffer fetch
+ bool IsIntegerFormat() const // true when m_int_format is GL_RED_INTEGER or GL_RGBA_INTEGER
+ {
+ return (m_int_format == GL_RED_INTEGER || m_int_format == GL_RGBA_INTEGER);
+ }
+ bool IsUnsignedFormat() const // true when m_int_type is GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT or GL_UNSIGNED_INT
+ {
+ return (m_int_type == GL_UNSIGNED_BYTE || m_int_type == GL_UNSIGNED_SHORT || m_int_type == GL_UNSIGNED_INT);
+ }
u32 GetID() final { return m_texture_id; }
bool HasBeenCleaned() { return m_clean; }
diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
index 3d478f8f8f..02baac3112 100644
--- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
+++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
@@ -243,6 +243,8 @@ bool GSDeviceVK::CheckFeatures()
m_features.image_load_store = features.fragmentStoresAndAtomics && m_features.texture_barrier;
m_features.prefer_new_textures = true;
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
+ m_features.framebuffer_fetch = false;
+ m_features.dual_source_blend = features.dualSrcBlend;
if (!features.dualSrcBlend)
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
diff --git a/pcsx2/GS/Window/GSwxDialog.cpp b/pcsx2/GS/Window/GSwxDialog.cpp
index 1700aa675b..3040c292d3 100644
--- a/pcsx2/GS/Window/GSwxDialog.cpp
+++ b/pcsx2/GS/Window/GSwxDialog.cpp
@@ -541,10 +541,11 @@ DebugTab::DebugTab(wxWindow* parent)
{
PaddedBoxSizer debug_box(wxVERTICAL, this, "Debug");
auto* debug_check_box = new wxWrapSizer(wxHORIZONTAL);
- m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain");
- m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache");
- m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice");
- m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump");
+ m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain");
+ m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache");
+ m_ui.addCheckBox(debug_check_box, "Disable Framebuffer Fetch", "DisableFramebufferFetch");
+ m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice");
+ m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump");
auto* debug_save_check_box = new wxWrapSizer(wxHORIZONTAL);
m_ui.addCheckBox(debug_save_check_box, "Save RT", "save");
diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp
index f6fa863160..4e93cbf0da 100644
--- a/pcsx2/Pcsx2Config.cpp
+++ b/pcsx2/Pcsx2Config.cpp
@@ -295,6 +295,7 @@ Pcsx2Config::GSOptions::GSOptions()
UseDebugDevice = false;
UseBlitSwapChain = false;
DisableShaderCache = false;
+ DisableFramebufferFetch = false;
ThreadedPresentation = false;
OsdShowMessages = true;
OsdShowSpeed = false;
@@ -421,6 +422,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons
OpEqu(UseDebugDevice) &&
OpEqu(UseBlitSwapChain) &&
OpEqu(DisableShaderCache) &&
+ OpEqu(DisableFramebufferFetch) &&
OpEqu(ThreadedPresentation) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders);
@@ -501,6 +503,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingBool(UseDebugDevice);
GSSettingBool(UseBlitSwapChain);
GSSettingBoolEx(DisableShaderCache, "disable_shader_cache");
+ GSSettingBool(DisableFramebufferFetch);
GSSettingBool(ThreadedPresentation);
GSSettingBool(OsdShowMessages);
GSSettingBool(OsdShowSpeed);