GS: Utilize GL_EXT_framebuffer_fetch where available

This commit is contained in:
Connor McLaughlin 2022-03-08 00:36:05 +10:00 committed by refractionpcsx2
parent bb75c78c1a
commit a8b9df3952
19 changed files with 217 additions and 46 deletions

View File

@ -20,6 +20,7 @@
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#ifdef FRAGMENT_SHADER
@ -38,12 +39,30 @@ in SHADER
#endif
} PSin;
// Same buffer but 2 colors for dual source blending
layout(location = 0, index = 0) out vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
// Storage qualifier used for colour output 0. It stays 'out' unless the EXT
// framebuffer fetch path below needs to read the output back.
#define TARGET_0_QUALIFIER out
// Only enable framebuffer fetch when we actually need it.
#if HAS_FRAMEBUFFER_FETCH && (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || PS_DATE != 0)
#if defined(GL_EXT_shader_framebuffer_fetch)
// EXT flavour: the previous framebuffer value is read through the output
// variable itself, so it has to be declared 'inout'.
#undef TARGET_0_QUALIFIER
#define TARGET_0_QUALIFIER inout
#define LAST_FRAG_COLOR SV_Target0
#elif defined(GL_ARM_shader_framebuffer_fetch)
// ARM flavour: the output stays 'out' and the previous framebuffer value is
// exposed through a built-in. Without this branch LAST_FRAG_COLOR would be
// undefined when only the ARM extension is available.
#define LAST_FRAG_COLOR gl_LastFragColorARM
#endif
#endif
#ifndef DISABLE_DUAL_SOURCE
// Same buffer but 2 colors for dual source blending
layout(location = 0, index = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
#else
layout(location = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
#endif
layout(binding = 1) uniform sampler2D PaletteSampler;
#if !HAS_FRAMEBUFFER_FETCH
layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the image below
#endif
#ifndef DISABLE_GL42_image
#if PS_DATE > 0
@ -79,7 +98,11 @@ layout(early_fragment_tests) in;
vec4 sample_c(vec2 uv)
{
#if PS_TEX_IS_FB == 1
#if HAS_FRAMEBUFFER_FETCH
return LAST_FRAG_COLOR;
#else
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
#endif
#else
#if PS_POINT_SAMPLER
@ -234,7 +257,11 @@ mat4 sample_4p(vec4 u)
int fetch_raw_depth()
{
#if PS_TEX_IS_FB == 1
#if HAS_FRAMEBUFFER_FETCH
return int(LAST_FRAG_COLOR.r * exp2(32.0f));
#else
return int(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#endif
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#endif
@ -243,7 +270,11 @@ int fetch_raw_depth()
vec4 fetch_raw_color()
{
#if PS_TEX_IS_FB == 1
#if HAS_FRAMEBUFFER_FETCH
return LAST_FRAG_COLOR;
#else
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
#endif
#else
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
#endif
@ -603,7 +634,11 @@ void ps_fbmask(inout vec4 C)
{
// FIXME do I need special case for 16 bits
#if PS_FBMASK
#if HAS_FRAMEBUFFER_FETCH
vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
#endif
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
#endif
}
@ -659,7 +694,14 @@ void ps_blend(inout vec4 Color, float As)
return;
#endif
vec3 Cs = Color.rgb;
#if SW_BLEND_NEEDS_RT
#if HAS_FRAMEBUFFER_FETCH
vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
#endif
#if PS_DFMT == FMT_24
float Ad = 1.0f;
@ -671,7 +713,7 @@ void ps_blend(inout vec4 Color, float As)
// Let the compiler do its jobs !
vec3 Cd = RT.rgb;
vec3 Cs = Color.rgb;
#endif
#if PS_BLEND_A == 0
vec3 A = Cs;
@ -748,14 +790,24 @@ void ps_main()
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
discard;
#endif
#if PS_DATE != 0
#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)
#if PS_WRITE_RG == 1
// Pseudo 16 bits access.
#if HAS_FRAMEBUFFER_FETCH
float rt_a = LAST_FRAG_COLOR.g;
#else
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;
#endif
#else
#if HAS_FRAMEBUFFER_FETCH
float rt_a = LAST_FRAG_COLOR.a;
#else
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
#endif
#endif
#if (PS_DATE & 3) == 1
// DATM == 0: Pixels with alpha equal to 1 will fail
@ -784,6 +836,7 @@ void ps_main()
if (gl_PrimitiveID > stencil_ceil) {
discard;
}
#endif
#endif
vec4 C = ps_color();
@ -845,7 +898,11 @@ void ps_main()
// Must be done before alpha correction
#if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
#if HAS_FRAMEBUFFER_FETCH
vec4 RT = trunc(LAST_FRAG_COLOR * 255.0f + 0.1f);
#else
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
#endif
float alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
#else
float alpha_blend = C.a / 128.0f;
@ -886,7 +943,9 @@ void ps_main()
ps_fbmask(C);
SV_Target0 = C / 255.0f;
#ifndef DISABLE_DUAL_SOURCE
SV_Target1 = vec4(alpha_blend);
#endif
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);

View File

@ -181,6 +181,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useDebugDevice, "EmuCore/GS", "UseDebugDevice", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false);
//////////////////////////////////////////////////////////////////////////
// SW Settings

View File

@ -1032,6 +1032,13 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="disableFramebufferFetch">
<property name="text">
<string>Disable Framebuffer Fetch</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>

View File

@ -423,6 +423,7 @@ struct Pcsx2Config
UseDebugDevice : 1,
UseBlitSwapChain : 1,
DisableShaderCache : 1,
DisableFramebufferFetch : 1,
ThreadedPresentation : 1,
OsdShowMessages : 1,
OsdShowSpeed : 1,

View File

@ -14,7 +14,7 @@
#include "imgui_impl_vulkan.h"
#include <array>
static constexpr u32 SHADER_CACHE_VERSION = 2;
static constexpr u32 SHADER_CACHE_VERSION = 3;
class VulkanHostDisplayTexture : public HostDisplayTexture
{

View File

@ -1308,6 +1308,7 @@ void GSApp::Init()
m_default_configuration["CrcHacksExclusions"] = "";
m_default_configuration["disable_hw_gl_draw"] = "0";
m_default_configuration["disable_shader_cache"] = "0";
m_default_configuration["DisableFramebufferFetch"] = "0";
m_default_configuration["dithering_ps2"] = "2";
m_default_configuration["dump"] = "0";
m_default_configuration["DumpReplaceableTextures"] = "0";

View File

@ -507,8 +507,6 @@ HWBlend GSDevice::GetBlend(size_t index)
return blend;
}
// Returns the flags stored in the blend map entry at the given index.
u16 GSDevice::GetBlendFlags(size_t index)
{
	const HWBlend& entry = m_blendMap[index];
	return entry.flags;
}
// clang-format off
std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =

View File

@ -544,6 +544,8 @@ public:
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
bool dual_source_blend : 1; ///< Can use alpha output as a blend factor.
FeatureSupport()
{
memset(this, 0, sizeof(*this));
@ -607,6 +609,12 @@ public:
__fi HostDisplay* GetDisplay() const { return m_display; }
__fi unsigned int GetFrameNumber() const { return m_frame; }
/// Returns true when the given blend factor is SRC1_ALPHA or INV_SRC1_ALPHA.
__fi static constexpr bool IsDualSourceBlendFactor(u16 factor)
{
	// SRC1_COLOR / INV_SRC1_COLOR are intentionally not checked: they are not used.
	if (factor == GSDevice::SRC1_ALPHA)
		return true;

	return factor == GSDevice::INV_SRC1_ALPHA;
}
void Recycle(GSTexture* t);
enum
@ -700,7 +708,8 @@ public:
// Convert the GS blend equations to HW specific blend factors/ops
// Index is computed as ((((A * 3 + B) * 3) + C) * 3) + D. A, B, C, D taken from ALPHA register.
HWBlend GetBlend(size_t index);
u16 GetBlendFlags(size_t index);
// Returns a copy of the blend map entry at the given index, exactly as stored in m_blendMap.
__fi HWBlend GetUnconvertedBlend(size_t index)
{
	return m_blendMap[index];
}
// Returns only the flags field of the blend map entry at the given index.
__fi u16 GetBlendFlags(size_t index) const
{
	const HWBlend& entry = m_blendMap[index];
	return entry.flags;
}
};
struct GSAdapter

View File

@ -53,6 +53,8 @@ GSDevice11::GSDevice11()
m_features.prefer_new_textures = false;
m_features.dxt_textures = false;
m_features.bptc_textures = false;
m_features.framebuffer_fetch = false;
m_features.dual_source_blend = true;
}
bool GSDevice11::Create(HostDisplay* display)

View File

@ -183,7 +183,8 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
// m_texture_shuffle = false;
bool enable_fbmask_emulation = false;
if (g_gs_device->Features().texture_barrier)
const GSDevice::FeatureSupport features = g_gs_device->Features();
if (features.texture_barrier)
{
enable_fbmask_emulation = GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum;
}
@ -224,7 +225,7 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
// If date is enabled you need to test the green channel instead of the
// alpha channel. Only enable this code in DATE mode to reduce the number
// of shader.
m_conf.ps.write_rg = !write_ba && g_gs_device->Features().texture_barrier && m_context->TEST.DATE;
m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_context->TEST.DATE;
m_conf.ps.read_ba = read_ba;
@ -278,15 +279,15 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
m_conf.cb_ps.FbMask.a = ba_mask;
// No blending so hit unsafe path.
if (!PRIM->ABE || !g_gs_device->Features().texture_barrier)
if (!PRIM->ABE || !features.texture_barrier)
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask);
m_conf.require_one_barrier = true;
m_conf.require_one_barrier = features.texture_barrier;
}
else
{
GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask);
m_conf.require_full_barrier = true;
m_conf.require_full_barrier = features.texture_barrier;
}
}
else
@ -339,14 +340,14 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(m_conf.ps.dfmt == 2) ? 16 : 32);
m_conf.require_one_barrier = true;
m_conf.require_one_barrier = features.texture_barrier;
}
else
{
// The safe and accurate path (but slow)
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(m_conf.ps.dfmt == 2) ? 16 : 32);
m_conf.require_full_barrier = true;
m_conf.require_full_barrier = features.texture_barrier;
}
}
}
@ -492,7 +493,7 @@ void GSRendererNew::EmulateChannelShuffle(const GSTextureCache::Source* tex)
// sample from fb instead
m_conf.tex = nullptr;
m_conf.ps.tex_is_fb = true;
m_conf.require_one_barrier = true;
m_conf.require_one_barrier = !g_gs_device->Features().framebuffer_fetch;
}
else if (m_conf.tex == m_conf.ds)
{
@ -539,6 +540,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
}
// Compute the blending equation to detect special case
const GSDevice::FeatureSupport features(g_gs_device->Features());
const GIFRegALPHA& ALPHA = m_context->ALPHA;
// Set blending to shader bits
@ -627,20 +629,31 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
&& (m_env.COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1.
&& !(m_conf.require_one_barrier || m_conf.require_full_barrier); // Also don't run if there are barriers present.
bool sw_blending = false;
if (!features.dual_source_blend)
{
const HWBlend unconverted_blend = g_gs_device->GetUnconvertedBlend(blend_index);
if (GSDevice::IsDualSourceBlendFactor(unconverted_blend.dst) ||
GSDevice::IsDualSourceBlendFactor(unconverted_blend.src))
{
sw_blending = true;
}
}
// Warning: no break on purpose.
// Note: the [[fallthrough]] attribute tells compilers not to complain about the missing breaks.
bool sw_blending = false;
if (g_gs_device->Features().texture_barrier)
if (features.texture_barrier)
{
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
const bool prefer_sw_blend = m_conf.require_full_barrier || (m_conf.require_one_barrier && m_prim_overlap == PRIM_OVERLAP_NO);
// SW Blend is (nearly) free. Let's use it.
const bool no_prim_overlap = features.framebuffer_fetch ? (m_vt.m_primclass == GS_SPRITE_CLASS) : (m_prim_overlap == PRIM_OVERLAP_NO);
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
|| (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
|| no_prim_overlap // Blend can be done in a single draw
|| (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
switch (GSConfig.AccurateBlendingUnit)
@ -731,7 +744,9 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
if (m_env.COLCLAMP.CLAMP == 0)
{
bool free_colclip = false;
if (g_gs_device->Features().texture_barrier)
if (features.framebuffer_fetch)
free_colclip = true;
else if (features.texture_barrier)
free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive;
else
free_colclip = blend_non_recursive;
@ -780,7 +795,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
if (sw_blending)
{
GL_INS("PABE mode ENABLED");
if (g_gs_device->Features().texture_barrier)
if (features.texture_barrier)
{
// Disable hw/sw blend and do pure sw blend with reading the framebuffer.
color_dest_blend = false;
@ -889,7 +904,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
const bool blend_non_recursive_one_barrier = blend_non_recursive && blend_ad_alpha_masked;
if (blend_non_recursive_one_barrier)
m_conf.require_one_barrier |= true;
else if (g_gs_device->Features().texture_barrier)
else if (features.texture_barrier)
m_conf.require_full_barrier |= !blend_non_recursive;
else
m_conf.require_one_barrier |= !blend_non_recursive;
@ -1294,6 +1309,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize();
const GSVector2& rtscale = ds ? ds->GetScale() : rt->GetScale();
const GSDevice::FeatureSupport features(g_gs_device->Features());
const bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24;
bool DATE_PRIMID = false;
@ -1324,10 +1340,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Upscaling hack to avoid various line/grid issues
MergeSprite(tex);
m_prim_overlap = PrimitiveOverlap();
if (!features.framebuffer_fetch)
m_prim_overlap = PrimitiveOverlap();
else
m_prim_overlap = PRIM_OVERLAP_UNKNOW;
// Detect framebuffer read that will need special handling
if (g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum)
if (features.texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum)
{
const u32 fb_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
if (((m_context->FRAME.FBMSK & fb_mask) == (fb_mask & 0x00FFFFFF)) && (m_vt.m_primclass == GS_TRIANGLE_CLASS))
@ -1337,7 +1356,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1
GL_DBG("Source and Target are the same! Let's sample the framebuffer");
m_conf.ps.tex_is_fb = 1;
m_conf.require_full_barrier = true;
m_conf.require_full_barrier = !features.framebuffer_fetch;
}
else if (m_prim_overlap != PRIM_OVERLAP_NO)
{
@ -1354,11 +1373,16 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
{
// It is way too complex to emulate texture shuffle with DATE, so use accurate path.
// No overlap should be triggered on gl/vk only as they support DATE_BARRIER.
const bool no_overlap = (g_gs_device->Features().texture_barrier) && (m_prim_overlap == PRIM_OVERLAP_NO);
if (no_overlap || m_texture_shuffle)
if (features.framebuffer_fetch)
{
GL_PERF("DATE: Accurate with %s", no_overlap ? "no overlap" : "texture shuffle");
if (g_gs_device->Features().texture_barrier)
// Full DATE is "free" with framebuffer fetch. The barrier gets cleared below.
DATE_BARRIER = true;
m_conf.require_full_barrier = true;
}
else if ((features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) || m_texture_shuffle)
{
GL_PERF("DATE: Accurate with %s", (features.texture_barrier && m_prim_overlap == PRIM_OVERLAP_NO) ? "no overlap" : "texture shuffle");
if (features.texture_barrier)
{
m_conf.require_full_barrier = true;
DATE_BARRIER = true;
@ -1446,6 +1470,13 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
m_conf.blend = {}; // No blending please
}
if (features.framebuffer_fetch)
{
// barriers aren't needed with fbfetch
m_conf.require_one_barrier = false;
m_conf.require_full_barrier = false;
}
if (m_conf.ps.scanmsk & 2)
DATE_PRIMID = false; // to have discard in the shader work correctly
@ -1509,7 +1540,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
else if (DATE_one)
{
if (g_gs_device->Features().texture_barrier)
if (features.texture_barrier)
{
m_conf.require_one_barrier = true;
m_conf.ps.date = 5 + m_context->TEST.DATM;
@ -1615,7 +1646,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
//
// Use an HLE shader to sample depth directly as the alpha channel
GL_INS("ICO sample depth as alpha");
m_conf.require_full_barrier = true;
m_conf.require_full_barrier = !features.framebuffer_fetch;
// Extract the depth as palette index
m_conf.ps.depth_fmt = 1;
m_conf.ps.channel = ChannelFetch_BLUE;

View File

@ -154,6 +154,8 @@ namespace GLLoader
bool mesa_driver = false;
bool in_replayer = false;
bool has_dual_source_blend = false;
bool found_framebuffer_fetch = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
bool found_GL_ARB_clear_texture = false;
// DX11 GPU
@ -208,6 +210,7 @@ namespace GLLoader
vendor_id_amd = true;
else if (strstr(vendor, "NVIDIA Corporation"))
vendor_id_nvidia = true;
#ifdef _WIN32
else if (strstr(vendor, "Intel"))
vendor_id_intel = true;
@ -287,6 +290,13 @@ namespace GLLoader
// Mandatory for the advanced HW renderer effects. Unfortunately, the Mesa LLVMPIPE/SWR renderers don't support this extension.
// Rendering might be corrupted, but it could be good enough for tests/virtual machines.
optional("GL_ARB_texture_barrier");
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
{
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
found_framebuffer_fetch = false;
}
}
if (vendor_id_amd)

View File

@ -39,6 +39,8 @@ namespace GLLoader
extern bool in_replayer;
// GL
extern bool has_dual_source_blend;
extern bool found_framebuffer_fetch;
extern bool found_geometry_shader;
extern bool found_GL_ARB_gpu_shader5;
extern bool found_GL_ARB_shader_image_load_store;

View File

@ -22,6 +22,7 @@
#include "GS/GSUtil.h"
#include "Host.h"
#include "HostDisplay.h"
#include <cinttypes>
#include <fstream>
#include <sstream>
@ -217,11 +218,13 @@ bool GSDeviceOGL::Create(HostDisplay* display)
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture;
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0 || GLLoader::found_framebuffer_fetch;
m_features.provoking_vertex_last = true;
m_features.prefer_new_textures = false;
m_features.dxt_textures = GL_EXT_texture_compression_s3tc;
m_features.bptc_textures = GL_VERSION_4_2 || GL_ARB_texture_compression_bptc || GL_EXT_texture_compression_bptc;
m_features.prefer_new_textures = false;
m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
m_features.dual_source_blend = GLLoader::has_dual_source_blend;
GLint point_range[2] = {};
GLint line_range[2] = {};
@ -978,6 +981,14 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
if (m_features.framebuffer_fetch)
{
if (GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
}
if (GLLoader::found_GL_ARB_shader_image_load_store)
{
// Need GL version 420
@ -988,6 +999,11 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
header += "#define DISABLE_GL42_image\n";
}
if (m_features.framebuffer_fetch)
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
else
header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
header += "#define BROKEN_DRIVER as_usual\n";
@ -1632,7 +1648,14 @@ void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds)
if (GLState::ds != id)
{
GLState::ds = id;
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0);
if (ds && ds->IsDss())
{
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0);
}
else
{
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, id, 0);
}
}
}

View File

@ -212,7 +212,7 @@ public:
private:
// Increment this constant whenever shaders change, to invalidate user's program binary cache.
static constexpr u32 SHADER_VERSION = 1;
static constexpr u32 SHADER_VERSION = 2;
static FILE* m_debug_gl_file;

View File

@ -234,11 +234,23 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
// Depth buffer
case Format::DepthStencil:
gl_fmt = GL_DEPTH32F_STENCIL8;
m_int_format = GL_DEPTH_STENCIL;
m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels
break;
{
if (!GLLoader::found_framebuffer_fetch)
{
gl_fmt = GL_DEPTH32F_STENCIL8;
m_int_format = GL_DEPTH_STENCIL;
m_int_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels
}
else
{
gl_fmt = GL_DEPTH_COMPONENT32F;
m_int_format = GL_DEPTH_COMPONENT;
m_int_type = GL_FLOAT;
m_int_shift = 2;
}
}
break;
case Format::BC1:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
@ -622,7 +634,7 @@ bool GSTextureOGL::Save(const std::string& fn)
GSPng::Format fmt = GSPng::RGB_PNG;
#endif
if (IsDss())
if (IsDepth())
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);

View File

@ -71,7 +71,16 @@ public:
void Swap(GSTexture* tex) final;
GSMap Read(const GSVector4i& r, AlignedBuffer<u8, 32>& buffer);
bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); }
bool IsDss() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil) && !GLLoader::found_framebuffer_fetch; }
bool IsDepth() { return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); }
// True when the texture's transfer format is GL_RED_INTEGER or GL_RGBA_INTEGER.
bool IsIntegerFormat() const
{
	switch (m_int_format)
	{
		case GL_RED_INTEGER:
		case GL_RGBA_INTEGER:
			return true;
		default:
			return false;
	}
}
// True when the texture's transfer type is an unsigned byte, short or int.
bool IsUnsignedFormat() const
{
	switch (m_int_type)
	{
		case GL_UNSIGNED_BYTE:
		case GL_UNSIGNED_SHORT:
		case GL_UNSIGNED_INT:
			return true;
		default:
			return false;
	}
}
u32 GetID() final { return m_texture_id; }
bool HasBeenCleaned() { return m_clean; }

View File

@ -243,6 +243,8 @@ bool GSDeviceVK::CheckFeatures()
m_features.image_load_store = features.fragmentStoresAndAtomics && m_features.texture_barrier;
m_features.prefer_new_textures = true;
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
m_features.framebuffer_fetch = false;
m_features.dual_source_blend = features.dualSrcBlend;
if (!features.dualSrcBlend)
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");

View File

@ -541,10 +541,11 @@ DebugTab::DebugTab(wxWindow* parent)
{
PaddedBoxSizer<wxStaticBoxSizer> debug_box(wxVERTICAL, this, "Debug");
auto* debug_check_box = new wxWrapSizer(wxHORIZONTAL);
m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain");
m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache");
m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice");
m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump");
m_ui.addCheckBox(debug_check_box, "Use Blit Swap Chain", "UseBlitSwapChain");
m_ui.addCheckBox(debug_check_box, "Disable Shader Cache", "disable_shader_cache");
m_ui.addCheckBox(debug_check_box, "Disable Framebuffer Fetch", "DisableFramebufferFetch");
m_ui.addCheckBox(debug_check_box, "Use Debug Device", "UseDebugDevice");
m_ui.addCheckBox(debug_check_box, "Dump GS data", "dump");
auto* debug_save_check_box = new wxWrapSizer(wxHORIZONTAL);
m_ui.addCheckBox(debug_save_check_box, "Save RT", "save");

View File

@ -295,6 +295,7 @@ Pcsx2Config::GSOptions::GSOptions()
UseDebugDevice = false;
UseBlitSwapChain = false;
DisableShaderCache = false;
DisableFramebufferFetch = false;
ThreadedPresentation = false;
OsdShowMessages = true;
OsdShowSpeed = false;
@ -421,6 +422,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons
OpEqu(UseDebugDevice) &&
OpEqu(UseBlitSwapChain) &&
OpEqu(DisableShaderCache) &&
OpEqu(DisableFramebufferFetch) &&
OpEqu(ThreadedPresentation) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders);
@ -501,6 +503,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingBool(UseDebugDevice);
GSSettingBool(UseBlitSwapChain);
GSSettingBoolEx(DisableShaderCache, "disable_shader_cache");
GSSettingBool(DisableFramebufferFetch);
GSSettingBool(ThreadedPresentation);
GSSettingBool(OsdShowMessages);
GSSettingBool(OsdShowSpeed);