GS:OGL: Use blend min for primid destination alpha

This commit is contained in:
TellowKrinkle 2022-08-31 15:16:31 -05:00 committed by lightningterror
parent 80523828c7
commit 62f9b16139
11 changed files with 95 additions and 177 deletions

View File

@ -378,4 +378,21 @@ void ps_yuv()
}
#endif
#if defined(ps_stencil_image_init_0) || defined(ps_stencil_image_init_1)
// Seeds the primitive-ID destination-alpha target from the sampled alpha.
// A texel that passes the destination alpha test is written as 0x7FFFFFFF,
// a texel that fails it is written as -1.
void main()
{
	// Alpha is normalised, so 127.5 / 255 separates 0x7F and below from 0x80 and above.
	const float threshold = 127.5f / 255.0f;
	float alpha = sample_c().a;

	bool fails = false;
#ifdef ps_stencil_image_init_0
	// < 0x80 pass (== 0x80 should not pass)
	fails = fails || (alpha > threshold);
#endif
#ifdef ps_stencil_image_init_1
	// >= 0x80 pass
	fails = fails || (alpha < threshold);
#endif

	SV_Target0 = vec4(0x7FFFFFFF);
	if (fails)
		SV_Target0 = vec4(-1);
}
#endif
#endif

View File

@ -74,36 +74,12 @@ layout(binding = 1) uniform sampler2D PaletteSampler;
layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the image below
#endif
#ifndef DISABLE_GL42_image
#if PS_DATE > 0
// Performance note: images mustn't be declared if they are unused. Otherwise it will
// require extra shader validation.
// FIXME how to declare memory access
layout(r32i, binding = 3) uniform iimage2D img_prim_min;
// WARNING:
// You can't enable it if you discard the fragment. The depth is still
// updated (shadow in Shin Megami Tensei Nocturne)
//
// early_fragment_tests must still be enabled in the first pass of the 2 passes algo
// First pass search the first primitive that will write the bad alpha value. Value
// won't be written if the fragment fails the depth test.
//
// In theory the best solution will be do
// 1/ copy the depth buffer
// 2/ do the full depth (current depth writes are disabled)
// 3/ restore the depth buffer for 2nd pass
// Of course, it is likely too costly.
#if PS_DATE == 1 || PS_DATE == 2
layout(early_fragment_tests) in;
#endif
#if PS_DATE == 3
layout(binding = 3) uniform sampler2D img_prim_min;
// I don't remember why I set this parameter but it is surely useless
//layout(pixel_center_integer) in vec4 gl_FragCoord;
#endif
#else
// use basic stencil
#endif
vec4 fetch_rt()
{
@ -814,8 +790,7 @@ void ps_main()
discard;
#endif
#if PS_DATE != 0
#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)
#if PS_DATE >= 5
#if PS_WRITE_RG == 1
// Pseudo 16 bits access.
@ -833,25 +808,19 @@ void ps_main()
#endif
if (bad) {
#if PS_DATE >= 5 || defined(DISABLE_GL42_image)
discard;
#else
imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));
return;
#endif
}
#endif
#if PS_DATE == 3 && !defined(DISABLE_GL42_image)
int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;
#if PS_DATE == 3
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
// the bad alpha value so we must keep it.
if (gl_PrimitiveID > stencil_ceil) {
discard;
}
#endif
#endif
vec4 C = ps_color();
@ -934,19 +903,15 @@ void ps_main()
#endif
// Get the first primitive that will write a failing alpha value
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
#if PS_DATE == 1
// DATM == 0
// Pixels with alpha equal to 1 will fail (128-255)
if (C.a > 127.5f) {
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
}
SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
#elif PS_DATE == 2
// DATM == 1
// Pixels with alpha equal to 0 will fail (0-127)
if (C.a < 127.5f) {
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
}
SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#endif

View File

@ -1477,12 +1477,10 @@ void GSApp::Init()
m_default_configuration["OsdShowIndicators"] = "1";
m_default_configuration["OsdScale"] = "100";
m_default_configuration["override_GL_ARB_copy_image"] = "-1";
m_default_configuration["override_GL_ARB_clear_texture"] = "-1";
m_default_configuration["override_GL_ARB_clip_control"] = "-1";
m_default_configuration["override_GL_ARB_direct_state_access"] = "-1";
m_default_configuration["override_GL_ARB_draw_buffers_blend"] = "-1";
m_default_configuration["override_GL_ARB_gpu_shader5"] = "-1";
m_default_configuration["override_GL_ARB_shader_image_load_store"] = "-1";
m_default_configuration["override_GL_ARB_texture_barrier"] = "-1";
m_default_configuration["OverrideTextureBarriers"] = "-1";
m_default_configuration["OverrideGeometryShaders"] = "-1";

View File

@ -148,10 +148,8 @@ namespace GLLoader
bool has_dual_source_blend = false;
bool found_framebuffer_fetch = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
bool found_GL_ARB_clear_texture = false;
// DX11 GPU
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
bool found_GL_ARB_shader_image_load_store = false; // Intel IB. Nvidia/AMD miss Mesa implementation.
bool found_GL_ARB_texture_barrier = false;
static bool mandatory(const std::string& ext)
@ -266,10 +264,6 @@ namespace GLLoader
{
// GL4.0
found_GL_ARB_gpu_shader5 = optional("GL_ARB_gpu_shader5");
// GL4.2
found_GL_ARB_shader_image_load_store = optional("GL_ARB_shader_image_load_store");
// GL4.4
found_GL_ARB_clear_texture = optional("GL_ARB_clear_texture");
// GL4.5
optional("GL_ARB_direct_state_access");
// Mandatory for the advanced HW renderer effects. Unfortunately, the Mesa LLVMPIPE/SWR renderers don't support this extension.

View File

@ -43,7 +43,5 @@ namespace GLLoader
extern bool found_framebuffer_fetch;
extern bool found_geometry_shader;
extern bool found_GL_ARB_gpu_shader5;
extern bool found_GL_ARB_shader_image_load_store;
extern bool found_GL_ARB_clear_texture;
extern bool found_GL_ARB_texture_barrier;
} // namespace GLLoader

View File

@ -217,7 +217,7 @@ bool GSDeviceOGL::Create(HostDisplay* display)
// optional features based on context
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture;
m_features.image_load_store = true;
if (GSConfig.OverrideTextureBarriers == 0)
m_features.texture_barrier = GLLoader::found_framebuffer_fetch; // Force Disabled
else if (GSConfig.OverrideTextureBarriers == 1)
@ -353,21 +353,23 @@ bool GSDeviceOGL::Create(HostDisplay* display)
}
}
// these all share the same vertex shader
const auto convert_glsl = Host::ReadResourceFileToString("shaders/opengl/convert.glsl");
if (!convert_glsl.has_value())
{
Host::ReportErrorAsync("GS", "Failed to read shaders/opengl/convert.glsl.");
return false;
}
// ****************************************************************
// convert
// ****************************************************************
{
GL_PUSH("GSDeviceOGL::Convert");
// these all share the same vertex shader
const auto shader = Host::ReadResourceFileToString("shaders/opengl/convert.glsl");
if (!shader.has_value())
{
Host::ReportErrorAsync("GS", "Failed to read shaders/opengl/convert.glsl.");
return false;
}
m_convert.vs = GetShaderSource("vs_main", GL_VERTEX_SHADER, m_shader_common_header, *shader, {});
m_convert.vs = GetShaderSource("vs_main", GL_VERTEX_SHADER, m_shader_common_header, *convert_glsl, {});
for (size_t i = 0; i < std::size(m_convert.ps); i++)
{
@ -375,7 +377,7 @@ bool GSDeviceOGL::Create(HostDisplay* display)
const std::string macro_sel = (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I) ?
StringUtil::StdStringFromFormat("#define PS_SCALE_FACTOR %d\n", GSConfig.UpscaleMultiplier) :
std::string();
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *shader, macro_sel));
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, macro_sel));
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
return false;
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
@ -527,6 +529,15 @@ bool GSDeviceOGL::Create(HostDisplay* display)
m_date.dss = new GSDepthStencilOGL();
m_date.dss->EnableStencil();
m_date.dss->SetStencil(GL_ALWAYS, GL_REPLACE);
// Build the two fragment programs stored in m_date.primid_ps
// (index 0 -> ps_stencil_image_init_0, index 1 -> ps_stencil_image_init_1).
// Both are linked against the shared convert vertex shader.
for (size_t i = 0; i < std::size(m_date.primid_ps); i++)
{
	// The format strings use %d, so pass an int rather than the size_t loop counter.
	const int index = static_cast<int>(i);
	const std::string ps(GetShaderSource(
		StringUtil::StdStringFromFormat("ps_stencil_image_init_%d", index),
		GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, {}));
	// Abort device creation when the program cannot be obtained, exactly like the
	// convert pipelines do, instead of continuing with an unusable program.
	if (!m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, {}, ps))
		return false;
	m_date.primid_ps[i].SetFormattedName("PrimID Destination Alpha Init %d", index);
}
}
// ****************************************************************
@ -803,20 +814,6 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
GL_PUSH("Clear Depth %d", T->GetID());
if (0 && GLLoader::found_GL_ARB_clear_texture)
{
// I don't know what the driver does but it creates
// some slowdowns on Harry Potter PS
// Maybe it triggers some texture relocations, or maybe
// it clears also the stencil value (2 times slower)
//
// Let's disable this code for the moment.
// Don't bother with Depth_Stencil insanity
T->Clear(NULL);
}
else
{
OMSetFBO(m_fbo);
// RT must be detached, if RT is too small, depth won't be fully cleared
// AT tolenico 2 map clip bug
@ -837,7 +834,6 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
glDepthMask(false);
}
glEnable(GL_SCISSOR_TEST);
}
}
void GSDeviceOGL::ClearStencil(GSTexture* t, u8 c)
@ -938,39 +934,15 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
return dss;
}
void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area)
GSTexture* GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area, bool datm)
{
const GSVector2i& rtsize = rt->GetSize();
// Create a texture to avoid the useless clean@0
if (m_date.t == NULL)
m_date.t = CreateTexture(rtsize.x, rtsize.y, false, GSTexture::Format::PrimID);
GSTexture* tex = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
// Clean with the max signed value
const int max_int = 0x7FFFFFFF;
static_cast<GSTextureOGL*>(m_date.t)->Clear(&max_int, area);
glBindImageTexture(3, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
#ifdef ENABLE_OGL_DEBUG
// Help to see the texture in apitrace
PSSetShaderResource(3, m_date.t);
#endif
}
void GSDeviceOGL::RecycleDateTexture()
{
if (m_date.t)
{
//static_cast<GSTextureOGL*>(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", GSState::s_n));
Recycle(m_date.t);
m_date.t = NULL;
}
}
void GSDeviceOGL::Barrier(GLbitfield b)
{
glMemoryBarrier(b);
GL_PUSH("PrimID Destination Alpha Clear");
StretchRect(rt, GSVector4(area) / GSVector4(rtsize).xyxy(), tex, GSVector4(area), m_date.primid_ps[datm], false);
return tex;
}
std::string GSDeviceOGL::GetShaderSource(const std::string_view& entry, GLenum type, const std::string_view& common_header, const std::string_view& glsl_h_code, const std::string_view& macro_sel)
@ -1000,16 +972,6 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
if (GLLoader::found_GL_ARB_gpu_shader5)
header += "#extension GL_ARB_gpu_shader5 : enable\n";
if (GLLoader::found_GL_ARB_shader_image_load_store)
{
// Need GL version 420
header += "#extension GL_ARB_shader_image_load_store: require\n";
}
else
{
header += "#define DISABLE_GL42_image\n";
}
if (m_features.framebuffer_fetch)
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
else
@ -1856,6 +1818,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
GSTexture* primid_texture = nullptr;
// Destination Alpha Setup
switch (config.destination_alpha)
{
@ -1863,7 +1827,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
case GSHWDrawConfig::DestinationAlphaMode::Full:
break; // No setup
case GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking:
InitPrimDateTexture(config.rt, config.drawarea);
primid_texture = InitPrimDateTexture(config.rt, config.drawarea, config.datm);
break;
case GSHWDrawConfig::DestinationAlphaMode::StencilOne:
if (m_features.texture_barrier)
@ -1933,11 +1897,6 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
PSSetShaderResource(2, config.rt);
SetupSampler(config.sampler);
OMSetBlendState(config.blend.enable, s_gl_blend_factors[config.blend.src_factor],
s_gl_blend_factors[config.blend.dst_factor], s_gl_blend_ops[config.blend.op],
config.blend.constant_enable, config.blend.constant);
OMSetColorMaskState(config.colormask);
SetupOM(config.depth);
if (m_vs_cb_cache.Update(config.cb_vs))
{
@ -1983,35 +1942,32 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
GL_PUSH("Date GL42");
// It could be good idea to use stencil in the same time.
// Early stencil test will reduce the number of atomic-load operation
GL_PUSH("Destination Alpha PrimID Init");
// Create an r32i image that will contain primitive ID
// Note: do it at the beginning because the clean will dirty the FBO state
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
OMSetRenderTargets(primid_texture, config.ds, &config.scissor);
OMColorMaskSelector mask;
mask.wrgba = 0;
mask.wr = true;
OMSetColorMaskState(mask);
OMSetBlendState(true, GL_ONE, GL_ONE, GL_MIN);
OMDepthStencilSelector dss = config.depth;
dss.zwe = 0; // Don't write depth
SetupOM(dss);
// I don't know how much is it legal to mount rt as Texture/RT. No write is done.
// In doubt let's detach RT.
OMSetRenderTargets(NULL, config.ds, &config.scissor);
// Don't write anything on the color buffer
// Neither in the depth buffer
glDepthMask(false);
// Compute primitiveID max that pass the date test (Draw without barrier)
DrawIndexedPrimitive();
// Ask PS to discard shader above the primitiveID max
glDepthMask(GLState::depth_mask);
psel.ps.date = 3;
config.alpha_second_pass.ps.date = 3;
SetupPipeline(psel);
// Be sure that first pass is finished !
Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
PSSetShaderResource(3, primid_texture);
}
OMSetBlendState(config.blend.enable, s_gl_blend_factors[config.blend.src_factor],
s_gl_blend_factors[config.blend.dst_factor], s_gl_blend_ops[config.blend.op],
config.blend.constant_enable, config.blend.constant);
OMSetColorMaskState(config.colormask);
SetupOM(config.depth);
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
SendHWDraw(config, psel.ps.IsFeedbackLoop());
@ -2063,8 +2019,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}
}
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
RecycleDateTexture();
if (primid_texture)
Recycle(primid_texture);
if (draw_rt_clone)
Recycle(draw_rt_clone);

View File

@ -272,7 +272,7 @@ private:
struct
{
GSDepthStencilOGL* dss = nullptr;
GSTexture* t = nullptr;
GL::Program primid_ps[2];
} m_date;
struct
@ -338,8 +338,7 @@ public:
void ClearDepth(GSTexture* t) final;
void ClearStencil(GSTexture* t, u8 c) final;
void InitPrimDateTexture(GSTexture* rt, const GSVector4i& area);
void RecycleDateTexture();
GSTexture* InitPrimDateTexture(GSTexture* rt, const GSVector4i& area, bool datm);
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) final;
@ -391,6 +390,4 @@ public:
void SetupOM(OMDepthStencilSelector dssel);
GLuint GetSamplerID(PSSamplerSelector ssel);
GLuint GetPaletteSamplerID();
void Barrier(GLbitfield b);
};

View File

@ -189,8 +189,8 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
{
// 1 Channel integer
case Format::PrimID:
gl_fmt = GL_R32I;
m_int_format = GL_RED_INTEGER;
gl_fmt = GL_R32F;
m_int_format = GL_RED;
m_int_type = GL_INT;
m_int_shift = 2;
break;

View File

@ -168,10 +168,6 @@ const char* dialog_message(int ID, bool* updateText)
case IDC_GEOMETRY_SHADER_OVERRIDE:
return cvtString("Allows the GPU instead of just the CPU to transform lines into sprites. This reduces CPU load and bandwidth requirement, but it is heavier on the GPU.\n"
"Automatic detection is recommended.");
case IDC_IMAGE_LOAD_STORE:
return cvtString("Allows advanced atomic operations to speed up Accurate DATE.\n"
"Only disable this if using Accurate DATE causes (GPU driver) issues.\n\n"
"Note: This option is only supported by GPUs which support at least Direct3D 11.");
case IDC_LINEAR_PRESENT:
return cvtString("Use bilinear filtering when Upscaling/Downscaling the image to the screen. Disable it if you want a sharper/pixelated output.");
// Exclusive for Hardware Renderer

View File

@ -88,7 +88,6 @@ enum
IDC_SWTHREADS_EDIT,
// OpenGL Advanced Settings
IDC_GEOMETRY_SHADER_OVERRIDE,
IDC_IMAGE_LOAD_STORE,
// On-screen Display
IDC_OSD_LOG,
IDC_OSD_MONITOR,

View File

@ -548,7 +548,6 @@ DebugTab::DebugTab(wxWindow* parent)
const int space = wxSizerFlags().Border().GetBorderInPixels();
PaddedBoxSizer<wxBoxSizer> tab_box(wxVERTICAL);
auto ogl_hw_prereq = [this]{ return m_is_ogl_hw; };
auto vk_ogl_hw_prereq = [this] { return m_is_ogl_hw || m_is_vk_hw; };
if (g_Conf->DevMode || IsDevBuild)
@ -586,7 +585,6 @@ DebugTab::DebugTab(wxWindow* parent)
auto* ogl_grid = new wxFlexGridSizer(2, space, space);
m_ui.addComboBoxAndLabel(ogl_grid, "Texture Barriers:", "OverrideTextureBarriers", &theApp.m_gs_generic_list, -1, vk_ogl_hw_prereq);
m_ui.addComboBoxAndLabel(ogl_grid, "Geometry Shader:", "OverrideGeometryShaders", &theApp.m_gs_generic_list, IDC_GEOMETRY_SHADER_OVERRIDE, vk_ogl_hw_prereq);
m_ui.addComboBoxAndLabel(ogl_grid, "Image Load Store:", "override_GL_ARB_shader_image_load_store", &theApp.m_gs_generic_list, IDC_IMAGE_LOAD_STORE, ogl_hw_prereq);
m_ui.addComboBoxAndLabel(ogl_grid, "Dump Compression:", "GSDumpCompression", &theApp.m_gs_dump_compression, -1);
ogl_box->Add(ogl_grid);