From 1fb29de0828842df70d32f51faf3935520456646 Mon Sep 17 00:00:00 2001 From: lightningterror <18107717+lightningterror@users.noreply.github.com> Date: Tue, 7 Dec 2021 11:45:14 +0100 Subject: [PATCH] GS-ogl: Try to use a mix of hw/sw blending in more situations. When alpha (As or F) is at most 128: Do Cs*F or Cs*As in shader. Do Cs*(As + 1) or Cs*(F + 1) in shader. Do Cs*(1 - As) or Cs*(1 - F) in shader. Optimization, might be faster as we will require fewer barriers. --- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 6 ++- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h | 2 +- pcsx2/GS/Renderers/OpenGL/GSRendererOGL.cpp | 52 +++++++++++++++++---- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index a1cdb30ae5..4fc17356e7 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1844,7 +1844,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) } } -void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend_constant, bool accumulation_blend) +void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend_constant, bool accumulation_blend, bool blend_mix) { if (blend_index) { @@ -1867,6 +1867,10 @@ void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend b.src = GL_ONE; b.dst = GL_ONE; } + else if (blend_mix) + { + b.src = GL_ONE; + } if (GLState::eq_RGB != b.op) { diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index af23c80d8f..2e4c055cff 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -628,7 +628,7 @@ public: void PSSetSamplerState(GLuint ss); void OMSetDepthStencilState(GSDepthStencilOGL* dss); - void OMSetBlendState(u8 blend_index = 0, u8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false); + void OMSetBlendState(u8
blend_index = 0, u8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false, bool blend_mix = false); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final; void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector()); diff --git a/pcsx2/GS/Renderers/OpenGL/GSRendererOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSRendererOGL.cpp index 670570146a..a289682571 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSRendererOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSRendererOGL.cpp @@ -480,17 +480,30 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) const u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D); const int blend_flag = m_dev->GetBlendFlags(blend_index); - // SW Blend is (nearly) free. Let's use it. - const bool impossible_or_free_blend = (blend_flag & (BLEND_NO_REC|BLEND_A_MAX|BLEND_ACCU)) // Blend doesn't requires the costly barrier - || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw - || (m_require_full_barrier); // Another effect (for example fbmask) already requires a full barrier - // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd bool accumulation_blend = !!(blend_flag & BLEND_ACCU); // Blending doesn't require barrier, or sampling of the rt const bool blend_non_recursive = !!(blend_flag & BLEND_NO_REC); + // BLEND MIX selection, use a mix of hw/sw blending + if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) + GetAlphaMinMax(); + const bool blend_mix1 = !!(blend_flag & BLEND_MIX1); + const bool blend_mix2 = !!(blend_flag & BLEND_MIX2); + const bool blend_mix3 = !!(blend_flag & BLEND_MIX3); + bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) + // Do not enable if As > 128 or F > 128, hw blend clamps to 1 + && !((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); + + // SW Blend is (nearly) free. Let's use it. 
+ const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending + || blend_non_recursive // Free sw blending, doesn't require barriers or reading fb + || accumulation_blend // Mix of hw/sw blending + || blend_mix // Mix of hw/sw blending + || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw + || (m_require_full_barrier); // Another effect (for example fbmask) already requires a full barrier + // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. bool sw_blending = false; @@ -500,8 +513,6 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) sw_blending |= true; [[fallthrough]]; case ACC_BLEND_FULL: - if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) - GetAlphaMinMax(); sw_blending |= (ALPHA.A != ALPHA.B) && ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); [[fallthrough]]; case ACC_BLEND_HIGH: @@ -536,8 +547,9 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) m_ps_sel.colclip = 1; sw_blending = true; accumulation_blend = false; // disable the HDR algo + blend_mix = false; } - else if (accumulation_blend) + else if (accumulation_blend || blend_mix) { // A fast algo that requires 2 passes GL_INS("COLCLIP Fast HDR mode ENABLED"); @@ -567,6 +579,7 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) GL_INS("PABE mode ENABLED"); m_ps_sel.pabe = 1; accumulation_blend = false; + blend_mix = false; } } @@ -613,6 +626,29 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) // Note accumulation_blend doesn't require a barrier } + else if (blend_mix) + { + dev->OMSetBlendState(blend_index, ALPHA.FIX, ALPHA.C == 2, false, true); + + if (blend_mix1) + { + m_ps_sel.blend_a = 0; + m_ps_sel.blend_b = 2; + m_ps_sel.blend_d = 2; + } + else if (blend_mix2) + { + m_ps_sel.blend_a = 0; + m_ps_sel.blend_b = 2; + m_ps_sel.blend_d = 0; + } + else if (blend_mix3) 
+ { + m_ps_sel.blend_a = 2; + m_ps_sel.blend_b = 0; + m_ps_sel.blend_d = 0; + } + } else { // Disable HW blending