GS-ogl: Try to use a mix of hw/sw blending in more situations.

When alpha is at most 128 (As <= 128 or F <= 128):
Do Cs*F or Cs*As in shader.
Do Cs*(As + 1) or Cs*(F + 1) in shader.
Do Cs*(1 - As) or Cs*(1 - F) in shader.

This is an optimization: it might be faster because fewer barriers will be required.
This commit is contained in:
lightningterror 2021-12-07 11:45:14 +01:00
parent f79e5ab559
commit 1fb29de082
3 changed files with 50 additions and 10 deletions

View File

@ -1844,7 +1844,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
}
}
void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend_constant, bool accumulation_blend)
void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend_constant, bool accumulation_blend, bool blend_mix)
{
if (blend_index)
{
@ -1867,6 +1867,10 @@ void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend
b.src = GL_ONE;
b.dst = GL_ONE;
}
else if (blend_mix)
{
b.src = GL_ONE;
}
if (GLState::eq_RGB != b.op)
{

View File

@ -628,7 +628,7 @@ public:
void PSSetSamplerState(GLuint ss);
void OMSetDepthStencilState(GSDepthStencilOGL* dss);
void OMSetBlendState(u8 blend_index = 0, u8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false);
void OMSetBlendState(u8 blend_index = 0, u8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false, bool blend_mix = false);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final;
void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector());

View File

@ -480,17 +480,30 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
const u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
const int blend_flag = m_dev->GetBlendFlags(blend_index);
// SW Blend is (nearly) free. Let's use it.
const bool impossible_or_free_blend = (blend_flag & (BLEND_NO_REC|BLEND_A_MAX|BLEND_ACCU)) // Blend doesn't requires the costly barrier
|| (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
|| (m_require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
bool accumulation_blend = !!(blend_flag & BLEND_ACCU);
// Blending doesn't require barrier, or sampling of the rt
const bool blend_non_recursive = !!(blend_flag & BLEND_NO_REC);
// BLEND MIX selection, use a mix of hw/sw blending
if (!m_vt.m_alpha.valid && (ALPHA.C == 0))
GetAlphaMinMax();
const bool blend_mix1 = !!(blend_flag & BLEND_MIX1);
const bool blend_mix2 = !!(blend_flag & BLEND_MIX2);
const bool blend_mix3 = !!(blend_flag & BLEND_MIX3);
bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3)
// Do not enable if As > 128 or F > 128, hw blend clamps to 1
&& !((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u));
// SW Blend is (nearly) free. Let's use it.
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
|| blend_mix // Mix of hw/sw blending
|| (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
|| (m_require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
// Warning no break on purpose
// Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks.
bool sw_blending = false;
@ -500,8 +513,6 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
sw_blending |= true;
[[fallthrough]];
case ACC_BLEND_FULL:
if (!m_vt.m_alpha.valid && (ALPHA.C == 0))
GetAlphaMinMax();
sw_blending |= (ALPHA.A != ALPHA.B) && ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u));
[[fallthrough]];
case ACC_BLEND_HIGH:
@ -536,8 +547,9 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
m_ps_sel.colclip = 1;
sw_blending = true;
accumulation_blend = false; // disable the HDR algo
blend_mix = false;
}
else if (accumulation_blend)
else if (accumulation_blend || blend_mix)
{
// A fast algo that requires 2 passes
GL_INS("COLCLIP Fast HDR mode ENABLED");
@ -567,6 +579,7 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
GL_INS("PABE mode ENABLED");
m_ps_sel.pabe = 1;
accumulation_blend = false;
blend_mix = false;
}
}
@ -613,6 +626,29 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
// Note accumulation_blend doesn't require a barrier
}
else if (blend_mix)
{
dev->OMSetBlendState(blend_index, ALPHA.FIX, ALPHA.C == 2, false, true);
if (blend_mix1)
{
m_ps_sel.blend_a = 0;
m_ps_sel.blend_b = 2;
m_ps_sel.blend_d = 2;
}
else if (blend_mix2)
{
m_ps_sel.blend_a = 0;
m_ps_sel.blend_b = 2;
m_ps_sel.blend_d = 0;
}
else if (blend_mix3)
{
m_ps_sel.blend_a = 2;
m_ps_sel.blend_b = 0;
m_ps_sel.blend_d = 0;
}
}
else
{
// Disable HW blending