GS/HW: Clean up blend function, update blend levels.

Clean up and optimize blend levels for all renderers:
For GL/Vk:
High blend: Prefer sw blend when RTA > 128, otherwise try to use RTA correction.

For DX:
Medium blend: Prefer sw blend on Ad cases where prims don't overlap, alpha masked case or RTA correction isn't possible.
High blend: Prefer sw blend on Cd*(Alpha + 1) cases where prims don't overlap.
Full blend: Prefer sw blend on cases where Alpha > 128 when prims don't overlap.

Add some optimizations for Ad cases to not do any blending depending on Alpha value.
This commit is contained in:
lightningterror 2024-03-22 20:33:10 +01:00
parent d6e3eccf45
commit 0d61f154d7
1 changed files with 50 additions and 73 deletions

View File

@ -3854,6 +3854,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool alpha_c0_high_min_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().min > 128);
const bool alpha_c0_high_max_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().max > 128);
const bool alpha_c0_less_max_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128);
const bool alpha_c1_high_min_one = (m_conf.ps.blend_c == 1 && rt_alpha_min > 128);
const bool alpha_c1_high_max_one = (m_conf.ps.blend_c == 1 && rt_alpha_max > 128);
const bool alpha_c1_high_no_rta_correct = m_conf.ps.blend_c == 1 && !(rt->m_rt_alpha_scale || m_can_correct_alpha);
const bool alpha_c2_zero = (m_conf.ps.blend_c == 2 && AFIX == 0u);
const bool alpha_c2_one = (m_conf.ps.blend_c == 2 && AFIX == 128u);
const bool alpha_c2_less_one = (m_conf.ps.blend_c == 2 && AFIX <= 128u);
@ -3886,7 +3889,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_b = 0;
}
else if (COLCLAMP.CLAMP && m_conf.ps.blend_a == 2
&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c2_high_one))))
&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))))
{
// CLAMP 1, negative result will be clamped to 0.
// Condition 1:
@ -3907,14 +3910,20 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool blend_ad = m_conf.ps.blend_c == 1;
const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
bool blend_ad_alpha_masked = blend_ad && alpha_mask;
if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0))
&& g_gs_device->Features().texture_barrier && blend_ad_alpha_masked)
if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
{
// Swap Ad with As for hw blend.
m_conf.ps.a_masked = 1;
m_conf.ps.blend_c = 0;
else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium)
// Detect barrier aka fbmask on d3d11.
|| m_conf.require_one_barrier)
&& blend_ad_alpha_masked)
m_conf.require_one_barrier |= true;
}
else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier) && blend_ad_alpha_masked)
{
// Swap Ad with As for hw blend.
m_conf.ps.a_masked = 1;
m_conf.ps.blend_c = 0;
m_conf.require_one_barrier |= true;
}
else
blend_ad_alpha_masked = false;
@ -3948,29 +3957,31 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP;
const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
// Primitives don't overlap.
const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend.
const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && (no_prim_overlap || m_conf.ps.shuffle));
const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend; // Mix of hw/sw blending
// Blend can be done on hw. As and F cases should be accurate.
// BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend.
// BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw.
const bool clr_blend = !!(blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2 | BLEND_HW_CLR3));
bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // Make sure it isn't an Ad case
bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // As or Af cases only.
&& !(m_draw_env->PABE.PABE && GetAlphaMinMax().min < 128) // No PABE as it will require sw blending.
&& (COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1.
&& !(one_barrier || m_conf.require_full_barrier); // Also don't run if there are barriers present.
&& !prefer_sw_blend; // Don't run if sw blend is preferred.
// Warning: no break on purpose.
// Note: the [[fallthrough]] attribute tells compilers not to complain about the missing breaks.
bool sw_blending = false;
if (features.texture_barrier)
{
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
const bool prefer_sw_blend = m_conf.require_full_barrier || (one_barrier && (m_prim_overlap == PRIM_OVERLAP_NO || m_conf.ps.shuffle));
const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend; // Mix of hw/sw blending
const bool blend_requires_barrier = (blend_flag & BLEND_A_MAX) // Impossible blending
|| (m_conf.require_full_barrier) // Another effect (for example fbmask) already requires a full barrier
// Sw blend, either full barrier or one barrier with no overlap.
|| prefer_sw_blend
// Blend can be done in a single draw, and we already need a barrier
// On fbfetch, one barrier is like full barrier
|| (one_barrier && (no_prim_overlap || features.framebuffer_fetch))
@ -3988,7 +3999,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
sw_blending |= m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c0_high_max_one;
[[fallthrough]];
case AccBlendLevel::High:
sw_blending |= m_conf.ps.blend_c == 1 || (m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c2_high_one);
sw_blending |= (alpha_c1_high_max_one || alpha_c1_high_no_rta_correct) || (m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c2_high_one);
[[fallthrough]];
case AccBlendLevel::Medium:
// Initial idea was to enable accurate blending for sprite rendering to handle
@ -3997,17 +4008,14 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 100;
[[fallthrough]];
case AccBlendLevel::Basic:
// SW FBMASK, needs sw blend, avoid hitting any hw blend pre enabled (accumulation, blend mix, blend cd),
// fixes shadows in Superman shadows of Apokolips.
// DATE_BARRIER already does full barrier so also makes more sense to do full sw blend.
// Prefer sw blend if possible.
color_dest_blend &= !prefer_sw_blend;
// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
accumulation_blend &= !(prefer_sw_blend || (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO));
accumulation_blend &= !prefer_sw_blend;
// Enable sw blending for barriers.
sw_blending |= blend_requires_barrier;
// Try to do hw blend for clr2 case.
sw_blending &= !clr_blend1_2;
// Enable sw blending for free blending, should be done after blend_ad_improved check.
// Enable sw blending for free blending.
sw_blending |= free_blend;
// Do not run BLEND MIX if sw blending is already present, it's less accurate.
blend_mix &= !sw_blending;
@ -4019,42 +4027,38 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
}
else
{
// FBMASK, channel shuffle already reads the fb so it is safe to enable sw blend when there is no overlap or a texture shuffle.
const bool prefer_sw_blend = m_conf.require_one_barrier && (m_prim_overlap == PRIM_OVERLAP_NO || m_conf.ps.shuffle);
switch (GSConfig.AccurateBlendingUnit)
{
case AccBlendLevel::Maximum:
if (m_prim_overlap == PRIM_OVERLAP_NO)
// Enable sw blend when prims don't overlap.
if (no_prim_overlap)
{
clr_blend1_2 = false;
sw_blending |= true;
}
[[fallthrough]];
case AccBlendLevel::Full:
sw_blending |= ((m_conf.ps.blend_c == 1 || (blend_mix && (alpha_c2_high_one || alpha_c0_high_max_one))) && (m_prim_overlap == PRIM_OVERLAP_NO));
// Enable sw blend on cases where Alpha > 128 when prims don't overlap.
sw_blending |= (alpha_c0_high_max_one || alpha_c1_high_max_one || alpha_c2_high_one) && no_prim_overlap;
[[fallthrough]];
case AccBlendLevel::High:
sw_blending |= (!(clr_blend || blend_mix) && (m_prim_overlap == PRIM_OVERLAP_NO));
// Enable sw blend on Cd*(Alpha + 1) cases where prims don't overlap.
sw_blending |= (m_conf.ps.blend_a == m_conf.ps.blend_d == 1) && no_prim_overlap;
[[fallthrough]];
case AccBlendLevel::Medium:
// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
if (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO)
{
accumulation_blend = false;
sw_blending |= true;
}
// Enable sw blend on Ad cases that are not blend_ad_alpha_masked, when prims don't overlap and either RT alpha max > 128 or rta correction isn't possible.
sw_blending |= !blend_ad_alpha_masked && (alpha_c1_high_max_one || alpha_c1_high_no_rta_correct) && no_prim_overlap;
[[fallthrough]];
case AccBlendLevel::Basic:
// Disable accumulation blend when sw blend is preferred.
color_dest_blend &= !prefer_sw_blend;
// Prefer sw blend if possible.
color_dest_blend &= !prefer_sw_blend;
accumulation_blend &= !prefer_sw_blend;
// Blending requires reading the framebuffer when there's no overlap.
// Enable sw blending for reading fb.
sw_blending |= prefer_sw_blend;
// Try to do hw blend for clr2 case.
sw_blending &= !clr_blend1_2;
// Enable sw blending for free blending, should be done after blend_ad_improved check.
sw_blending |= accumulation_blend || blend_non_recursive;
// Enable sw blending for free blending.
sw_blending |= free_blend;
// Do not run BLEND MIX if sw blending is already present, it's less accurate.
blend_mix &= !sw_blending;
sw_blending |= blend_mix;
@ -4110,7 +4114,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
if (features.framebuffer_fetch)
free_colclip = true;
else if (features.texture_barrier)
free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive;
free_colclip = no_prim_overlap || blend_non_recursive;
else
free_colclip = blend_non_recursive;
@ -4186,13 +4190,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
}
}
// For stat to optimize accurate option
#if 0
GL_INS("BLEND_INFO: %u/%u/%u/%u. Clamp:%u. Prim:%d number %u (drawlist %zu) (sw %d)",
m_conf.ps.blend_a, m_conf.ps.blend_b, m_conf.ps.blend_c, m_conf.ps.blend_d,
m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending);
#endif
if (color_dest_blend)
{
// Blend output will be Cd, disable hw/sw blending.
@ -4257,10 +4254,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
// Dual source output not needed (accumulation blend replaces it with ONE).
m_conf.ps.no_color1 = true;
// Only Ad case will require one barrier
// No need to set a_masked bit for blend_ad_alpha_masked case
m_conf.require_one_barrier |= blend_ad_alpha_masked;
}
else if (blend_mix)
{
@ -4281,11 +4274,10 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
if (blend_mix1)
{
if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c2_high_one))
if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))
{
// Replace Cs*As + Cd*(1 - As) with Cs*As - Cd*(As - 1).
// Replace Cs*F + Cd*(1 - F) with Cs*F - Cd*(F - 1).
// As - 1 or F - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation.
// Replace Cs*Alpha + Cd*(1 - Alpha) with Cs*Alpha - Cd*(Alpha - 1).
// Alpha - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation.
// Af will be replaced with As in shader and send it to dual source output.
m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, GSDevice::OP_SUBTRACT, false, 0};
// blend hw 1 will disable alpha clamp, we can reuse the old bits.
@ -4295,8 +4287,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
}
else if (m_conf.ps.blend_a == m_conf.ps.blend_d)
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// Try to compensate a bit with subtracting 1 (0.00392) * (Alpha + 1) from Cs.
// Compensate slightly for Cd*(Alpha + 1) - Cs*Alpha.
m_conf.ps.blend_hw = 2;
}
@ -4322,14 +4313,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_b = 0;
m_conf.ps.blend_d = 0;
}
// Only Ad case will require one barrier
if (blend_ad_alpha_masked)
{
// Swap Ad with As for hw blend
m_conf.ps.a_masked = 1;
m_conf.require_one_barrier |= true;
}
}
else
{
@ -4381,12 +4364,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_hw = 3;
}
if (blend_ad_alpha_masked)
{
m_conf.ps.a_masked = 1;
m_conf.require_one_barrier |= true;
}
const HWBlend blend(GSDevice::GetBlend(blend_index, replace_dual_src));
m_conf.blend = {true, blend.src, blend.dst, blend.op, m_conf.ps.blend_c == 2, AFIX};