GS/HW: Clean up blend function, update blend levels.

Clean up and optimize blend levels for all renderers. For GL/Vk, High blend: prefer sw blend when RTA > 128, otherwise try to use RTA correction. For DX, Medium blend: prefer sw blend on Ad cases where prims don't overlap, alpha masked case or RTA correction isn't possible. High: prefer sw blend on Cd*(Alpha + 1) cases where prims don't overlap. Full: prefer sw blend on cases where Alpha > 128 when prims don't overlap. Add some optimizations for Ad cases to not do any blending depending on the Alpha value.
2024-03-22 20:33:10 +01:00 · 2024-03-22 20:33:10 +01:00 · 0d61f154d7
parent d6e3eccf45
commit 0d61f154d7
1 changed files with 50 additions and 73 deletions
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
@ -3854,6 +3854,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 	const bool alpha_c0_high_min_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().min > 128);
 	const bool alpha_c0_high_max_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().max > 128);
 	const bool alpha_c0_less_max_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128);
+	const bool alpha_c1_high_min_one = (m_conf.ps.blend_c == 1 && rt_alpha_min > 128);
+	const bool alpha_c1_high_max_one = (m_conf.ps.blend_c == 1 && rt_alpha_max > 128);
+	const bool alpha_c1_high_no_rta_correct = m_conf.ps.blend_c == 1 && !(rt->m_rt_alpha_scale || m_can_correct_alpha);
 	const bool alpha_c2_zero = (m_conf.ps.blend_c == 2 && AFIX == 0u);
 	const bool alpha_c2_one = (m_conf.ps.blend_c == 2 && AFIX == 128u);
 	const bool alpha_c2_less_one = (m_conf.ps.blend_c == 2 && AFIX <= 128u);
@ -3886,7 +3889,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 		m_conf.ps.blend_b = 0;
 	}
 	else if (COLCLAMP.CLAMP && m_conf.ps.blend_a == 2
-		&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c2_high_one))))
+		&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))))
 	{
 		// CLAMP 1, negative result will be clamped to 0.
 		// Condition 1:
@ -3907,14 +3910,20 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 	const bool blend_ad = m_conf.ps.blend_c == 1;
 	const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
 	bool blend_ad_alpha_masked = blend_ad && alpha_mask;
-	if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0))
-		&& g_gs_device->Features().texture_barrier && blend_ad_alpha_masked)
+	if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
+	{
+		// Swap Ad with As for hw blend.
+		m_conf.ps.a_masked = 1;
 		m_conf.ps.blend_c = 0;
-	else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium)
-		// Detect barrier aka fbmask on d3d11.
-		|| m_conf.require_one_barrier)
-		&& blend_ad_alpha_masked)
+		m_conf.require_one_barrier |= true;
+	}
+	else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier) && blend_ad_alpha_masked)
+	{
+		// Swap Ad with As for hw blend.
+		m_conf.ps.a_masked = 1;
 		m_conf.ps.blend_c = 0;
+		m_conf.require_one_barrier |= true;
+	}
 	else
 		blend_ad_alpha_masked = false;

@ -3948,29 +3957,31 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 	bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP;

 	const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
+	// Primitives don't overlap.
+	const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
+	// Condition 1: Require full sw blend for full barrier.
+	// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
+	// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend.
+	const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && (no_prim_overlap || m_conf.ps.shuffle));
+	const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
+							|| accumulation_blend; // Mix of hw/sw blending

 	// Blend can be done on hw. As and F cases should be accurate.
 	// BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend.
 	// BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw.
-	const bool clr_blend = !!(blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2 | BLEND_HW_CLR3));
-	bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // Make sure it isn't an Ad case
+	bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // As or Af cases only.
 						&& !(m_draw_env->PABE.PABE && GetAlphaMinMax().min < 128) // No PABE as it will require sw blending.
 						&& (COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1.
-						&& !(one_barrier || m_conf.require_full_barrier); // Also don't run if there are barriers present.
+						&& !prefer_sw_blend; // Don't run if sw blend is preferred.

 	// Warning no break on purpose
 	// Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks.
 	bool sw_blending = false;
 	if (features.texture_barrier)
 	{
-		// Condition 1: Require full sw blend for full barrier.
-		// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
-		const bool prefer_sw_blend = m_conf.require_full_barrier || (one_barrier && (m_prim_overlap == PRIM_OVERLAP_NO || m_conf.ps.shuffle));
-		const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
-		const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
-			|| accumulation_blend; // Mix of hw/sw blending
 		const bool blend_requires_barrier = (blend_flag & BLEND_A_MAX) // Impossible blending
-			|| (m_conf.require_full_barrier) // Another effect (for example fbmask) already requires a full barrier
+			// Sw blend, either full barrier or one barrier with no overlap.
+			|| prefer_sw_blend
 			// Blend can be done in a single draw, and we already need a barrier
 			// On fbfetch, one barrier is like full barrier
 			|| (one_barrier && (no_prim_overlap || features.framebuffer_fetch))
@ -3988,7 +3999,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 				sw_blending |= m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c0_high_max_one;
 				[[fallthrough]];
 			case AccBlendLevel::High:
-				sw_blending |= m_conf.ps.blend_c == 1 || (m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c2_high_one);
+				sw_blending |= (alpha_c1_high_max_one || alpha_c1_high_no_rta_correct) || (m_conf.ps.blend_a != m_conf.ps.blend_b && alpha_c2_high_one);
 				[[fallthrough]];
 			case AccBlendLevel::Medium:
 				// Initial idea was to enable accurate blending for sprite rendering to handle
@ -3997,17 +4008,14 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 				sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 100;
 				[[fallthrough]];
 			case AccBlendLevel::Basic:
-				// SW FBMASK, needs sw blend, avoid hitting any hw blend pre enabled (accumulation, blend mix, blend cd),
-				// fixes shadows in Superman shadows of Apokolips.
-				// DATE_BARRIER already does full barrier so also makes more sense to do full sw blend.
+				// Prefer sw blend if possible.
 				color_dest_blend &= !prefer_sw_blend;
-				// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
-				accumulation_blend &= !(prefer_sw_blend || (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO));
+				accumulation_blend &= !prefer_sw_blend;
 				// Enable sw blending for barriers.
 				sw_blending |= blend_requires_barrier;
 				// Try to do hw blend for clr2 case.
 				sw_blending &= !clr_blend1_2;
-				// Enable sw blending for free blending, should be done after blend_ad_improved check.
+				// Enable sw blending for free blending.
 				sw_blending |= free_blend;
 				// Do not run BLEND MIX if sw blending is already present, it's less accurate.
 				blend_mix &= !sw_blending;
@ -4019,42 +4027,38 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 	}
 	else
 	{
-		// FBMASK, channel shuffle already reads the fb so it is safe to enable sw blend when there is no overlap or a texture shuffle.
-		const bool prefer_sw_blend = m_conf.require_one_barrier && (m_prim_overlap == PRIM_OVERLAP_NO || m_conf.ps.shuffle);
-
 		switch (GSConfig.AccurateBlendingUnit)
 		{
 			case AccBlendLevel::Maximum:
-				if (m_prim_overlap == PRIM_OVERLAP_NO)
+				// Enable sw blend when prims don't overlap.
+				if (no_prim_overlap)
 				{
 					clr_blend1_2 = false;
 					sw_blending |= true;
 				}
 				[[fallthrough]];
 			case AccBlendLevel::Full:
-				sw_blending |= ((m_conf.ps.blend_c == 1 || (blend_mix && (alpha_c2_high_one || alpha_c0_high_max_one))) && (m_prim_overlap == PRIM_OVERLAP_NO));
+				// Enable sw blend on cases where Alpha > 128 when prims don't overlap.
+				sw_blending |= (alpha_c0_high_max_one || alpha_c1_high_max_one || alpha_c2_high_one) && no_prim_overlap;
 				[[fallthrough]];
 			case AccBlendLevel::High:
-				sw_blending |= (!(clr_blend || blend_mix) && (m_prim_overlap == PRIM_OVERLAP_NO));
+				// Enable sw blend on Cd*(Alpha + 1) cases where prims don't overlap.
+				sw_blending |= (m_conf.ps.blend_a == m_conf.ps.blend_d == 1) && no_prim_overlap;
 				[[fallthrough]];
 			case AccBlendLevel::Medium:
-				// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
-				if (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO)
-				{
-					accumulation_blend = false;
-					sw_blending |= true;
-				}
+				// Enable sw blend on Ad cases where prims don't overlap, blend_ad_alpha_masked or rta correction isn't possible.
+				sw_blending |= !blend_ad_alpha_masked && (alpha_c1_high_max_one || alpha_c1_high_no_rta_correct) && no_prim_overlap;
 				[[fallthrough]];
 			case AccBlendLevel::Basic:
-				// Disable accumulation blend when sw blend is preferred.
-				color_dest_blend   &= !prefer_sw_blend;
+				// Prefer sw blend if possible.
+				color_dest_blend &= !prefer_sw_blend;
 				accumulation_blend &= !prefer_sw_blend;
-				// Blending requires reading the framebuffer when there's no overlap.
+				// Enable sw blending for reading fb.
 				sw_blending |= prefer_sw_blend;
 				// Try to do hw blend for clr2 case.
 				sw_blending &= !clr_blend1_2;
-				// Enable sw blending for free blending, should be done after blend_ad_improved check.
-				sw_blending |= accumulation_blend || blend_non_recursive;
+				// Enable sw blending for free blending.
+				sw_blending |= free_blend;
 				// Do not run BLEND MIX if sw blending is already present, it's less accurate.
 				blend_mix &= !sw_blending;
 				sw_blending |= blend_mix;
@ -4110,7 +4114,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 		if (features.framebuffer_fetch)
 			free_colclip = true;
 		else if (features.texture_barrier)
-			free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive;
+			free_colclip = no_prim_overlap || blend_non_recursive;
 		else
 			free_colclip = blend_non_recursive;

@ -4186,13 +4190,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 		}
 	}

-	// For stat to optimize accurate option
-#if 0
-	GL_INS("BLEND_INFO: %u/%u/%u/%u. Clamp:%u. Prim:%d number %u (drawlist %zu) (sw %d)",
-		m_conf.ps.blend_a, m_conf.ps.blend_b, m_conf.ps.blend_c, m_conf.ps.blend_d,
-		m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending);
-#endif
-
 	if (color_dest_blend)
 	{
 		// Blend output will be Cd, disable hw/sw blending.
@ -4257,10 +4254,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT

 			// Dual source output not needed (accumulation blend replaces it with ONE).
 			m_conf.ps.no_color1 = true;
-
-			// Only Ad case will require one barrier
-			// No need to set a_masked bit for blend_ad_alpha_masked case
-			m_conf.require_one_barrier |= blend_ad_alpha_masked;
 		}
 		else if (blend_mix)
 		{
@ -4281,11 +4274,10 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT

 			if (blend_mix1)
 			{
-				if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c2_high_one))
+				if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))
 				{
-					// Replace Cs*As + Cd*(1 - As) with Cs*As - Cd*(As - 1).
-					// Replace Cs*F + Cd*(1 - F) with Cs*F - Cd*(F - 1).
-					// As - 1 or F - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation.
+					// Replace Cs*Alpha + Cd*(1 - Alpha) with Cs*Alpha - Cd*(Alpha - 1).
+					// Alpha - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation.
 					// Af will be replaced with As in shader and send it to dual source output.
 					m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, GSDevice::OP_SUBTRACT, false, 0};
 					// blend hw 1 will disable alpha clamp, we can reuse the old bits.
@ -4295,8 +4287,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 				}
 				else if (m_conf.ps.blend_a == m_conf.ps.blend_d)
 				{
-					// Compensate slightly for Cd*(As + 1) - Cs*As.
-					// Try to compensate a bit with subtracting 1 (0.00392) * (Alpha + 1) from Cs.
+					// Compensate slightly for Cd*(Alpha + 1) - Cs*Alpha.
 					m_conf.ps.blend_hw = 2;
 				}

@ -4322,14 +4313,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 				m_conf.ps.blend_b = 0;
 				m_conf.ps.blend_d = 0;
 			}
-
-			// Only Ad case will require one barrier
-			if (blend_ad_alpha_masked)
-			{
-				// Swap Ad with As for hw blend
-				m_conf.ps.a_masked = 1;
-				m_conf.require_one_barrier |= true;
-			}
 		}
 		else
 		{
@ -4381,12 +4364,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
 			m_conf.ps.blend_hw = 3;
 		}

-		if (blend_ad_alpha_masked)
-		{
-			m_conf.ps.a_masked = 1;
-			m_conf.require_one_barrier |= true;
-		}
-
 		const HWBlend blend(GSDevice::GetBlend(blend_index, replace_dual_src));
 		m_conf.blend = {true, blend.src, blend.dst, blend.op, m_conf.ps.blend_c == 2, AFIX};