GS:HW: More accurate blend equation for blend mix

2022-08-08 17:19:20 -05:00 · 2022-08-08 17:19:20 -05:00 · cb64e8d504
parent 35a6dfb52a
commit cb64e8d504
4 changed files with 40 additions and 1 deletion
--- a/bin/resources/shaders/dx11/tfx.fx
+++ b/bin/resources/shaders/dx11/tfx.fx
@@ -779,7 +779,19 @@ void ps_blend(inout float4 Color, inout float As, float2 pos_xy)
 		if (PS_BLEND_MIX && PS_CLR_HW != 1)
 			C = min(C, 1.0f);

-		Color.rgb = (PS_BLEND_A == PS_BLEND_B) ? D : trunc(((A - B) * C) + D);
+		if (PS_BLEND_A == PS_BLEND_B)
+			Color.rgb = D;
+		else if (PS_BLEND_MIX)
+			// In blend_mix, HW adds on some alpha factor * dst.
+			// Truncating here wouldn't quite get the right result: it would discard this value's fractional (<1) part, which could otherwise combine with the fractional (<1) part of the dst term to reach ≥1 and carry over the truncation boundary.
+			// Instead, apply an offset to convert HW's round to a floor.
+			// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
+			// But they don't.  Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
+			// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256.  Nvidia is a lost cause.
+			// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
+			Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
+		else
+			Color.rgb = trunc(((A - B) * C) + D);

 		if (PS_CLR_HW == 1)
 		{
--- a/bin/resources/shaders/opengl/tfx_fs.glsl
+++ b/bin/resources/shaders/opengl/tfx_fs.glsl
@@ -761,6 +761,15 @@ void ps_blend(inout vec4 Color, inout float As)

 #if PS_BLEND_A == PS_BLEND_B
    Color.rgb = D;
+#elif PS_BLEND_MIX
+    // In blend_mix, HW adds on some alpha factor * dst.
+    // Truncating here wouldn't quite get the right result: it would discard this value's fractional (<1) part, which could otherwise combine with the fractional (<1) part of the dst term to reach ≥1 and carry over the truncation boundary.
+    // Instead, apply an offset to convert HW's round to a floor.
+    // Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
+    // But they don't.  Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
+    // Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256.  Nvidia is a lost cause.
+    // 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
+    Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
 #else
    Color.rgb = trunc((A - B) * C + D);
 #endif
--- a/bin/resources/shaders/vulkan/tfx.glsl
+++ b/bin/resources/shaders/vulkan/tfx.glsl
@@ -1060,6 +1060,15 @@ void ps_blend(inout vec4 Color, inout float As)

 		#if PS_BLEND_A == PS_BLEND_B
 				Color.rgb = D;
+		#elif PS_BLEND_MIX
+				// In blend_mix, HW adds on some alpha factor * dst.
+				// Truncating here wouldn't quite get the right result: it would discard this value's fractional (<1) part, which could otherwise combine with the fractional (<1) part of the dst term to reach ≥1 and carry over the truncation boundary.
+				// Instead, apply an offset to convert HW's round to a floor.
+				// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
+				// But they don't.  Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
+				// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256.  Nvidia is a lost cause.
+				// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
+				Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
 		#else
 				Color.rgb = trunc((A - B) * C + D);
 		#endif
--- a/pcsx2/GS/Renderers/Metal/tfx.metal
+++ b/pcsx2/GS/Renderers/Metal/tfx.metal
@@ -750,6 +750,15 @@ struct PSMain

 			if (PS_BLEND_A == PS_BLEND_B)
 				Color.rgb = D;
+			else if (PS_BLEND_MIX)
+				// In blend_mix, HW adds on some alpha factor * dst.
+				// Truncating here wouldn't quite get the right result: it would discard this value's fractional (<1) part, which could otherwise combine with the fractional (<1) part of the dst term to reach ≥1 and carry over the truncation boundary.
+				// Instead, apply an offset to convert HW's round to a floor.
+				// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
+				// But they don't.  Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
+				// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256.  Nvidia is a lost cause.
+				// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
+				Color.rgb = ((A - B) * C + D) - (124.f/256.f);
 			else
 				Color.rgb = trunc((A - B) * C + D);