mirror of https://github.com/PCSX2/pcsx2.git
GS:HW: More accurate blend equation for blend mix
This commit is contained in:
parent
35a6dfb52a
commit
cb64e8d504
|
@ -779,7 +779,19 @@ void ps_blend(inout float4 Color, inout float As, float2 pos_xy)
|
|||
if (PS_BLEND_MIX && PS_CLR_HW != 1)
|
||||
C = min(C, 1.0f);
|
||||
|
||||
Color.rgb = (PS_BLEND_A == PS_BLEND_B) ? D : trunc(((A - B) * C) + D);
|
||||
if (PS_BLEND_A == PS_BLEND_B)
|
||||
Color.rgb = D;
|
||||
else if (PS_BLEND_MIX)
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite give the right result: it would discard the fractional (<1) part of this value, which could otherwise combine with the fractional (<1) part of the dst term to total ≥1 and carry over into the final truncated result.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
else
|
||||
Color.rgb = trunc(((A - B) * C) + D);
|
||||
|
||||
if (PS_CLR_HW == 1)
|
||||
{
|
||||
|
|
|
@ -761,6 +761,15 @@ void ps_blend(inout vec4 Color, inout float As)
|
|||
|
||||
#if PS_BLEND_A == PS_BLEND_B
|
||||
Color.rgb = D;
|
||||
#elif PS_BLEND_MIX
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite give the right result: it would discard the fractional (<1) part of this value, which could otherwise combine with the fractional (<1) part of the dst term to total ≥1 and carry over into the final truncated result.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
#else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
#endif
|
||||
|
|
|
@ -1060,6 +1060,15 @@ void ps_blend(inout vec4 Color, inout float As)
|
|||
|
||||
#if PS_BLEND_A == PS_BLEND_B
|
||||
Color.rgb = D;
|
||||
#elif PS_BLEND_MIX
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite give the right result: it would discard the fractional (<1) part of this value, which could otherwise combine with the fractional (<1) part of the dst term to total ≥1 and carry over into the final truncated result.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
#else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
#endif
|
||||
|
|
|
@ -750,6 +750,15 @@ struct PSMain
|
|||
|
||||
if (PS_BLEND_A == PS_BLEND_B)
|
||||
Color.rgb = D;
|
||||
else if (PS_BLEND_MIX)
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite give the right result: it would discard the fractional (<1) part of this value, which could otherwise combine with the fractional (<1) part of the dst term to total ≥1 and carry over into the final truncated result.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.f/256.f);
|
||||
else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
|
||||
|
|
Loading…
Reference in New Issue