GS/HW: Further expand blend multipass.

Adds multipass blending for the formulas:
Cs*Alpha + Cd*(1 - Alpha),
Cd*Alpha + Cs*(1 - Alpha),
where Alpha is higher than 1 and is either As or Af.
This commit is contained in:
lightningterror 2024-08-25 07:36:35 +02:00
parent 27fb7dc26e
commit 67d7744f7f
6 changed files with 102 additions and 42 deletions

View File

@ -959,6 +959,8 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
}
else
{
float3 Alpha = PS_BLEND_C == 2 ? (float3)Af : (float3)As;
if (PS_BLEND_HW == 1)
{
// Needed for Cd * (As/Ad/F + 1) blending modes
@ -967,8 +969,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
else if (PS_BLEND_HW == 2)
{
// Cd*As,Cd*Ad or Cd*F
float Alpha = PS_BLEND_C == 2 ? Af : As;
Color.rgb = saturate((float3)Alpha - (float3)1.0f) * (float3)255.0f;
Color.rgb = saturate(Alpha - (float3)1.0f) * (float3)255.0f;
}
else if (PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0)
{
@ -983,11 +984,24 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
}
else if (PS_BLEND_HW == 4)
{
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha)
float Alpha = PS_BLEND_C == 2 ? Af : As;
As_rgba.rgb = (float3)Alpha * (float3)(128.0f / 255.0f);
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha).
As_rgba.rgb = Alpha * (float3)(128.0f / 255.0f);
Color.rgb = (float3)127.5f;
}
else if (PS_BLEND_HW == 5)
{
// Needed for Cs*Alpha + Cd*(1 - Alpha).
Alpha *= (float3)(128.0f / 255.0f);
As_rgba.rgb = (Alpha - (float3)0.5f);
Color.rgb = (Color.rgb * Alpha);
}
else if (PS_BLEND_HW == 6)
{
// Needed for Cd*Alpha + Cs*(1 - Alpha).
Alpha *= (float3)(128.0f / 255.0f);
As_rgba.rgb = Alpha;
Color.rgb *= (Alpha - (float3)0.5f);
}
}
}

View File

@ -910,18 +910,19 @@ float As = As_rgba.a;
#endif
#else
#if PS_BLEND_C == 2
vec3 Alpha = vec3(Af);
#else
vec3 Alpha = vec3(As);
#endif
// Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_BLEND_HW == 1
Color.rgb = vec3(255.0f);
#elif PS_BLEND_HW == 2
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
#elif PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0
@ -934,15 +935,20 @@ float As = As_rgba.a;
float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate);
#elif PS_BLEND_HW == 4
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha)
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha).
#if PS_BLEND_C == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
As_rgba.rgb = vec3(Alpha) * vec3(128.0f / 255.0f);
As_rgba.rgb = Alpha * vec3(128.0f / 255.0f);
Color.rgb = vec3(127.5f);
#elif PS_BLEND_HW == 5
// Needed for Cs*Alpha + Cd*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = (Alpha - vec3(0.5f));
Color.rgb = (Color.rgb * Alpha);
#elif PS_BLEND_HW == 6
// Needed for Cd*Alpha + Cs*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = Alpha;
Color.rgb *= (Alpha - vec3(0.5f));
#endif
#endif

View File

@ -1177,18 +1177,19 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#endif
#else
#if PS_BLEND_C == 2
vec3 Alpha = vec3(Af);
#else
vec3 Alpha = vec3(As);
#endif
#if PS_BLEND_HW == 1
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = vec3(255.0f);
#elif PS_BLEND_HW == 2
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
#elif PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0
@ -1201,16 +1202,20 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate);
#elif PS_BLEND_HW == 4
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha)
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha).
#if PS_BLEND_C == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
As_rgba.rgb = vec3(Alpha) * vec3(128.0f / 255.0f);
As_rgba.rgb = Alpha * vec3(128.0f / 255.0f);
Color.rgb = vec3(127.5f);
#elif PS_BLEND_HW == 5
// Needed for Cs*Alpha + Cd*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = (Alpha - vec3(0.5f));
Color.rgb = (Color.rgb * Alpha);
#elif PS_BLEND_HW == 6
// Needed for Cd*Alpha + Cs*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = Alpha;
Color.rgb *= (Alpha - vec3(0.5f));
#endif
#endif
}

View File

@ -176,6 +176,8 @@ enum class HWBlendType
SRC_ALPHA_DST_FACTOR = 2, // Use the dest color as blend factor, Cs is set to (Alpha - 1).
SRC_DOUBLE = 3, // Double source color.
SRC_HALF_ONE_DST_FACTOR = 4, // Use the dest color as blend factor, Cs is set to 0.5, additionally divide As or Af by 2.
SRC_INV_DST_BLEND_HALF = 5, // Halve the alpha then double the final result.
INV_SRC_DST_BLEND_HALF = 6, // Halve the alpha then double the final result.
BMIX1_ALPHA_HIGH_ONE = 1, // Blend formula is replaced when alpha is higher than 1.
BMIX1_SRC_HALF = 2, // Impossible blend will always be wrong on hw, divide Cs by 2.

View File

@ -4155,6 +4155,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
const bool alpha_c2_eq_less_one = (m_conf.ps.blend_c == 2 && AFIX <= 128u);
const bool alpha_c2_high_one = (m_conf.ps.blend_c == 2 && AFIX > 128u);
const bool alpha_eq_one = alpha_c0_eq_one || alpha_c2_eq_one;
const bool alpha_high_one = alpha_c0_high_min_one || alpha_c2_high_one;
const bool alpha_eq_less_one = alpha_c0_eq_less_max_one || alpha_c2_eq_less_one;
// Optimize blending equations, must be done before index calculation
@ -4182,7 +4183,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
m_conf.ps.blend_b = 0;
}
else if (COLCLAMP.CLAMP && m_conf.ps.blend_a == 2
&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))))
&& (m_conf.ps.blend_d == 2 || (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_high_one || alpha_c1_high_min_one))))
{
// CLAMP 1, negative result will be clamped to 0.
// Condition 1:
@ -4247,7 +4248,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
const bool blend_non_recursive = !!(blend_flag & BLEND_NO_REC);
// BLEND MIX selection, use a mix of hw/sw blending
const bool blend_mix1 = !!(blend_flag & BLEND_MIX1) && !(m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c2_high_one));
const bool blend_mix1 = !!(blend_flag & BLEND_MIX1) && !(m_conf.ps.blend_b == m_conf.ps.blend_d && alpha_high_one);
const bool blend_mix2 = !!(blend_flag & BLEND_MIX2);
const bool blend_mix3 = !!(blend_flag & BLEND_MIX3);
bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP;
@ -4256,12 +4257,14 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
// HW blend can be done in multiple passes when there's no overlap.
// Blend second pass is only useful when texture barriers aren't supported.
// Speed wise Texture barriers > blend second pass > texture copies.
// Blend multi pass is only useful when texture barriers aren't supported.
// Speed wise Texture barriers > blend multi pass > texture copies.
const bool blend_multi_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend;
const bool bmix1_second_pass = blend_multi_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2;
// We don't want to enable blend mix if we are doing a second pass, it's useless.
blend_mix &= !bmix1_second_pass;
const bool bmix1_multi_pass1 = blend_multi_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2;
const bool bmix1_multi_pass2 = blend_multi_pass_support && (blend_flag & BLEND_MIX1) && m_conf.ps.blend_b == m_conf.ps.blend_d && !m_conf.ps.dither && alpha_high_one;
const bool bmix3_multi_pass = blend_multi_pass_support && blend_mix3 && !m_conf.ps.dither && alpha_high_one;
// We don't want to enable blend mix if we are doing a multi pass, it's useless.
blend_mix &= !(bmix1_multi_pass1 || bmix1_multi_pass2 || bmix3_multi_pass);
const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
// Condition 1: Require full sw blend for full barrier.
@ -4651,7 +4654,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
if (blend_multi_pass_support)
{
const HWBlend blend_multi_pass = GSDevice::GetBlend(blend_index);
if (bmix1_second_pass)
if (bmix1_multi_pass1)
{
// Alpha = As or Af.
// Cs*Alpha - Cd*Alpha, Cd*Alpha - Cs*Alpha.
@ -4663,6 +4666,34 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
m_conf.blend_multi_pass.blend_hw = static_cast<u8>(HWBlendType::SRC_ALPHA_DST_FACTOR);
m_conf.blend_multi_pass.blend = {true, GSDevice::DST_COLOR, (m_conf.ps.blend_c == 2) ? GSDevice::CONST_COLOR : GSDevice::SRC1_COLOR, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, m_conf.ps.blend_c == 2, AFIX};
}
else if (bmix1_multi_pass2)
{
// Alpha = As or Af.
// Cs*Alpha + Cd*(1 - Alpha).
// Render pass 1: Do the blend but halve the alpha, subtract instead of add since alpha is higher than 1.
m_conf.ps.blend_hw = static_cast<u8>(HWBlendType::SRC_INV_DST_BLEND_HALF);
blend.src = GSDevice::CONST_ONE;
blend.dst = GSDevice::SRC1_COLOR;
blend.op = GSDevice::OP_SUBTRACT;
// Render pass 2: Take result (Cd) from render pass 1 and double it.
m_conf.blend_multi_pass.enable = true;
m_conf.blend_multi_pass.blend_hw = static_cast<u8>(HWBlendType::SRC_ONE_DST_FACTOR);
m_conf.blend_multi_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, blend_multi_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if (bmix3_multi_pass)
{
// Alpha = As or Af.
// Cd*Alpha + Cs*(1 - Alpha).
// Render pass 1: Do the blend but halve the alpha, subtract instead of add since alpha is higher than 1.
m_conf.ps.blend_hw = static_cast<u8>(HWBlendType::INV_SRC_DST_BLEND_HALF);
blend.src = GSDevice::CONST_ONE;
blend.dst = GSDevice::SRC1_COLOR;
blend.op = GSDevice::OP_REV_SUBTRACT;
// Render pass 2: Take result (Cd) from render pass 1 and double it.
m_conf.blend_multi_pass.enable = true;
m_conf.blend_multi_pass.blend_hw = static_cast<u8>(HWBlendType::SRC_ONE_DST_FACTOR);
m_conf.blend_multi_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, blend_multi_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if ((alpha_c0_high_max_one || alpha_c1_high_no_rta_correct || alpha_c2_high_one) && (blend_flag & BLEND_HW1))
{
// Alpha = As, Ad or Af.
@ -4773,7 +4804,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
if (m_conf.ps.blend_c == 2 && (m_conf.ps.blend_hw == static_cast<u8>(HWBlendType::SRC_ALPHA_DST_FACTOR)
|| m_conf.ps.blend_hw == static_cast<u8>(HWBlendType::SRC_HALF_ONE_DST_FACTOR)
|| m_conf.blend_multi_pass.blend_hw == static_cast<u8>(HWBlendType::SRC_ALPHA_DST_FACTOR)))
|| m_conf.blend_multi_pass.blend_hw == static_cast<u8>(HWBlendType::SRC_ALPHA_DST_FACTOR)
|| m_conf.ps.blend_hw == static_cast<u8>(HWBlendType::SRC_INV_DST_BLEND_HALF)
|| m_conf.ps.blend_hw == static_cast<u8>(HWBlendType::INV_SRC_DST_BLEND_HALF)))
{
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
}

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 56;
static constexpr u32 SHADER_CACHE_VERSION = 57;