GS/HW: Extend blend second pass to more blend formulas.

`Cs + Cd*Ad, Cs - Cd*Ad, Cd*(1 - Ad), Cs*(1 + Ad), Cs*(1 - Ad).`
This commit is contained in:
lightningterror 2024-04-19 00:47:45 +02:00
parent 4e06d51a00
commit d824ae6e0c
7 changed files with 77 additions and 20 deletions

View File

@ -972,6 +972,11 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float color_compensate = 255.0f / max(128.0f, max_color); float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= (float3)color_compensate; Color.rgb *= (float3)color_compensate;
} }
else if (PS_BLEND_HW == 4)
{
// Needed for Cd * (1 - Ad)
Color.rgb = (float3)128.0f;
}
} }
} }

View File

@ -933,6 +933,9 @@ float As = As_rgba.a;
float max_color = max(max(Color.r, Color.g), Color.b); float max_color = max(max(Color.r, Color.g), Color.b);
float color_compensate = 255.0f / max(128.0f, max_color); float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate); Color.rgb *= vec3(color_compensate);
#elif PS_BLEND_HW == 4
// Needed for Cd * (1 - Ad)
Color.rgb = vec3(128.0f);
#endif #endif
#endif #endif

View File

@ -1200,6 +1200,9 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
float max_color = max(max(Color.r, Color.g), Color.b); float max_color = max(max(Color.r, Color.g), Color.b);
float color_compensate = 255.0f / max(128.0f, max_color); float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate); Color.rgb *= vec3(color_compensate);
#elif PS_BLEND_HW == 4
// Needed for Cd * (1 - Ad)
Color.rgb = vec3(128.0f);
#endif #endif
#endif #endif
} }

View File

@ -889,7 +889,7 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)
{ BLEND_ACCU , OP_ADD , SRC1_COLOR , CONST_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { BLEND_ACCU , OP_ADD , SRC1_COLOR , CONST_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
{ BLEND_NO_REC , OP_ADD , SRC1_COLOR , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As { BLEND_NO_REC , OP_ADD , SRC1_COLOR , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As
{ BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { BLEND_A_MAX | BLEND_HW8 , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1)
{ BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
{ BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
@ -916,7 +916,7 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_HW4 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As { BLEND_HW4 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_HW1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) { BLEND_HW1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)
{ BLEND_HW2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As { BLEND_HW2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad { BLEND_HW6 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad
{ BLEND_HW1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) { BLEND_HW1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ BLEND_HW5 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad { BLEND_HW5 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
{ BLEND_HW4 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F { BLEND_HW4 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
@ -925,7 +925,7 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_NO_REC , OP_ADD , INV_SRC1_COLOR , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) { BLEND_NO_REC , OP_ADD , INV_SRC1_COLOR , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)
{ BLEND_ACCU , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As { BLEND_ACCU , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As
{ BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As { BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As
{ 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) { BLEND_HW9 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad)
{ BLEND_HW3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad { BLEND_HW3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad
{ 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad { 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad
{ BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) { BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)
@ -934,8 +934,8 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_HW4 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As
{ 0 , OP_ADD , CONST_ZERO , INV_SRC1_COLOR} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) { 0 , OP_ADD , CONST_ZERO , INV_SRC1_COLOR} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As)
{ 0 , OP_SUBTRACT , CONST_ZERO , SRC1_COLOR} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As { 0 , OP_SUBTRACT , CONST_ZERO , SRC1_COLOR} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As
{ 0 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad { BLEND_HW6 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad
{ 0 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) { BLEND_HW7 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad)
{ 0 , OP_SUBTRACT , CONST_ZERO , DST_ALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad { 0 , OP_SUBTRACT , CONST_ZERO , DST_ALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad
{ BLEND_HW4 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F
{ 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) { 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)

View File

@ -227,12 +227,16 @@ enum HWBlendFlags
BLEND_HW3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128 BLEND_HW3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128
BLEND_HW4 = 0x10, // HW rendering is split in 2 passes BLEND_HW4 = 0x10, // HW rendering is split in 2 passes
BLEND_HW5 = 0x20, // HW rendering is split in 2 passes BLEND_HW5 = 0x20, // HW rendering is split in 2 passes
BLEND_MIX1 = 0x40, // Mix of hw and sw, do Cs*F or Cs*As in shader BLEND_HW6 = 0x40, // HW rendering is split in 2 passes
BLEND_MIX2 = 0x80, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader BLEND_HW7 = 0x80, // HW rendering is split in 2 passes
BLEND_MIX3 = 0x100, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader BLEND_HW8 = 0x100, // HW rendering is split in 2 passes
BLEND_ACCU = 0x200, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds BLEND_HW9 = 0x200, // HW rendering is split in 2 passes
BLEND_NO_REC = 0x400, // Doesn't require sampling of the RT as a texture BLEND_MIX1 = 0x400, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_A_MAX = 0x800, // Impossible blending uses coeff bigger than 1 BLEND_MIX2 = 0x800, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x1000, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_ACCU = 0x2000, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
BLEND_NO_REC = 0x4000, // Doesn't require sampling of the RT as a texture
BLEND_A_MAX = 0x8000, // Impossible blending uses coeff bigger than 1
}; };
// Determines the HW blend function for DX11/OGL // Determines the HW blend function for DX11/OGL
@ -332,7 +336,7 @@ struct alignas(16) GSHWDrawConfig
u32 blend_c : 2; u32 blend_c : 2;
u32 blend_d : 2; u32 blend_d : 2;
u32 fixed_one_a : 1; u32 fixed_one_a : 1;
u32 blend_hw : 2; u32 blend_hw : 3;
u32 a_masked : 1; u32 a_masked : 1;
u32 hdr : 1; u32 hdr : 1;
u32 rta_correction : 1; u32 rta_correction : 1;

View File

@ -4050,7 +4050,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
// HW blend can be done in multiple passes when there's no overlap. // HW blend can be done in multiple passes when there's no overlap.
// Blend second pass is only useful when texture barriers aren't supported. // Blend second pass is only useful when texture barriers aren't supported.
// Speed wise Texture barriers > blend second pass > texture copies. // Speed wise Texture barriers > blend second pass > texture copies.
// TODO: 24bit and 32bit formats on clamp 1 can always prefer blend second pass depending on the blend equations.
const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend; const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend;
const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2; const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2;
// We don't want to enable blend mix if we are doing a second pass, it's useless. // We don't want to enable blend mix if we are doing a second pass, it's useless.
@ -4464,9 +4463,52 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.blend_second_pass.blend_hw = 1; m_conf.blend_second_pass.blend_hw = 1;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
} }
else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW6))
if (m_conf.ps.blend_c == 2 && m_conf.blend_second_pass.enable) {
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f; // Alpha = Ad.
// Cs + Cd*Alpha, Cs - Cd*Alpha.
// Render pass 1: Multiply Cs by 0.5, then do Cs + Cd*Alpha or Cs - Cd*Alpha.
m_conf.ps.blend_c = 2;
AFIX = 64;
blend.src = GSDevice::CONST_COLOR;
// Render pass 2: Take result (Cd) from render pass 1 and double it.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 1;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW7))
{
// Alpha = Ad.
// Cd*(1 - Alpha).
// Render pass 1: Multiply Cd by 0.5, then do Cd - Cd*Alpha.
m_conf.ps.blend_hw = 4;
blend.src = GSDevice::DST_COLOR;
blend.dst = GSDevice::DST_ALPHA;
blend.op = GSDevice::OP_SUBTRACT;
// Render pass 2: Take result (Cd) from render pass 1 and double it.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 1;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if (blend_flag & BLEND_HW8)
{
// Alpha = Ad.
// Cs*(1 + Alpha).
// Render pass 1: Do Cs.
// Render pass 2: Try to double Cs, then take result (Cd) from render pass 1 and add Cs*Alpha to it.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 3;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_ALPHA, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW9))
{
// Alpha = Ad.
// Cs*(1 - Alpha).
// Render pass 1: Do Cs*(1 - Alpha).
// Render pass 2: Take result (Cd) from render pass 1 and subtract Cs*Alpha from it.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_ALPHA, GSDevice::CONST_ONE, GSDevice::OP_REV_SUBTRACT, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
} }
if (blend_flag & BLEND_HW1) if (blend_flag & BLEND_HW1)
@ -4475,9 +4517,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
} }
else if (blend_flag & BLEND_HW2) else if (blend_flag & BLEND_HW2)
{ {
if (m_conf.ps.blend_c == 2)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
m_conf.ps.blend_hw = 2; m_conf.ps.blend_hw = 2;
} }
else if (!m_conf.blend_second_pass.enable && alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3)) else if (!m_conf.blend_second_pass.enable && alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3))
@ -4485,6 +4524,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_hw = 3; m_conf.ps.blend_hw = 3;
} }
if (m_conf.ps.blend_c == 2 && (m_conf.ps.blend_hw == 2 || m_conf.blend_second_pass.blend_hw == 2))
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
const GSDevice::BlendFactor src_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ZERO : GSDevice::CONST_ONE; const GSDevice::BlendFactor src_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ZERO : GSDevice::CONST_ONE;
const GSDevice::BlendFactor dst_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ONE : GSDevice::CONST_ZERO; const GSDevice::BlendFactor dst_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ONE : GSDevice::CONST_ZERO;
m_conf.blend = {true, blend.src, blend.dst, blend.op, src_factor_alpha, dst_factor_alpha, m_conf.ps.blend_c == 2, AFIX}; m_conf.blend = {true, blend.src, blend.dst, blend.op, src_factor_alpha, dst_factor_alpha, m_conf.ps.blend_c == 2, AFIX};

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the /// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache. /// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 46; static constexpr u32 SHADER_CACHE_VERSION = 47;