GS/HW: Add support for blend second pass.

Allows us to blend Cd with the full alpha range of 0-2, bypassing hw blend limitations.
Not all Cd cases are covered, but it's a good start.

Also allows us to handle Ad cases by doubling the blend in a second pass to get
the proper blend result, since the Ad range is 0-1 instead of 0-2.
This commit is contained in:
lightningterror 2024-03-28 13:40:24 +01:00
parent dcdb39026c
commit b1f4f67130
8 changed files with 194 additions and 60 deletions

View File

@ -943,7 +943,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float Alpha = PS_BLEND_C == 2 ? Af : As;
Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f));
Color.rgb = saturate((float3)Alpha - (float3)1.0f);
Color.rgb *= (float3)255.0f;
}
else if (PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0)

View File

@ -882,7 +882,7 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_MIX1 , OP_SUBTRACT , SRC1_COLOR , SRC1_COLOR} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As
{ BLEND_A_MAX , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad
{ 0 , OP_ADD , DST_ALPHA , INV_DST_ALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad)
{ 0 , OP_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad
{ BLEND_HW5 , OP_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad
{ BLEND_A_MAX | BLEND_MIX2 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F
{ BLEND_MIX1 , OP_ADD , CONST_COLOR , INV_CONST_COLOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)
{ BLEND_MIX1 , OP_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F
@ -890,8 +890,8 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_ACCU , OP_ADD , SRC1_COLOR , CONST_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
{ BLEND_NO_REC , OP_ADD , SRC1_COLOR , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As
{ BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1)
{ BLEND_HW_CLR3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
{ BLEND_HW_CLR3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
{ BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
{ BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
{ BLEND_ACCU , OP_ADD , CONST_COLOR , CONST_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
{ BLEND_NO_REC , OP_ADD , CONST_COLOR , CONST_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F
@ -900,7 +900,7 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_MIX1 , OP_REV_SUBTRACT , SRC1_COLOR , SRC1_COLOR} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As
{ 0 , OP_ADD , INV_DST_ALPHA , DST_ALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad)
{ BLEND_A_MAX , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad
{ 0 , OP_REV_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad
{ BLEND_HW5 , OP_REV_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad
{ BLEND_MIX3 , OP_ADD , INV_CONST_COLOR , CONST_COLOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F)
{ BLEND_A_MAX | BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , // 1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F
{ BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F
@ -913,31 +913,31 @@ const std::array<HWBlend, 3*3*3*3> GSDevice::m_blendMap =
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs
{ BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0
{ 0 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_HW_CLR1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)
{ BLEND_HW_CLR2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ BLEND_HW4 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_HW1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)
{ BLEND_HW2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad
{ BLEND_HW_CLR1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ 0 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
{ 0 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
{ BLEND_HW_CLR1 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
{ BLEND_HW_CLR2 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F
{ BLEND_HW1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ BLEND_HW5 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
{ BLEND_HW4 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
{ BLEND_HW1 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
{ BLEND_HW2 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F
{ BLEND_NO_REC , OP_ADD , INV_SRC1_COLOR , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)
{ BLEND_ACCU , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As
{ BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As
{ 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad)
{ BLEND_HW_CLR3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad
{ BLEND_HW3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad
{ 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad
{ BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)
{ BLEND_ACCU , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F
{ BLEND_NO_REC , OP_REV_SUBTRACT , CONST_COLOR , CONST_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F
{ 0 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As
{ BLEND_HW4 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As
{ 0 , OP_ADD , CONST_ZERO , INV_SRC1_COLOR} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As)
{ 0 , OP_SUBTRACT , CONST_ZERO , SRC1_COLOR} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As
{ 0 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad
{ 0 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad)
{ 0 , OP_SUBTRACT , CONST_ZERO , DST_ALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad
{ 0 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F
{ BLEND_HW4 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F
{ 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)
{ 0 , OP_SUBTRACT , CONST_ZERO , CONST_COLOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs

View File

@ -221,16 +221,18 @@ static_assert(sizeof(InterlaceConstantBuffer) == 16, "InterlaceConstantBuffer is
enum HWBlendFlags
{
// Flags to determine blending behavior
BLEND_CD = 0x1, // Output is Cd, hw blend can handle it
BLEND_HW_CLR1 = 0x2, // Clear color blending (use directly the destination color as blending factor)
BLEND_HW_CLR2 = 0x4, // Clear color blending (use directly the destination color as blending factor)
BLEND_HW_CLR3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128
BLEND_MIX1 = 0x10, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x20, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x40, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_ACCU = 0x80, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
BLEND_NO_REC = 0x100, // Doesn't require sampling of the RT as a texture
BLEND_A_MAX = 0x200, // Impossible blending uses coeff bigger than 1
BLEND_CD = 0x1, // Output is Cd, hw blend can handle it
BLEND_HW1 = 0x2, // Clear color blending (use directly the destination color as blending factor)
BLEND_HW2 = 0x4, // Clear color blending (use directly the destination color as blending factor)
BLEND_HW3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128
BLEND_HW4 = 0x10, // HW rendering is split in 2 passes
BLEND_HW5 = 0x20, // HW rendering is split in 2 passes
BLEND_MIX1 = 0x40, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x80, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x100, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_ACCU = 0x200, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
BLEND_NO_REC = 0x400, // Doesn't require sampling of the RT as a texture
BLEND_A_MAX = 0x800, // Impossible blending uses coeff bigger than 1
};
// Determines the HW blend function for DX11/OGL
@ -691,6 +693,16 @@ struct alignas(16) GSHWDrawConfig
AlphaPass alpha_second_pass;
// Describes an optional second hardware-blend draw. When `enable` is set, the
// device RenderHW implementations issue one more draw of the same primitives
// right after the main draw, using the state stored here.
struct BlendPass
{
// Blend state (factors, op, constant) applied to the second draw.
BlendState blend;
// Value written to the pixel shader selector's blend_hw before the second draw.
u8 blend_hw;
// True when the second blend draw must be issued.
bool enable;
};
static_assert(sizeof(BlendPass) == 8, "blend pass is 8 bytes");
BlendPass blend_second_pass;
VSConstantBuffer cb_vs;
PSConstantBuffer cb_ps;
};

View File

@ -2624,6 +2624,14 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
DrawIndexedPrimitive();
if (config.blend_second_pass.enable)
{
config.ps.blend_hw = config.blend_second_pass.blend_hw;
SetupPS(config.ps, &config.cb_ps, config.sampler);
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend_second_pass.blend), config.blend_second_pass.blend.constant);
DrawIndexedPrimitive();
}
if (config.alpha_second_pass.enable)
{
preprocessSel(config.alpha_second_pass.ps);

View File

@ -3942,6 +3942,18 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
if (BindDrawPipeline(pipe))
DrawIndexedPrimitive();
// blend second pass
if (config.blend_second_pass.enable)
{
if (config.blend_second_pass.blend.constant_enable)
SetBlendConstants(config.blend_second_pass.blend.constant);
pipe.bs = config.blend_second_pass.blend;
pipe.ps.blend_hw = config.blend_second_pass.blend_hw;
if (BindDrawPipeline(pipe))
DrawIndexedPrimitive();
}
// and the alpha pass
if (config.alpha_second_pass.enable)
{

View File

@ -3977,7 +3977,8 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool blend_ad = m_conf.ps.blend_c == 1;
const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
bool blend_ad_alpha_masked = blend_ad && alpha_mask;
if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
const bool is_basic_blend = GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic;
if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
{
// Swap Ad with As for hw blend.
m_conf.ps.a_masked = 1;
@ -3996,6 +3997,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
u8 blend_index = static_cast<u8>(((m_conf.ps.blend_a * 3 + m_conf.ps.blend_b) * 3 + m_conf.ps.blend_c) * 3 + m_conf.ps.blend_d);
const HWBlend blend_preliminary = GSDevice::GetBlend(blend_index);
HWBlend blend = GSDevice::GetBlend(blend_index);
const int blend_flag = blend_preliminary.flags;
// Re set alpha, it was modified, must be done after index calculation
@ -4022,9 +4024,19 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool blend_mix3 = !!(blend_flag & BLEND_MIX3);
bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP;
const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
// Primitives don't overlap.
const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
// HW blend can be done in multiple passes when there's no overlap.
// Blend second pass is only useful when texture barriers aren't supported.
// Speed wise Texture barriers > blend second pass > texture copies.
// TODO: 24bit and 32bit formats on clamp 1 can always prefer blend second pass depending on the blend equations.
const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend;
const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2;
// We don't want to enable blend mix if we are doing a second pass, it's useless.
blend_mix &= !bmix1_second_pass;
const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend.
@ -4033,9 +4045,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
|| accumulation_blend; // Mix of hw/sw blending
// Blend can be done on hw. As and F cases should be accurate.
// BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend.
// BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw.
bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // As or Af cases only.
// BLEND_HW1 with Ad, BLEND_HW3 might require sw blend.
// BLEND_HW1 with As/F and BLEND_HW2 can be done in hw.
bool clr_blend1_2 = (blend_flag & (BLEND_HW1 | BLEND_HW2)) && (m_conf.ps.blend_c != 1) // As or Af cases only.
&& !(m_draw_env->PABE.PABE && GetAlphaMinMax().min < 128) // No PABE as it will require sw blending.
&& (COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1.
&& !prefer_sw_blend; // Don't run if sw blend is preferred.
@ -4249,7 +4261,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
if (m_conf.ps.blend_c == 2)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
const HWBlend blend = GSDevice::GetBlend(blend_index);
if (accumulation_blend)
{
// Keep HW blending to do the addition/subtraction
@ -4304,29 +4315,21 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.dither_adjust = can_dither;
}
// For mixed blend, the source blend is done in the shader (so we use CONST_ONE as a factor).
m_conf.blend = {true, GSDevice::CONST_ONE, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,m_conf.ps.blend_c == 2, AFIX};
m_conf.ps.blend_mix = (blend.op == GSDevice::OP_REV_SUBTRACT) ? 2 : 1;
// Elide DSB colour output if not used by dest.
m_conf.ps.no_color1 |= !GSDevice::IsDualSourceBlendFactor(blend.dst);
if (blend_mix1)
{
if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one))
{
// Alpha is guaranteed to be > 128.
// Replace Cs*Alpha + Cd*(1 - Alpha) with Cs*Alpha - Cd*(Alpha - 1).
// Alpha - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation.
// Af will be replaced with As in shader and send it to dual source output.
m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, GSDevice::OP_SUBTRACT, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
// blend hw 1 will disable alpha clamp, we can reuse the old bits.
blend.dst = GSDevice::SRC1_COLOR;
blend.op = GSDevice::OP_SUBTRACT;
m_conf.ps.blend_hw = 1;
// DSB output will always be used.
m_conf.ps.no_color1 = false;
}
else if (m_conf.ps.blend_a == m_conf.ps.blend_d)
{
// Compensate slightly for Cd*(Alpha + 1) - Cs*Alpha.
// Cd*(Alpha + 1) - Cs*Alpha will always be wrong.
// Let's cheat a little and divide blended Cs by Alpha.
// Result will still be wrong but closer to what we want.
m_conf.ps.blend_hw = 2;
}
@ -4336,11 +4339,10 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
}
else if (blend_mix2)
{
// Allow to compensate when Cs*(Alpha + 1) overflows, to compensate we change
// the alpha output value for Cd*Alpha.
m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,false, 0};
// Allow to compensate when Cs*(Alpha + 1) overflows,
// to compensate we change the alpha output value for Cd*Alpha.
blend.dst = GSDevice::SRC1_COLOR;
m_conf.ps.blend_hw = 3;
m_conf.ps.no_color1 = false;
m_conf.ps.blend_a = 0;
m_conf.ps.blend_b = 2;
@ -4352,6 +4354,13 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_b = 0;
m_conf.ps.blend_d = 0;
}
// Elide DSB colour output if not used by dest.
m_conf.ps.no_color1 = !GSDevice::IsDualSourceBlendFactor(blend.dst);
// For mixed blend, the source blend is done in the shader (so we use CONST_ONE as a factor).
m_conf.blend = {true, GSDevice::CONST_ONE, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, m_conf.ps.blend_c == 2, AFIX};
m_conf.ps.blend_mix = (blend.op == GSDevice::OP_REV_SUBTRACT) ? 2 : 1;
}
else
{
@ -4394,29 +4403,84 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
}
// Care for hw blend value, 6 is for hw/sw, sw blending used.
if (blend_flag & BLEND_HW_CLR1)
if (blend_second_pass_support)
{
const HWBlend blend_second_pass = GSDevice::GetBlend(blend_index);
if (bmix1_second_pass)
{
// Alpha = As or Af.
// Cs*Alpha - Cd*Alpha, Cd*Alpha - Cs*Alpha.
// Render pass 1: Do (Cd - Cs) or (Cs - Cd) on first pass.
blend.src = GSDevice::CONST_ONE;
blend.dst = GSDevice::CONST_ONE;
// Render pass 2: Blend the result (Cd) from render pass 1 with alpha range of 0-2.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 2;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, (m_conf.ps.blend_c == 2) ? GSDevice::CONST_COLOR : GSDevice::SRC1_COLOR, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, m_conf.ps.blend_c == 2, AFIX};
}
else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3))
{
// Alpha = Ad.
// Cs*Alpha, Cs*Alpha + Cd, Cd - Cs*Alpha.
// Render pass 1: Do Cs*Alpha, Cs*Alpha + Cd or Cd - Cs*Alpha on first pass.
// Render pass 2: Take result (Cd) from render pass 1 and either add or rev subtract Cs*Alpha based on the blend operation.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend = {true, blend_second_pass.src, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if ((alpha_c0_high_max_one || alpha_c2_high_one) && (blend_flag & BLEND_HW4))
{
// Alpha = As or Af.
// Cs + Cd*Alpha, Cs - Cd*Alpha.
// Render pass 1: Calculate Cd*Alpha with an alpha range of 0-2.
m_conf.ps.blend_hw = 2;
blend.src = GSDevice::DST_COLOR;
blend.dst = (m_conf.ps.blend_c == 2) ? GSDevice::CONST_COLOR : GSDevice::SRC1_COLOR;
blend.op = GSDevice::OP_ADD;
// Render pass 2: Add or subtract result of render pass 1(Cd) from Cs.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 0;
m_conf.blend_second_pass.blend = {true, blend_second_pass.src, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW5))
{
// Alpha = Ad.
// Cd*Alpha - Cs*Alpha, Cs*Alpha - Cd*Alpha.
// Render pass 1: Do (Cd - Cs)*Alpha, (Cs - Cd)*Alpha or Cd*Alpha on first pass.
// Render pass 2: Take result (Cd) from render pass 1 and double it.
m_conf.blend_second_pass.enable = true;
m_conf.blend_second_pass.blend_hw = 1;
m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0};
}
if (m_conf.ps.blend_c == 2 && m_conf.blend_second_pass.enable)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
}
if (blend_flag & BLEND_HW1)
{
m_conf.ps.blend_hw = 1;
}
else if (blend_flag & BLEND_HW_CLR2)
else if (blend_flag & BLEND_HW2)
{
if (m_conf.ps.blend_c == 2)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
m_conf.ps.blend_hw = 2;
}
else if (!rta_correction && (blend_flag & BLEND_HW_CLR3))
else if (!m_conf.blend_second_pass.enable && alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3))
{
m_conf.ps.blend_hw = 3;
}
const HWBlend blend = GSDevice::GetBlend(blend_index);
m_conf.blend = {true, blend.src, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,m_conf.ps.blend_c == 2, AFIX};
const GSDevice::BlendFactor src_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ZERO : GSDevice::CONST_ONE;
const GSDevice::BlendFactor dst_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ONE : GSDevice::CONST_ZERO;
m_conf.blend = {true, blend.src, blend.dst, blend.op, src_factor_alpha, dst_factor_alpha, m_conf.ps.blend_c == 2, AFIX};
// Remove second color output when unused. Works around bugs in some drivers (e.g. Intel).
m_conf.ps.no_color1 |= !GSDevice::IsDualSourceBlendFactor(m_conf.blend.src_factor) &&
!GSDevice::IsDualSourceBlendFactor(m_conf.blend.dst_factor);
m_conf.ps.no_color1 = !GSDevice::IsDualSourceBlendFactor(m_conf.blend.src_factor) &&
!GSDevice::IsDualSourceBlendFactor(m_conf.blend.dst_factor) &&
!GSDevice::IsDualSourceBlendFactor(m_conf.blend_second_pass.blend.src_factor) &&
!GSDevice::IsDualSourceBlendFactor(m_conf.blend_second_pass.blend.dst_factor);
}
// Notify the shader that it needs to invert rounding
@ -5584,8 +5648,16 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
}
else
{
m_conf.blend.src_factor_alpha = GSDevice::SRC1_ALPHA;
m_conf.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA;
if (m_conf.blend_second_pass.enable)
{
m_conf.blend_second_pass.blend.src_factor_alpha = GSDevice::SRC1_ALPHA;
m_conf.blend_second_pass.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA;
}
else
{
m_conf.blend.src_factor_alpha = GSDevice::SRC1_ALPHA;
m_conf.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA;
}
}
// If Z writes are on, unfortunately we can't single pass it.

View File

@ -2573,6 +2573,24 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
SendHWDraw(config, psel.ps.IsFeedbackLoop());
if (config.blend_second_pass.enable)
{
if (config.blend.IsEffective(config.colormask))
{
OMSetBlendState(config.blend_second_pass.blend.enable, s_gl_blend_factors[config.blend_second_pass.blend.src_factor],
s_gl_blend_factors[config.blend_second_pass.blend.dst_factor], s_gl_blend_ops[config.blend_second_pass.blend.op],
s_gl_blend_factors[config.blend_second_pass.blend.src_factor_alpha], s_gl_blend_factors[config.blend_second_pass.blend.dst_factor_alpha],
config.blend_second_pass.blend.constant_enable, config.blend_second_pass.blend.constant);
}
else
{
OMSetBlendState();
}
psel.ps.blend_hw = config.blend_second_pass.blend_hw;
SetupPipeline(psel);
SendHWDraw(config, psel.ps.IsFeedbackLoop());
}
if (config.alpha_second_pass.enable)
{
// cbuffer will definitely be dirty if aref changes, no need to check it

View File

@ -5852,6 +5852,18 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, skip_first_barrier);
// blend second pass
if (config.blend_second_pass.enable)
{
if (config.blend_second_pass.blend.constant_enable)
SetBlendConstants(config.blend_second_pass.blend.constant);
pipe.bs = config.blend_second_pass.blend;
pipe.ps.blend_hw = config.blend_second_pass.blend_hw;
if (BindDrawPipeline(pipe))
DrawIndexedPrimitive();
}
// and the alpha pass
if (config.alpha_second_pass.enable)
{