diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 3491ec402f..18646bd9fd 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -943,7 +943,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) float Alpha = PS_BLEND_C == 2 ? Af : As; - Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f)); + Color.rgb = saturate((float3)Alpha - (float3)1.0f); Color.rgb *= (float3)255.0f; } else if (PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 713f47e1be..14cff795c2 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -882,7 +882,7 @@ const std::array GSDevice::m_blendMap = { BLEND_MIX1 , OP_SUBTRACT , SRC1_COLOR , SRC1_COLOR} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As { BLEND_A_MAX , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad { 0 , OP_ADD , DST_ALPHA , INV_DST_ALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) - { 0 , OP_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad + { BLEND_HW5 , OP_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad { BLEND_A_MAX | BLEND_MIX2 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F { BLEND_MIX1 , OP_ADD , CONST_COLOR , INV_CONST_COLOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) { BLEND_MIX1 , OP_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F @@ -890,8 +890,8 @@ const std::array GSDevice::m_blendMap = { BLEND_ACCU , OP_ADD , SRC1_COLOR , CONST_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { BLEND_NO_REC , OP_ADD , SRC1_COLOR , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As { BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) - { BLEND_HW_CLR3 , OP_ADD , DST_ALPHA , 
CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd - { BLEND_HW_CLR3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad + { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd + { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) { BLEND_ACCU , OP_ADD , CONST_COLOR , CONST_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd { BLEND_NO_REC , OP_ADD , CONST_COLOR , CONST_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F @@ -900,7 +900,7 @@ const std::array GSDevice::m_blendMap = { BLEND_MIX1 , OP_REV_SUBTRACT , SRC1_COLOR , SRC1_COLOR} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As { 0 , OP_ADD , INV_DST_ALPHA , DST_ALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) { BLEND_A_MAX , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad - { 0 , OP_REV_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad + { BLEND_HW5 , OP_REV_SUBTRACT , DST_ALPHA , DST_ALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad { BLEND_MIX3 , OP_ADD , INV_CONST_COLOR , CONST_COLOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) { BLEND_A_MAX | BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , // 1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F { BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F @@ -913,31 +913,31 @@ const std::array GSDevice::m_blendMap = { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 - { 0 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As - { BLEND_HW_CLR1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) - { BLEND_HW_CLR2 , 
OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As + { BLEND_HW4 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As + { BLEND_HW1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) + { BLEND_HW2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As { 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad - { BLEND_HW_CLR1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) - { 0 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad - { 0 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F - { BLEND_HW_CLR1 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) - { BLEND_HW_CLR2 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F + { BLEND_HW1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) + { BLEND_HW5 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad + { BLEND_HW4 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F + { BLEND_HW1 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) + { BLEND_HW2 , OP_ADD , DST_COLOR , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F { BLEND_NO_REC , OP_ADD , INV_SRC1_COLOR , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) { BLEND_ACCU , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As { BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As { 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) - { BLEND_HW_CLR3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad + { BLEND_HW3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad { 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad { 
BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) { BLEND_ACCU , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F { BLEND_NO_REC , OP_REV_SUBTRACT , CONST_COLOR , CONST_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F - { 0 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As + { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As { 0 , OP_ADD , CONST_ZERO , INV_SRC1_COLOR} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) { 0 , OP_SUBTRACT , CONST_ZERO , SRC1_COLOR} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As { 0 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad { 0 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) { 0 , OP_SUBTRACT , CONST_ZERO , DST_ALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad - { 0 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F + { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F { 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) { 0 , OP_SUBTRACT , CONST_ZERO , CONST_COLOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 28173e7ebc..72fd685f47 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -221,16 +221,18 @@ static_assert(sizeof(InterlaceConstantBuffer) == 16, "InterlaceConstantBuffer is enum HWBlendFlags { // Flags to determine blending behavior - BLEND_CD = 0x1, // Output is Cd, hw blend can handle it - BLEND_HW_CLR1 = 0x2, // Clear color blending (use directly the destination color as blending factor) - BLEND_HW_CLR2 = 0x4, // Clear color blending (use directly the destination color as blending factor) - 
BLEND_HW_CLR3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128 - BLEND_MIX1 = 0x10, // Mix of hw and sw, do Cs*F or Cs*As in shader - BLEND_MIX2 = 0x20, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader - BLEND_MIX3 = 0x40, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader - BLEND_ACCU = 0x80, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds - BLEND_NO_REC = 0x100, // Doesn't require sampling of the RT as a texture - BLEND_A_MAX = 0x200, // Impossible blending uses coeff bigger than 1 + BLEND_CD = 0x1, // Output is Cd, hw blend can handle it + BLEND_HW1 = 0x2, // Clear color blending, Cd*(1 + Alpha) (uses the destination color directly as a blending factor) + BLEND_HW2 = 0x4, // Clear color blending, Cd*As or Cd*F (uses the destination color directly as a blending factor) + BLEND_HW3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128 + BLEND_HW4 = 0x10, // HW rendering may be split in 2 passes, Cs + Cd*Alpha or Cs - Cd*Alpha with Alpha being As or F + BLEND_HW5 = 0x20, // HW rendering may be split in 2 passes, (Cs - Cd)*Ad, (Cd - Cs)*Ad or Cd*Ad + BLEND_MIX1 = 0x40, // Mix of hw and sw, do Cs*F or Cs*As in shader + BLEND_MIX2 = 0x80, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader + BLEND_MIX3 = 0x100, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader + BLEND_ACCU = 0x200, // Allows using a mix of SW and HW blending to keep the best of both worlds + BLEND_NO_REC = 0x400, // Doesn't require sampling of the RT as a texture + BLEND_A_MAX = 0x800, // Impossible blending, uses a coefficient bigger than 1 }; // Determines the HW blend function for DX11/OGL @@ -691,6 +693,16 @@ struct alignas(16) GSHWDrawConfig AlphaPass alpha_second_pass; + struct BlendPass + { + BlendState blend; + u8 blend_hw; + bool enable; + }; + static_assert(sizeof(BlendPass) == 8, "blend pass is 8 bytes"); + + BlendPass blend_second_pass; + VSConstantBuffer cb_vs; PSConstantBuffer cb_ps; }; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 
d01cf64c59..ed876169d2 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -2624,6 +2624,14 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor); DrawIndexedPrimitive(); + if (config.blend_second_pass.enable) + { + config.ps.blend_hw = config.blend_second_pass.blend_hw; + SetupPS(config.ps, &config.cb_ps, config.sampler); + SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend_second_pass.blend), config.blend_second_pass.blend.constant); + DrawIndexedPrimitive(); + } + if (config.alpha_second_pass.enable) { preprocessSel(config.alpha_second_pass.ps); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 7355aa1694..2409548ef8 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -3942,6 +3942,18 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) if (BindDrawPipeline(pipe)) DrawIndexedPrimitive(); + // blend second pass + if (config.blend_second_pass.enable) + { + if (config.blend_second_pass.blend.constant_enable) + SetBlendConstants(config.blend_second_pass.blend.constant); + + pipe.bs = config.blend_second_pass.blend; + pipe.ps.blend_hw = config.blend_second_pass.blend_hw; + if (BindDrawPipeline(pipe)) + DrawIndexedPrimitive(); + } + // and the alpha pass if (config.alpha_second_pass.enable) { diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 7f933dad3b..a2f1090b49 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -3977,7 +3977,8 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT const bool blend_ad = m_conf.ps.blend_c == 1; const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000; bool blend_ad_alpha_masked = blend_ad && alpha_mask; - if (((GSConfig.AccurateBlendingUnit >= 
AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked) + const bool is_basic_blend = GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic; + if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked) { // Swap Ad with As for hw blend. m_conf.ps.a_masked = 1; @@ -3996,6 +3997,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT u8 blend_index = static_cast(((m_conf.ps.blend_a * 3 + m_conf.ps.blend_b) * 3 + m_conf.ps.blend_c) * 3 + m_conf.ps.blend_d); const HWBlend blend_preliminary = GSDevice::GetBlend(blend_index); + HWBlend blend = GSDevice::GetBlend(blend_index); const int blend_flag = blend_preliminary.flags; // Re set alpha, it was modified, must be done after index calculation @@ -4022,9 +4024,19 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT const bool blend_mix3 = !!(blend_flag & BLEND_MIX3); bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP; - const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked; // Primitives don't overlap. const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO); + + // HW blend can be done in multiple passes when there's no overlap. + // Blend second pass is only useful when texture barriers aren't supported. + // Speed wise Texture barriers > blend second pass > texture copies. + // TODO: 24bit and 32bit formats on clamp 1 can always prefer blend second pass depending on the blend equations. + const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend; + const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2; + // We don't want to enable blend mix if we are doing a second pass, it's useless. 
+ blend_mix &= !bmix1_second_pass; + + const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked; // Condition 1: Require full sw blend for full barrier. // Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead. // Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend. @@ -4033,9 +4045,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT || accumulation_blend; // Mix of hw/sw blending // Blend can be done on hw. As and F cases should be accurate. - // BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend. - // BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw. - bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // As or Af cases only. + // BLEND_HW1 with Ad, BLEND_HW3 might require sw blend. + // BLEND_HW1 with As/F and BLEND_HW2 can be done in hw. + bool clr_blend1_2 = (blend_flag & (BLEND_HW1 | BLEND_HW2)) && (m_conf.ps.blend_c != 1) // As or Af cases only. && !(m_draw_env->PABE.PABE && GetAlphaMinMax().min < 128) // No PABE as it will require sw blending. && (COLCLAMP.CLAMP) // Let's add a colclamp check too, hw blend will clamp to 0-1. && !prefer_sw_blend; // Don't run if sw blend is preferred. @@ -4249,7 +4261,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT if (m_conf.ps.blend_c == 2) m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast(AFIX) / 128.0f; - const HWBlend blend = GSDevice::GetBlend(blend_index); if (accumulation_blend) { // Keep HW blending to do the addition/subtraction @@ -4304,29 +4315,21 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.dither_adjust = can_dither; } - // For mixed blend, the source blend is done in the shader (so we use CONST_ONE as a factor). 
- m_conf.blend = {true, GSDevice::CONST_ONE, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,m_conf.ps.blend_c == 2, AFIX}; - m_conf.ps.blend_mix = (blend.op == GSDevice::OP_REV_SUBTRACT) ? 2 : 1; - - // Elide DSB colour output if not used by dest. - m_conf.ps.no_color1 |= !GSDevice::IsDualSourceBlendFactor(blend.dst); - if (blend_mix1) { if (m_conf.ps.blend_b == m_conf.ps.blend_d && (alpha_c0_high_min_one || alpha_c1_high_min_one || alpha_c2_high_one)) { + // Alpha is guaranteed to be > 128. // Replace Cs*Alpha + Cd*(1 - Alpha) with Cs*Alpha - Cd*(Alpha - 1). - // Alpha - 1 subtraction is only done for the dual source output (hw blending part) since we are changing the equation. - // Af will be replaced with As in shader and send it to dual source output. - m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, GSDevice::OP_SUBTRACT, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; - // blend hw 1 will disable alpha clamp, we can reuse the old bits. + blend.dst = GSDevice::SRC1_COLOR; + blend.op = GSDevice::OP_SUBTRACT; m_conf.ps.blend_hw = 1; - // DSB output will always be used. - m_conf.ps.no_color1 = false; } else if (m_conf.ps.blend_a == m_conf.ps.blend_d) { - // Compensate slightly for Cd*(Alpha + 1) - Cs*Alpha. + // Cd*(Alpha + 1) - Cs*Alpha will always be wrong. + // Let's cheat a little and divide blended Cs by Alpha. + // Result will still be wrong but closer to what we want. m_conf.ps.blend_hw = 2; } @@ -4336,11 +4339,10 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT } else if (blend_mix2) { - // Allow to compensate when Cs*(Alpha + 1) overflows, to compensate we change - // the alpha output value for Cd*Alpha. - m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,false, 0}; + // Allow to compensate when Cs*(Alpha + 1) overflows, + // to compensate we change the alpha output value for Cd*Alpha. 
+ blend.dst = GSDevice::SRC1_COLOR; m_conf.ps.blend_hw = 3; - m_conf.ps.no_color1 = false; m_conf.ps.blend_a = 0; m_conf.ps.blend_b = 2; @@ -4352,6 +4354,13 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_b = 0; m_conf.ps.blend_d = 0; } + + // Elide DSB colour output if not used by dest. + m_conf.ps.no_color1 = !GSDevice::IsDualSourceBlendFactor(blend.dst); + + // For mixed blend, the source blend is done in the shader (so we use CONST_ONE as a factor). + m_conf.blend = {true, GSDevice::CONST_ONE, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, m_conf.ps.blend_c == 2, AFIX}; + m_conf.ps.blend_mix = (blend.op == GSDevice::OP_REV_SUBTRACT) ? 2 : 1; } else { @@ -4394,29 +4403,84 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.rta_correction = rt->m_rt_alpha_scale; } - // Care for hw blend value, 6 is for hw/sw, sw blending used. - if (blend_flag & BLEND_HW_CLR1) + if (blend_second_pass_support) + { + const HWBlend blend_second_pass = GSDevice::GetBlend(blend_index); + if (bmix1_second_pass) + { + // Alpha = As or Af. + // Cs*Alpha - Cd*Alpha, Cd*Alpha - Cs*Alpha. + // Render pass 1: Do (Cd - Cs) or (Cs - Cd) on first pass. + blend.src = GSDevice::CONST_ONE; + blend.dst = GSDevice::CONST_ONE; + // Render pass 2: Blend the result (Cd) from render pass 1 with alpha range of 0-2. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 2; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, (m_conf.ps.blend_c == 2) ? GSDevice::CONST_COLOR : GSDevice::SRC1_COLOR, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, m_conf.ps.blend_c == 2, AFIX}; + } + else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3)) + { + // Alpha = Ad. + // Cs*Alpha, Cs*Alpha + Cd, Cd - Cs*Alpha. + // Render pass 1: Do Cs*Alpha, Cs*Alpha + Cd or Cd - Cs*Alpha on first pass. 
+ // Render pass 2: Take result (Cd) from render pass 1 and either add or rev subtract Cs*Alpha based on the blend operation. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend = {true, blend_second_pass.src, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + else if ((alpha_c0_high_max_one || alpha_c2_high_one) && (blend_flag & BLEND_HW4)) + { + // Alpha = As or Af. + // Cs + Cd*Alpha, Cs - Cd*Alpha. + // Render pass 1: Calculate Cd*Alpha with an alpha range of 0-2. + m_conf.ps.blend_hw = 2; + blend.src = GSDevice::DST_COLOR; + blend.dst = (m_conf.ps.blend_c == 2) ? GSDevice::CONST_COLOR : GSDevice::SRC1_COLOR; + blend.op = GSDevice::OP_ADD; + // Render pass 2: Add or subtract result of render pass 1(Cd) from Cs. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 0; + m_conf.blend_second_pass.blend = {true, blend_second_pass.src, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW5)) + { + // Alpha = Ad. + // Cd*Alpha - Cs*Alpha, Cs*Alpha - Cd*Alpha. + // Render pass 1: Do (Cd - Cs)*Alpha, (Cs - Cd)*Alpha or Cd*Alpha on first pass. + // Render pass 2: Take result (Cd) from render pass 1 and double it. 
+ m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 1; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + + if (m_conf.ps.blend_c == 2 && m_conf.blend_second_pass.enable) + m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast(AFIX) / 128.0f; + } + + if (blend_flag & BLEND_HW1) { m_conf.ps.blend_hw = 1; } - else if (blend_flag & BLEND_HW_CLR2) + else if (blend_flag & BLEND_HW2) { if (m_conf.ps.blend_c == 2) m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast(AFIX) / 128.0f; m_conf.ps.blend_hw = 2; } - else if (!rta_correction && (blend_flag & BLEND_HW_CLR3)) + else if (!m_conf.blend_second_pass.enable && alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3)) { m_conf.ps.blend_hw = 3; } - const HWBlend blend = GSDevice::GetBlend(blend_index); - m_conf.blend = {true, blend.src, blend.dst, blend.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO,m_conf.ps.blend_c == 2, AFIX}; + const GSDevice::BlendFactor src_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ZERO : GSDevice::CONST_ONE; + const GSDevice::BlendFactor dst_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ONE : GSDevice::CONST_ZERO; + m_conf.blend = {true, blend.src, blend.dst, blend.op, src_factor_alpha, dst_factor_alpha, m_conf.ps.blend_c == 2, AFIX}; // Remove second color output when unused. Works around bugs in some drivers (e.g. Intel). 
- m_conf.ps.no_color1 |= !GSDevice::IsDualSourceBlendFactor(m_conf.blend.src_factor) && - !GSDevice::IsDualSourceBlendFactor(m_conf.blend.dst_factor); + m_conf.ps.no_color1 = !GSDevice::IsDualSourceBlendFactor(m_conf.blend.src_factor) && + !GSDevice::IsDualSourceBlendFactor(m_conf.blend.dst_factor) && + !GSDevice::IsDualSourceBlendFactor(m_conf.blend_second_pass.blend.src_factor) && + !GSDevice::IsDualSourceBlendFactor(m_conf.blend_second_pass.blend.dst_factor); } // Notify the shader that it needs to invert rounding @@ -5584,8 +5648,16 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta } else { - m_conf.blend.src_factor_alpha = GSDevice::SRC1_ALPHA; - m_conf.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA; + if (m_conf.blend_second_pass.enable) + { + m_conf.blend_second_pass.blend.src_factor_alpha = GSDevice::SRC1_ALPHA; + m_conf.blend_second_pass.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA; + } + else + { + m_conf.blend.src_factor_alpha = GSDevice::SRC1_ALPHA; + m_conf.blend.dst_factor_alpha = GSDevice::INV_SRC1_ALPHA; + } } // If Z writes are on, unfortunately we can't single pass it. 
diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index e3c4f3ba27..14c6eae36c 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -2573,6 +2573,24 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config) SendHWDraw(config, psel.ps.IsFeedbackLoop()); + if (config.blend_second_pass.enable) + { + if (config.blend.IsEffective(config.colormask)) + { + OMSetBlendState(config.blend_second_pass.blend.enable, s_gl_blend_factors[config.blend_second_pass.blend.src_factor], + s_gl_blend_factors[config.blend_second_pass.blend.dst_factor], s_gl_blend_ops[config.blend_second_pass.blend.op], + s_gl_blend_factors[config.blend_second_pass.blend.src_factor_alpha], s_gl_blend_factors[config.blend_second_pass.blend.dst_factor_alpha], + config.blend_second_pass.blend.constant_enable, config.blend_second_pass.blend.constant); + } + else + { + OMSetBlendState(); + } + psel.ps.blend_hw = config.blend_second_pass.blend_hw; + SetupPipeline(psel); + SendHWDraw(config, psel.ps.IsFeedbackLoop()); + } + if (config.alpha_second_pass.enable) { // cbuffer will definitely be dirty if aref changes, no need to check it diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 80a69379f8..5abf6ec525 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -5852,6 +5852,18 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) if (BindDrawPipeline(pipe)) SendHWDraw(config, draw_rt, skip_first_barrier); + // blend second pass + if (config.blend_second_pass.enable) + { + if (config.blend_second_pass.blend.constant_enable) + SetBlendConstants(config.blend_second_pass.blend.constant); + + pipe.bs = config.blend_second_pass.blend; + pipe.ps.blend_hw = config.blend_second_pass.blend_hw; + if (BindDrawPipeline(pipe)) + DrawIndexedPrimitive(); + } + // and the alpha pass if (config.alpha_second_pass.enable) {