GS-hw: Separate the Alpha masked Ad case from blend hw bit.

Allows for cleaner code.
This commit is contained in:
lightningterror 2023-03-10 13:02:18 +01:00
parent df2d11e70d
commit 06aed8491c
12 changed files with 48 additions and 46 deletions

View File

@ -31,6 +31,7 @@
#define PS_FOG 0
#define PS_IIP 0
#define PS_BLEND_HW 0
#define PS_A_MASKED 0
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1
@ -71,6 +72,7 @@
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
struct VS_INPUT
{
@ -831,7 +833,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float3 alpha_compensate = max((float3)1.0f, Color.rgb / (float3)255.0f);
As_rgba.rgb -= alpha_compensate;
}
else if (PS_BLEND_HW == 2 || PS_BLEND_HW == 4)
else if (PS_BLEND_HW == 2)
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
@ -841,7 +843,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float color_compensate = 1.0f * (C + 1.0f);
Color.rgb -= (float3)color_compensate;
}
else if (PS_BLEND_HW == 3 || PS_BLEND_HW == 5)
else if (PS_BLEND_HW == 3)
{
// As, Ad or Af clamped.
As_rgba.rgb = (float3)C_clamped;
@ -854,13 +856,13 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
}
else
{
if (PS_BLEND_HW == 1 || PS_BLEND_HW == 5)
if (PS_BLEND_HW == 1)
{
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = (float3)255.0f;
}
else if (PS_BLEND_HW == 2 || PS_BLEND_HW == 4)
else if (PS_BLEND_HW == 2)
{
// Cd*As,Cd*Ad or Cd*F
@ -943,7 +945,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
}
float4 alpha_blend;
if (PS_BLEND_C == 1 && PS_BLEND_HW > 3)
if (SW_AD_TO_HW)
{
float4 RT = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
alpha_blend = (float4)(RT.a / 128.0f);

View File

@ -21,7 +21,7 @@
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_BLEND_HW > 3)
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
#define PS_PRIMID_INIT (PS_DATE == 1 || PS_DATE == 2)
#define NEEDS_RT_EARLY (PS_TEX_IS_FB == 1 || PS_DATE >= 5)
#define NEEDS_RT (NEEDS_RT_EARLY || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
@ -786,7 +786,7 @@ float As = As_rgba.a;
// changed alpha should only be done for hw blend.
vec3 alpha_compensate = max(vec3(1.0f), Color.rgb / vec3(255.0f));
As_rgba.rgb -= alpha_compensate;
#elif PS_BLEND_HW == 2 || PS_BLEND_HW == 4
#elif PS_BLEND_HW == 2
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
// as that is the minimum color Cd can be,
@ -794,7 +794,7 @@ float As = As_rgba.a;
// blended value it can be.
float color_compensate = 1.0f * (C + 1.0f);
Color.rgb -= vec3(color_compensate);
#elif PS_BLEND_HW == 3 || PS_BLEND_HW == 5
#elif PS_BLEND_HW == 3
// As, Ad or Af clamped.
As_rgba.rgb = vec3(C_clamped);
// Cs*(Alpha + 1) might overflow, if it does then adjust alpha value
@ -806,9 +806,9 @@ float As = As_rgba.a;
#else
// Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_BLEND_HW == 1 || PS_BLEND_HW == 5
#if PS_BLEND_HW == 1
Color.rgb = vec3(255.0f);
#elif PS_BLEND_HW == 2 || PS_BLEND_HW == 4
#elif PS_BLEND_HW == 2
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2

View File

@ -321,6 +321,7 @@ void main()
#define PS_ATST 1
#define PS_FOG 0
#define PS_BLEND_HW 0
#define PS_A_MASKED 0
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1
@ -352,6 +353,7 @@ void main()
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1)
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || (PS_DATE >= 5))
@ -1105,7 +1107,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
// changed alpha should only be done for hw blend.
vec3 alpha_compensate = max(vec3(1.0f), Color.rgb / vec3(255.0f));
As_rgba.rgb -= alpha_compensate;
#elif PS_BLEND_HW == 2 || PS_BLEND_HW == 4
#elif PS_BLEND_HW == 2
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
// as that is the minimum color Cd can be,
@ -1113,7 +1115,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
// blended value it can be.
float color_compensate = 1.0f * (C + 1.0f);
Color.rgb -= vec3(color_compensate);
#elif PS_BLEND_HW == 3 || PS_BLEND_HW == 5
#elif PS_BLEND_HW == 3
// As, Ad or Af clamped.
As_rgba.rgb = vec3(C_clamped);
// Cs*(Alpha + 1) might overflow, if it does then adjust alpha value
@ -1124,10 +1126,10 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#endif
#else
#if PS_BLEND_HW == 1 || PS_BLEND_HW == 5
#if PS_BLEND_HW == 1
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = vec3(255.0f);
#elif PS_BLEND_HW == 2 || PS_BLEND_HW == 4
#elif PS_BLEND_HW == 2
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2
@ -1231,7 +1233,7 @@ void main()
C.a = 128.0f;
#endif
#if (PS_BLEND_C == 1 && PS_BLEND_HW > 3)
#if (SW_AD_TO_HW)
vec4 RT = trunc(subpassLoad(RtSampler) * 255.0f + 0.1f);
vec4 alpha_blend = vec4(RT.a / 128.0f);
#else

View File

@ -327,7 +327,8 @@ struct alignas(16) GSHWDrawConfig
u32 blend_c : 2;
u32 blend_d : 2;
u32 fixed_one_a : 1;
u32 blend_hw : 3;
u32 blend_hw : 2;
u32 a_masked : 1;
u32 hdr : 1;
u32 colclip : 1;
u32 blend_mix : 2;

View File

@ -153,6 +153,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_FOG", sel.fog);
sm.AddMacro("PS_IIP", sel.iip);
sm.AddMacro("PS_BLEND_HW", sel.blend_hw);
sm.AddMacro("PS_A_MASKED", sel.a_masked);
sm.AddMacro("PS_FBA", sel.fba);
sm.AddMacro("PS_FBMASK", sel.fbmask);
sm.AddMacro("PS_LTF", sel.ltf);

View File

@ -1614,6 +1614,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_FOG", sel.fog);
sm.AddMacro("PS_IIP", sel.iip);
sm.AddMacro("PS_BLEND_HW", sel.blend_hw);
sm.AddMacro("PS_A_MASKED", sel.a_masked);
sm.AddMacro("PS_FBA", sel.fba);
sm.AddMacro("PS_FBMASK", sel.fbmask);
sm.AddMacro("PS_LTF", sel.ltf);

View File

@ -3105,6 +3105,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
m_conf.ps.no_color1 = true;
// Only Ad case will require one barrier
// No need to set a_masked bit for blend_ad_alpha_masked case
m_conf.require_one_barrier |= blend_ad_alpha_masked;
}
else if (blend_mix)
@ -3134,7 +3135,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// Try to compensate a bit with subtracting 1 (0.00392) * (Alpha + 1) from Cs.
m_conf.ps.blend_hw = blend_ad_alpha_masked ? 4 : 2;
m_conf.ps.blend_hw = 2;
}
m_conf.ps.blend_a = 0;
@ -3146,7 +3147,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// Allow to compensate when Cs*(Alpha + 1) overflows, to compensate we change
// the alpha output value for Cd*Alpha.
m_conf.blend = {true, GSDevice::CONST_ONE, GSDevice::SRC1_COLOR, blend.op, false, 0};
m_conf.ps.blend_hw = blend_ad_alpha_masked ? 5 : 3;
m_conf.ps.blend_hw = 3;
m_conf.ps.no_color1 = false;
m_conf.ps.blend_a = 0;
@ -3163,11 +3164,9 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// Only Ad case will require one barrier
if (blend_ad_alpha_masked)
{
m_conf.require_one_barrier |= true;
// Swap Ad with As for hw blend
// Check if blend mix 1 or 2 already enabled clr
if (m_conf.ps.blend_hw == 0)
m_conf.ps.blend_hw = 6;
m_conf.ps.a_masked = 1;
m_conf.require_one_barrier |= true;
}
}
else
@ -3178,6 +3177,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
replace_dual_src = false;
blending_alpha_pass = false;
// No need to set a_masked bit for blend_ad_alpha_masked case
const bool blend_non_recursive_one_barrier = blend_non_recursive && blend_ad_alpha_masked;
if (blend_non_recursive_one_barrier)
m_conf.require_one_barrier |= true;
@ -3197,35 +3197,25 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// Care for hw blend value; the Ad alpha masked case (formerly blend_hw 6, hw/sw with sw blending used) is now flagged via the a_masked bit.
if (blend_flag & BLEND_HW_CLR1)
{
if (blend_ad_alpha_masked)
m_conf.ps.blend_hw = 5;
else
m_conf.ps.blend_hw = 1;
m_conf.ps.blend_hw = 1;
}
else if (blend_flag & (BLEND_HW_CLR2))
{
if (blend_ad_alpha_masked)
{
m_conf.ps.blend_hw = 4;
}
else
{
if (m_conf.ps.blend_c == 2)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
if (m_conf.ps.blend_c == 2)
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f;
m_conf.ps.blend_hw = 2;
}
m_conf.ps.blend_hw = 2;
}
else if (blend_flag & BLEND_HW_CLR3)
{
m_conf.ps.blend_hw = 3;
}
else if (blend_ad_alpha_masked)
{
m_conf.ps.blend_hw = 6;
}
m_conf.require_one_barrier |= blend_ad_alpha_masked;
if (blend_ad_alpha_masked)
{
m_conf.ps.a_masked = 1;
m_conf.require_one_barrier |= true;
}
const HWBlend blend(GSDevice::GetBlend(blend_index, replace_dual_src));
m_conf.blend = {true, blend.src, blend.dst, blend.op, m_conf.ps.blend_c == 2, AFIX};

View File

@ -1388,6 +1388,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C);
setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D);
setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW);
setFnConstantI(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED);
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);

View File

@ -183,6 +183,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_BLEND_C,
GSMTLConstantIndex_PS_BLEND_D,
GSMTLConstantIndex_PS_BLEND_HW,
GSMTLConstantIndex_PS_A_MASKED,
GSMTLConstantIndex_PS_HDR,
GSMTLConstantIndex_PS_COLCLIP,
GSMTLConstantIndex_PS_BLEND_MIX,

View File

@ -50,6 +50,7 @@ constant uint PS_BLEND_B [[function_constant(GSMTLConstantIndex_PS_BL
constant uint PS_BLEND_C [[function_constant(GSMTLConstantIndex_PS_BLEND_C)]];
constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BLEND_D)]];
constant uint PS_BLEND_HW [[function_constant(GSMTLConstantIndex_PS_BLEND_HW)]];
constant bool PS_A_MASKED [[function_constant(GSMTLConstantIndex_PS_A_MASKED)]];
constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]];
constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]];
constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
@ -95,7 +96,7 @@ constant bool PS_TEX_IS_COLOR = !PS_TEX_IS_DEPTH;
constant bool PS_HAS_PALETTE = PS_PAL_FMT != 0 || (PS_CHANNEL >= 1 && PS_CHANNEL <= 5);
constant bool NOT_IIP = !IIP;
constant bool SW_BLEND = (PS_BLEND_A != PS_BLEND_B) || PS_BLEND_D;
constant bool SW_AD_TO_HW = PS_BLEND_C == 1 && PS_BLEND_HW > 3;
constant bool SW_AD_TO_HW = (PS_BLEND_C == 1 && PS_A_MASKED);
constant bool NEEDS_RT_FOR_BLEND = (((PS_BLEND_A != PS_BLEND_B) && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1)) || PS_BLEND_D == 1 || SW_AD_TO_HW);
constant bool NEEDS_RT_EARLY = PS_TEX_IS_FB || PS_DATE >= 5;
constant bool NEEDS_RT = NEEDS_RT_EARLY || (!PS_PRIM_CHECKING_INIT && (PS_FBMASK || NEEDS_RT_FOR_BLEND));
@ -891,7 +892,7 @@ struct PSMain
float3 alpha_compensate = max(float3(1.f), Color.rgb / float3(255.f));
As_rgba.rgb -= alpha_compensate;
}
else if (PS_BLEND_HW == 2 || PS_BLEND_HW == 4)
else if (PS_BLEND_HW == 2)
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
@ -901,7 +902,7 @@ struct PSMain
float color_compensate = 1.f * (C + 1.f);
Color.rgb -= float3(color_compensate);
}
else if (PS_BLEND_HW == 3 || PS_BLEND_HW == 5)
else if (PS_BLEND_HW == 3)
{
// As, Ad or Af clamped.
As_rgba.rgb = float3(C_clamped);
@ -915,11 +916,11 @@ struct PSMain
else
{
// Needed for Cd * (As/Ad/F + 1) blending modes
if (PS_BLEND_HW == 1 || PS_BLEND_HW == 5)
if (PS_BLEND_HW == 1)
{
Color.rgb = 255.f;
}
else if (PS_BLEND_HW == 2 || PS_BLEND_HW == 4)
else if (PS_BLEND_HW == 2)
{
float Alpha = PS_BLEND_C == 2 ? cb.alpha_fix : As;
Color.rgb = saturate(Alpha - 1.f) * 255.f;

View File

@ -1045,6 +1045,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_ATST {}\n", sel.atst)
+ fmt::format("#define PS_FOG {}\n", sel.fog)
+ fmt::format("#define PS_BLEND_HW {}\n", sel.blend_hw)
+ fmt::format("#define PS_A_MASKED {}\n", sel.a_masked)
+ fmt::format("#define PS_FBA {}\n", sel.fba)
+ fmt::format("#define PS_LTF {}\n", sel.ltf)
+ fmt::format("#define PS_AUTOMATIC_LOD {}\n", sel.automatic_lod)

View File

@ -2077,6 +2077,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_ATST", sel.atst);
AddMacro(ss, "PS_FOG", sel.fog);
AddMacro(ss, "PS_BLEND_HW", sel.blend_hw);
AddMacro(ss, "PS_A_MASKED", sel.a_masked);
AddMacro(ss, "PS_FBA", sel.fba);
AddMacro(ss, "PS_LTF", sel.ltf);
AddMacro(ss, "PS_AUTOMATIC_LOD", sel.automatic_lod);