GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.

The idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend, non recursive blend or hw clr blend
do the blending with Ad swapped in as As.

We are doing this to try to bring some blending modes that originally needed higher levels down to lower levels,
where we can do the draws with fewer texture barriers instead (gl/vk).
As for d3d11, this allows blending to run on these draws, since previously the cases weren't handled properly.
It will be slower on d3d11 since we will be reading the frame buffer, but it's better than nothing.

D3D11: It is enabled on Medium blending or higher; if the draw uses fbmask, it is enabled
on basic blending too.

OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.

All:
Prefer full sw blend when primitives don't overlap, or when sw fbmask or a full barrier is used, since it is more accurate.
This commit is contained in:
lightningterror 2022-02-02 12:36:56 +01:00
parent ae14afd5f7
commit 3ca4272230
4 changed files with 96 additions and 18 deletions

View File

@ -763,15 +763,15 @@ void ps_blend(inout float4 Color, float As, float2 pos_xy)
}
else
{
if (PS_CLR_HW == 1)
if (PS_CLR_HW == 1 || PS_CLR_HW == 5)
{
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = (float3)255.0f;
}
else if (PS_CLR_HW == 2)
else if (PS_CLR_HW == 2 || PS_CLR_HW == 4)
{
// Cd*As or Cd*F
// Cd*As,Cd*Ad or Cd*F
float Alpha = PS_BLEND_C == 2 ? Af : As;
@ -829,7 +829,16 @@ PS_OUTPUT ps_main(PS_INPUT input)
}
// Must be done before alpha correction
float alpha_blend = C.a / 128.0f;
float alpha_blend;
if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
{
float4 RT = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
}
else
{
alpha_blend = C.a / 128.0f;
}
// Alpha correction
if (PS_DFMT == FMT_16)

View File

@ -727,10 +727,10 @@ void ps_blend(inout vec4 Color, float As)
#else
// Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_CLR_HW == 1
#if PS_CLR_HW == 1 || PS_CLR_HW == 5
Color.rgb = vec3(255.0f);
#elif PS_CLR_HW == 2
// Cd*As or Cd*F
#elif PS_CLR_HW == 2 || PS_CLR_HW == 4
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2
float Alpha = Af;
@ -853,7 +853,12 @@ void ps_main()
#endif
// Must be done before alpha correction
#if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
float alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
#else
float alpha_blend = C.a / 128.0f;
#endif
// Correct the ALPHA value based on the output format
#if (PS_DFMT == FMT_16)

View File

@ -1064,11 +1064,11 @@ void ps_blend(inout vec4 Color, float As)
#endif
#else
#if PS_CLR_HW == 1
#if PS_CLR_HW == 1 || PS_CLR_HW == 5
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = vec3(255.0f);
#elif PS_CLR_HW == 2
// Cd*As or Cd*F
#elif PS_CLR_HW == 2 || PS_CLR_HW == 4
// Cd*As,Cd*Ad or Cd*F
#if PS_BLEND_C == 2
float Alpha = Af;
@ -1163,7 +1163,12 @@ void main()
#endif
// Must be done before alpha correction
#if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
vec4 RT = trunc(subpassLoad(RtSampler) * 255.0f + 0.1f);
float alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
#else
float alpha_blend = C.a / 128.0f;
#endif
// Correct the ALPHA value based on the output format
#if (PS_DFMT == FMT_16)

View File

@ -533,7 +533,18 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
// Compute the blending equation to detect special case
const GIFRegALPHA& ALPHA = m_context->ALPHA;
u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
// Ad cases, alpha write is masked, one barrier is enough, for d3d11 read the fb
// Replace Ad with As, blend flags will be used from As since we are changing the blend_index value.
bool blend_ad_alpha_masked = (ALPHA.C == 1) && (m_context->FRAME.FBMSK & 0xFF000000) == 0xFF000000;
u8 ALPHA_C = ALPHA.C;
if (!g_gs_device->Features().texture_barrier && (GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) && blend_ad_alpha_masked)
ALPHA_C = 0;
else if (g_gs_device->Features().texture_barrier && blend_ad_alpha_masked)
ALPHA_C = 0;
else
blend_ad_alpha_masked = false;
u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA_C) * 3 + ALPHA.D);
const int blend_flag = g_gs_device->GetBlendFlags(blend_index);
// HW blend can handle Cd output.
@ -588,7 +599,8 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
// fixes shadows in Superman shadows of Apokolips.
// DATE_BARRIER already does full barrier so also makes more sense to do full sw blend.
color_dest_blend &= !m_conf.require_full_barrier;
accumulation_blend &= !m_conf.require_full_barrier;
// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
accumulation_blend &= !(m_conf.require_full_barrier || (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO));
sw_blending |= impossible_or_free_blend;
// Do not run BLEND MIX if sw blending is already present, it's less accurate
blend_mix &= !sw_blending;
@ -619,8 +631,16 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
sw_blending |= (!(clr_blend || blend_mix) && (m_prim_overlap == PRIM_OVERLAP_NO));
[[fallthrough]];
case AccBlendLevel::Medium:
// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
if (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO)
{
accumulation_blend = false;
sw_blending |= true;
}
[[fallthrough]];
case AccBlendLevel::Basic:
// Disable accumulation blend when there is fbmask with no overlap, will be faster.
color_dest_blend &= !fbmask_no_overlap;
accumulation_blend &= !fbmask_no_overlap;
sw_blending |= accumulation_blend || blend_non_recursive || fbmask_no_overlap;
// Do not run BLEND MIX if sw blending is already present, it's less accurate
@ -746,7 +766,8 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
// Remove the addition/subtraction from the SW blending
m_conf.ps.blend_d = 2;
// Note accumulation_blend doesn't require a barrier
// Only Ad case will require one barrier
m_conf.require_one_barrier |= blend_ad_alpha_masked;
}
else if (blend_mix)
{
@ -771,13 +792,24 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
m_conf.ps.blend_b = 0;
m_conf.ps.blend_d = 0;
}
// Only Ad case will require one barrier
if (blend_ad_alpha_masked)
{
m_conf.require_one_barrier |= true;
// Swap Ad with As for hw blend
m_conf.ps.clr_hw = 6;
}
}
else
{
// Disable HW blending
m_conf.blend = {};
if (g_gs_device->Features().texture_barrier)
const bool blend_non_recursive_one_barrier = blend_non_recursive && blend_ad_alpha_masked;
if (blend_non_recursive_one_barrier)
m_conf.require_one_barrier |= true;
else if (g_gs_device->Features().texture_barrier)
m_conf.require_full_barrier |= !blend_non_recursive;
else
m_conf.require_one_barrier |= !blend_non_recursive;
@ -789,24 +821,51 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
}
else
{
// Care for clr_hw value, 6 is for hw/sw, sw blending used.
if (blend_flag & BLEND_C_CLR1)
{
m_conf.ps.clr_hw = 1;
if (blend_ad_alpha_masked)
{
m_conf.ps.blend_c = 1;
m_conf.ps.clr_hw = 5;
m_conf.require_one_barrier |= true;
}
else
{
m_conf.ps.clr_hw = 1;
}
}
else if (blend_flag & (BLEND_C_CLR2_AF | BLEND_C_CLR2_AS))
{
if (ALPHA.C == 2)
if (blend_ad_alpha_masked)
{
m_conf.ps.blend_c = 1;
m_conf.ps.clr_hw = 4;
m_conf.require_one_barrier |= true;
}
else if (ALPHA.C == 2)
{
m_conf.ps.blend_c = 2;
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(ALPHA.FIX) / 128.0f;
m_conf.ps.clr_hw = 2;
}
else // ALPHA.C == 0
{
m_conf.ps.blend_c = 0;
m_conf.ps.clr_hw = 2;
}
m_conf.ps.clr_hw = 2;
}
else if (blend_flag & BLEND_C_CLR3)
{
m_conf.ps.clr_hw = 3;
}
else if (blend_ad_alpha_masked)
{
m_conf.ps.blend_c = 1;
m_conf.ps.clr_hw = 6;
m_conf.require_one_barrier |= true;
}
if (m_conf.ps.dfmt == 1 && ALPHA.C == 1)
{