GS-hw: Implement alternative hw blending for Cd*As, Cd*F.

Perform HW blending for Cd*As and Cd*F by extending the existing clear color blend method, since the approach is almost the same.

This is most helpful for DX11, of course; for GL/Vulkan it will help with rendering
on lower levels of blending without needing a barrier.
This commit is contained in:
lightningterror 2022-01-23 12:39:01 +01:00
parent f1576eeb32
commit 4ab9d1a493
6 changed files with 88 additions and 30 deletions

View File

@ -673,11 +673,6 @@ float4 ps_color(PS_INPUT input)
C = fog(C, input.t.z);
if(PS_CLR1) // needed for Cd * (As/Ad/F + 1) blending modes
{
C.rgb = (float3)255.0f;
}
return C;
}
@ -744,7 +739,7 @@ void ps_blend(inout float4 Color, float As, float2 pos_xy)
float3 A = (PS_BLEND_A == 0) ? Cs : ((PS_BLEND_A == 1) ? Cd : (float3)0.0f);
float3 B = (PS_BLEND_B == 0) ? Cs : ((PS_BLEND_B == 1) ? Cd : (float3)0.0f);
float C = (PS_BLEND_C == 0) ? As : ((PS_BLEND_C == 1) ? Ad : Af);
float C = (PS_BLEND_C == 0) ? As : ((PS_BLEND_C == 1) ? Ad : Af);
float3 D = (PS_BLEND_D == 0) ? Cs : ((PS_BLEND_D == 1) ? Cd : (float3)0.0f);
// As/Af clamp alpha for Blend mix
@ -753,6 +748,25 @@ void ps_blend(inout float4 Color, float As, float2 pos_xy)
Color.rgb = (PS_BLEND_A == PS_BLEND_B) ? D : trunc(((A - B) * C) + D);
}
else
{
// Needed for Cd * (As/Ad/F + 1) blending modes
if (PS_CLR1 == 1)
{
Color.rgb = (float3)255.0f;
}
else if (PS_CLR1 > 1)
{
// PS_CLR1 2 Af, PS_CLR1 3 As
// Cd*As or Cd*F
float Alpha = PS_CLR1 == 2 ? Af : As;
Color.rgb /= (float3)255.0f;
Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f));
Color.rgb *= (float3)255.0f;
}
}
}
PS_OUTPUT ps_main(PS_INPUT input)

View File

@ -597,10 +597,6 @@ vec4 ps_color()
fog(C, PSin.t_float.z);
#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
C.rgb = vec3(255.0f);
#endif
return C;
}
@ -721,6 +717,25 @@ void ps_blend(inout vec4 Color, float As)
Color.rgb = trunc((A - B) * C + D);
#endif
#else
// Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_CLR1 == 1
C.rgb = vec3(255.0f);
#elif PS_CLR1 > 1
// PS_CLR1 2 Af, PS_CLR1 3 As
// Cd*As or Cd*F
#if PS_CLR1 == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
Color.rgb /= vec3(255.0f);
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
#endif
#endif
}

View File

@ -928,10 +928,6 @@ vec4 ps_color()
C = fog(C, vsIn.t.z);
#if PS_CLR1 // needed for Cd * (As/Ad/F + 1) blending modes
C.rgb = vec3(255.0f);
#endif
return C;
}
@ -1059,6 +1055,24 @@ void ps_blend(inout vec4 Color, float As)
Color.rgb = trunc((A - B) * C + D);
#endif
#else
// Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_CLR1 == 1
C.rgb = vec3(255.0f);
#elif PS_CLR1 > 1
// PS_CLR1 2 Af, PS_CLR1 3 As
// Cd*As or Cd*F
#if PS_CLR1 == 2
float Alpha = Af;
#else
float Alpha = As;
#endif
Color.rgb /= vec3(255.0f);
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
#endif
#endif
}

View File

@ -557,14 +557,14 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0
{ 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_C_CLR , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background
{ 0 , OP_ADD , CONST_ZERO , SRC1_ALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ BLEND_C_CLR1 , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)
{ BLEND_C_CLR2_AS , OP_ADD , DST_COLOR , SRC1_ALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad
{ BLEND_C_CLR , OP_ADD , DST_COLOR , DST_ALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ BLEND_C_CLR1 , OP_ADD , DST_COLOR , DST_ALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ 0 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
{ 0 , OP_ADD , CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
{ BLEND_C_CLR , OP_ADD , DST_COLOR , CONST_COLOR} , //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
{ 0 , OP_ADD , CONST_ZERO , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F
{ BLEND_C_CLR1 , OP_ADD , DST_COLOR , CONST_COLOR} , //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
{ BLEND_C_CLR2_AF , OP_ADD , DST_COLOR , CONST_COLOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F
{ BLEND_NO_REC , OP_ADD , INV_SRC1_ALPHA , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)
{ BLEND_ACCU , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ONE} , //?2001: (0 - Cs)*As + Cd ==> Cd - Cs*As
{ BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As

View File

@ -115,14 +115,16 @@ public:
enum HWBlendFlags
{
// A couple of flag to determine the blending behavior
BLEND_CD = 0x10, // Output is Cd, hw blend can handle it
BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_A_MAX = 0x100, // Impossible blending uses coeff bigger than 1
BLEND_C_CLR = 0x200, // Clear color blending (use directly the destination color as blending factor)
BLEND_NO_REC = 0x400, // Doesn't require sampling of the RT as a texture
BLEND_ACCU = 0x800, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
BLEND_CD = 0x10, // Output is Cd, hw blend can handle it
BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_A_MAX = 0x100, // Impossible blending uses coeff bigger than 1
BLEND_C_CLR1 = 0x200, // Clear color blending (use directly the destination color as blending factor)
BLEND_C_CLR2_AS = 0x400, // Clear color blending (use directly the destination color as blending factor)
BLEND_C_CLR2_AF = 0x800, // Clear color blending (use directly the destination color as blending factor)
BLEND_NO_REC = 0x1000, // Doesn't require sampling of the RT as a texture
BLEND_ACCU = 0x2000, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
};
// Determines the HW blend function for DX11/OGL
@ -224,7 +226,7 @@ struct alignas(16) GSHWDrawConfig
u32 blend_b : 2;
u32 blend_c : 2;
u32 blend_d : 2;
u32 clr1 : 1; // useful?
u32 clr1 : 2;
u32 hdr : 1;
u32 colclip : 1;
u32 alpha_clamp : 1;
@ -252,7 +254,7 @@ struct alignas(16) GSHWDrawConfig
// Scan mask
u32 scanmsk : 2;
u32 _free2 : 2;
u32 _free2 : 1;
};
u64 key;

View File

@ -773,7 +773,20 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
}
else
{
m_conf.ps.clr1 = !!(blend_flag & BLEND_C_CLR);
if (blend_flag & BLEND_C_CLR1)
{
m_conf.ps.clr1 = 1;
}
else if (blend_flag & BLEND_C_CLR2_AF)
{
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(ALPHA.FIX) / 128.0f;
m_conf.ps.clr1 = 2;
}
else if (blend_flag & BLEND_C_CLR2_AS)
{
m_conf.ps.clr1 = 3;
}
if (m_conf.ps.dfmt == 1 && ALPHA.C == 1)
{
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent