GS-hw: Implement alternative hw blending for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad.

The destination alpha (Ad) value is wrong in hw blending (it is applied as Ad/255 when it should be Ad/128), so let us try to compensate.

Multiply Cs by (255/128) in shader.
This works best if Cs is 0.5 or less; if it is higher than 0.5, then the closer it gets to 1, the less accurate the result becomes.

Of course it is best to use sw blending, but this will help when sw blending is not present/selected for a specific draw; it will help d3d11 quite a bit more.
This commit is contained in:
lightningterror 2022-01-26 03:05:06 +01:00
parent 4955a4ef98
commit 3b691da8d1
9 changed files with 60 additions and 37 deletions

View File

@ -27,7 +27,7 @@
#define PS_ATST 1 #define PS_ATST 1
#define PS_FOG 0 #define PS_FOG 0
#define PS_IIP 0 #define PS_IIP 0
#define PS_CLR1 0 #define PS_CLR_HW 0
#define PS_FBA 0 #define PS_FBA 0
#define PS_FBMASK 0 #define PS_FBMASK 0
#define PS_LTF 1 #define PS_LTF 1
@ -750,21 +750,29 @@ void ps_blend(inout float4 Color, float As, float2 pos_xy)
} }
else else
{ {
// Needed for Cd * (As/Ad/F + 1) blending modes if (PS_CLR_HW == 1)
if (PS_CLR1 == 1)
{ {
// Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = (float3)255.0f; Color.rgb = (float3)255.0f;
} }
else if (PS_CLR1 > 1) else if (PS_CLR_HW == 2 || PS_CLR_HW == 3)
{ {
// PS_CLR1 2 Af, PS_CLR1 3 As // PS_CLR_HW 2 Af, PS_CLR_HW 3 As
// Cd*As or Cd*F // Cd*As or Cd*F
float Alpha = PS_CLR1 == 2 ? Af : As; float Alpha = PS_CLR_HW == 2 ? Af : As;
Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f)); Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f));
Color.rgb *= (float3)255.0f; Color.rgb *= (float3)255.0f;
} }
else if (PS_CLR_HW == 4)
{
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
Color.rgb *= (255.0f / 128.0f);
}
} }
} }

View File

@ -719,13 +719,13 @@ void ps_blend(inout vec4 Color, float As)
#else #else
// Needed for Cd * (As/Ad/F + 1) blending modes // Needed for Cd * (As/Ad/F + 1) blending modes
#if PS_CLR1 == 1 #if PS_CLR_HW == 1
Color.rgb = vec3(255.0f); Color.rgb = vec3(255.0f);
#elif PS_CLR1 > 1 #elif PS_CLR_HW == 2 || PS_CLR_HW == 3
// PS_CLR1 2 Af, PS_CLR1 3 As // PS_CLR_HW 2 Af, PS_CLR_HW 3 As
// Cd*As or Cd*F // Cd*As or Cd*F
#if PS_CLR1 == 2 #if PS_CLR_HW == 2
float Alpha = Af; float Alpha = Af;
#else #else
float Alpha = As; float Alpha = As;
@ -733,6 +733,11 @@ void ps_blend(inout vec4 Color, float As)
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f))); Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f); Color.rgb *= vec3(255.0f);
#elif PS_CLR_HW == 4
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
Color.rgb *= (255.0f / 128.0f);
#endif #endif
#endif #endif

View File

@ -315,7 +315,7 @@ void main()
#define PS_TCC 1 #define PS_TCC 1
#define PS_ATST 1 #define PS_ATST 1
#define PS_FOG 0 #define PS_FOG 0
#define PS_CLR1 0 #define PS_CLR_HW 0
#define PS_FBA 0 #define PS_FBA 0
#define PS_FBMASK 0 #define PS_FBMASK 0
#define PS_LTF 1 #define PS_LTF 1
@ -1056,14 +1056,14 @@ void ps_blend(inout vec4 Color, float As)
#endif #endif
#else #else
// Needed for Cd * (As/Ad/F + 1) blending modes #if PS_CLR_HW == 1
#if PS_CLR1 == 1 // Needed for Cd * (As/Ad/F + 1) blending modes
Color.rgb = vec3(255.0f); Color.rgb = vec3(255.0f);
#elif PS_CLR1 > 1 #elif PS_CLR_HW == 2 || PS_CLR_HW == 3
// PS_CLR1 2 Af, PS_CLR1 3 As // PS_CLR_HW 2 Af, PS_CLR_HW 3 As
// Cd*As or Cd*F // Cd*As or Cd*F
#if PS_CLR1 == 2 #if PS_CLR_HW == 2
float Alpha = Af; float Alpha = Af;
#else #else
float Alpha = As; float Alpha = As;
@ -1071,6 +1071,11 @@ void ps_blend(inout vec4 Color, float As)
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f))); Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f); Color.rgb *= vec3(255.0f);
#elif PS_CLR_HW == 4
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
Color.rgb *= (255.0f / 128.0f);
#endif #endif
#endif #endif
} }

View File

@ -533,8 +533,8 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ BLEND_ACCU , OP_ADD , SRC1_ALPHA , CONST_ONE} , //?0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { BLEND_ACCU , OP_ADD , SRC1_ALPHA , CONST_ONE} , //?0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
{ BLEND_NO_REC , OP_ADD , SRC1_ALPHA , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As { BLEND_NO_REC , OP_ADD , SRC1_ALPHA , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As
{ BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1)
{ 0 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { BLEND_C_CLR4 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
{ 0 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { BLEND_C_CLR4 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
{ BLEND_NO_REC | BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) { BLEND_NO_REC | BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
{ BLEND_ACCU , OP_ADD , CONST_COLOR , CONST_ONE} , //?0221: (Cs - 0)*F + Cd ==> Cs*F + Cd { BLEND_ACCU , OP_ADD , CONST_COLOR , CONST_ONE} , //?0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
{ BLEND_NO_REC , OP_ADD , CONST_COLOR , CONST_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F { BLEND_NO_REC , OP_ADD , CONST_COLOR , CONST_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F
@ -558,7 +558,7 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0
{ 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As { 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_C_CLR1 , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) { BLEND_C_CLR1 , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)
{ BLEND_C_CLR2_AS , OP_ADD , DST_COLOR , SRC1_ALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As { BLEND_C_CLR3_AS , OP_ADD , DST_COLOR , SRC1_ALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As
{ 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad { 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad
{ BLEND_C_CLR1 , OP_ADD , DST_COLOR , DST_ALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) { BLEND_C_CLR1 , OP_ADD , DST_COLOR , DST_ALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
{ 0 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad { 0 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
@ -569,7 +569,7 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ BLEND_ACCU , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ONE} , //?2001: (0 - Cs)*As + Cd ==> Cd - Cs*As { BLEND_ACCU , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ONE} , //?2001: (0 - Cs)*As + Cd ==> Cd - Cs*As
{ BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As { BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_ALPHA , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As
{ 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) { 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad)
{ 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad { BLEND_C_CLR4 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad
{ 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad { 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad
{ BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) { BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)
{ BLEND_ACCU , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , //?2021: (0 - Cs)*F + Cd ==> Cd - Cs*F { BLEND_ACCU , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , //?2021: (0 - Cs)*F + Cd ==> Cd - Cs*F

View File

@ -115,14 +115,15 @@ public:
enum HWBlendFlags enum HWBlendFlags
{ {
// A couple of flag to determine the blending behavior // A couple of flag to determine the blending behavior
BLEND_CD = 0x10, // Output is Cd, hw blend can handle it BLEND_CD = 0x8, // Output is Cd, hw blend can handle it
BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader BLEND_MIX1 = 0x10, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader BLEND_MIX2 = 0x20, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader BLEND_MIX3 = 0x40, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader
BLEND_A_MAX = 0x100, // Impossible blending uses coeff bigger than 1 BLEND_A_MAX = 0x80, // Impossible blending uses coeff bigger than 1
BLEND_C_CLR1 = 0x200, // Clear color blending (use directly the destination color as blending factor) BLEND_C_CLR1 = 0x100, // Clear color blending (use directly the destination color as blending factor)
BLEND_C_CLR2_AS = 0x400, // Clear color blending (use directly the destination color as blending factor) BLEND_C_CLR2_AF = 0x200, // Clear color blending (use directly the destination color as blending factor)
BLEND_C_CLR2_AF = 0x800, // Clear color blending (use directly the destination color as blending factor) BLEND_C_CLR3_AS = 0x400, // Clear color blending (use directly the destination color as blending factor)
BLEND_C_CLR4 = 0x800, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128
BLEND_NO_REC = 0x1000, // Doesn't require sampling of the RT as a texture BLEND_NO_REC = 0x1000, // Doesn't require sampling of the RT as a texture
BLEND_ACCU = 0x2000, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds BLEND_ACCU = 0x2000, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
}; };
@ -226,7 +227,7 @@ struct alignas(16) GSHWDrawConfig
u32 blend_b : 2; u32 blend_b : 2;
u32 blend_c : 2; u32 blend_c : 2;
u32 blend_d : 2; u32 blend_d : 2;
u32 clr1 : 2; u32 clr_hw : 3;
u32 hdr : 1; u32 hdr : 1;
u32 colclip : 1; u32 colclip : 1;
u32 alpha_clamp : 1; u32 alpha_clamp : 1;
@ -254,7 +255,7 @@ struct alignas(16) GSHWDrawConfig
// Scan mask // Scan mask
u32 scanmsk : 2; u32 scanmsk : 2;
u32 _free2 : 1; //u32 _free2 : 0;
}; };
u64 key; u64 key;

View File

@ -166,7 +166,7 @@ void GSDevice11::SetupPS(PSSelector sel, const GSHWDrawConfig::PSConstantBuffer*
sm.AddMacro("PS_ATST", sel.atst); sm.AddMacro("PS_ATST", sel.atst);
sm.AddMacro("PS_FOG", sel.fog); sm.AddMacro("PS_FOG", sel.fog);
sm.AddMacro("PS_IIP", sel.iip); sm.AddMacro("PS_IIP", sel.iip);
sm.AddMacro("PS_CLR1", sel.clr1); sm.AddMacro("PS_CLR_HW", sel.clr_hw);
sm.AddMacro("PS_FBA", sel.fba); sm.AddMacro("PS_FBA", sel.fba);
sm.AddMacro("PS_FBMASK", sel.fbmask); sm.AddMacro("PS_FBMASK", sel.fbmask);
sm.AddMacro("PS_LTF", sel.ltf); sm.AddMacro("PS_LTF", sel.ltf);

View File

@ -777,16 +777,20 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
{ {
if (blend_flag & BLEND_C_CLR1) if (blend_flag & BLEND_C_CLR1)
{ {
m_conf.ps.clr1 = 1; m_conf.ps.clr_hw = 1;
} }
else if (blend_flag & BLEND_C_CLR2_AF) else if (blend_flag & BLEND_C_CLR2_AF)
{ {
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(ALPHA.FIX) / 128.0f; m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(ALPHA.FIX) / 128.0f;
m_conf.ps.clr1 = 2; m_conf.ps.clr_hw = 2;
} }
else if (blend_flag & BLEND_C_CLR2_AS) else if (blend_flag & BLEND_C_CLR3_AS)
{ {
m_conf.ps.clr1 = 3; m_conf.ps.clr_hw = 3;
}
else if (blend_flag & BLEND_C_CLR4)
{
m_conf.ps.clr_hw = 4;
} }
if (m_conf.ps.dfmt == 1 && ALPHA.C == 1) if (m_conf.ps.dfmt == 1 && ALPHA.C == 1)

View File

@ -1090,7 +1090,7 @@ std::string GSDeviceOGL::GetPSSource(PSSelector sel)
+ format("#define PS_TCC %d\n", sel.tcc) + format("#define PS_TCC %d\n", sel.tcc)
+ format("#define PS_ATST %d\n", sel.atst) + format("#define PS_ATST %d\n", sel.atst)
+ format("#define PS_FOG %d\n", sel.fog) + format("#define PS_FOG %d\n", sel.fog)
+ format("#define PS_CLR1 %d\n", sel.clr1) + format("#define PS_CLR_HW %d\n", sel.clr_hw)
+ format("#define PS_FBA %d\n", sel.fba) + format("#define PS_FBA %d\n", sel.fba)
+ format("#define PS_LTF %d\n", sel.ltf) + format("#define PS_LTF %d\n", sel.ltf)
+ format("#define PS_AUTOMATIC_LOD %d\n", sel.automatic_lod) + format("#define PS_AUTOMATIC_LOD %d\n", sel.automatic_lod)

View File

@ -1781,7 +1781,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(GSHWDrawConfig::PSSelector sel)
AddMacro(ss, "PS_TCC", sel.tcc); AddMacro(ss, "PS_TCC", sel.tcc);
AddMacro(ss, "PS_ATST", sel.atst); AddMacro(ss, "PS_ATST", sel.atst);
AddMacro(ss, "PS_FOG", sel.fog); AddMacro(ss, "PS_FOG", sel.fog);
AddMacro(ss, "PS_CLR1", sel.clr1); AddMacro(ss, "PS_CLR_HW", sel.clr_hw);
AddMacro(ss, "PS_FBA", sel.fba); AddMacro(ss, "PS_FBA", sel.fba);
AddMacro(ss, "PS_LTF", sel.ltf); AddMacro(ss, "PS_LTF", sel.ltf);
AddMacro(ss, "PS_AUTOMATIC_LOD", sel.automatic_lod); AddMacro(ss, "PS_AUTOMATIC_LOD", sel.automatic_lod);