GS-hw: Implement BLEND CD.

Allow hw blend only to do the math, output will be Cd, no need to run costly barriers, will help Vulkan/Opengl.

Disable colclip on BLEND CD as there is no wrapping, will help all renderers, especially on hdr cases.
This commit is contained in:
lightningterror 2022-01-18 14:30:06 +01:00
parent 6a8cffe615
commit d0b34dc04c
3 changed files with 43 additions and 25 deletions

View File

@ -512,13 +512,13 @@ u16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; }
std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap = std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{{ {{
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0
{ BLEND_A_MAX | BLEND_MIX2 , OP_SUBTRACT , CONST_ONE , SRC1_ALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As { BLEND_A_MAX | BLEND_MIX2 , OP_SUBTRACT , CONST_ONE , SRC1_ALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As
{ BLEND_MIX1 , OP_ADD , SRC1_ALPHA , INV_SRC1_ALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) { BLEND_MIX1 , OP_ADD , SRC1_ALPHA , INV_SRC1_ALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)
@ -548,13 +548,13 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ BLEND_A_MAX | BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F { BLEND_A_MAX | BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F
{ BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F { BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0
{ 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As { 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
{ BLEND_C_CLR , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background { BLEND_C_CLR , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background
@ -584,13 +584,13 @@ std::array<HWBlend, 3*3*3*3 + 1> GSDevice::m_blendMap =
{ 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) { 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)
{ 0 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F { 0 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0
{ BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs
{ 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd
{ BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0
{ 0 , OP_ADD , SRC_ALPHA , INV_SRC_ALPHA} , // extra for merge operation { 0 , OP_ADD , SRC_ALPHA , INV_SRC_ALPHA} , // extra for merge operation
}}; }};

View File

@ -115,6 +115,7 @@ public:
enum HWBlendFlags enum HWBlendFlags
{ {
// A couple of flag to determine the blending behavior // A couple of flag to determine the blending behavior
BLEND_CD = 0x10, // Output is Cd, hw blend can handle it
BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader
BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader
BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader

View File

@ -532,6 +532,9 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D); u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
const int blend_flag = g_gs_device->GetBlendFlags(blend_index); const int blend_flag = g_gs_device->GetBlendFlags(blend_index);
// HW blend can handle Cd output.
bool color_dest_blend = !!(blend_flag & BLEND_CD);
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
bool accumulation_blend = !!(blend_flag & BLEND_ACCU); bool accumulation_blend = !!(blend_flag & BLEND_ACCU);
@ -544,18 +547,18 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
const bool blend_mix3 = !!(blend_flag & BLEND_MIX3); const bool blend_mix3 = !!(blend_flag & BLEND_MIX3);
bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3); bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3);
// SW Blend is (nearly) free. Let's use it.
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
|| (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
|| (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
// Warning no break on purpose // Warning no break on purpose
// Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks.
bool sw_blending = false; bool sw_blending = false;
if (g_gs_device->Features().texture_barrier) if (g_gs_device->Features().texture_barrier)
{ {
// SW Blend is (nearly) free. Let's use it.
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
|| (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw
|| (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier
switch (GSConfig.AccurateBlendingUnit) switch (GSConfig.AccurateBlendingUnit)
{ {
case AccBlendLevel::Ultra: case AccBlendLevel::Ultra:
@ -606,21 +609,28 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
free_colclip = blend_non_recursive; free_colclip = blend_non_recursive;
GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)", GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)",
ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap); ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap);
if (free_colclip)
if (color_dest_blend)
{
// No overflow, disable colclip.
GL_INS("COLCLIP mode DISABLED");
}
else if (free_colclip)
{ {
// The fastest algo that requires a single pass // The fastest algo that requires a single pass
GL_INS("COLCLIP Free mode ENABLED"); GL_INS("COLCLIP Free mode ENABLED");
m_conf.ps.colclip = 1; m_conf.ps.colclip = 1;
sw_blending = true; sw_blending = true;
accumulation_blend = false; // disable the HDR algo // Disable the HDR algo
blend_mix = false; accumulation_blend = false;
blend_mix = false;
} }
else if (accumulation_blend || blend_mix) else if (accumulation_blend || blend_mix)
{ {
// A fast algo that requires 2 passes // A fast algo that requires 2 passes
GL_INS("COLCLIP Fast HDR mode ENABLED"); GL_INS("COLCLIP Fast HDR mode ENABLED");
m_conf.ps.hdr = 1; m_conf.ps.hdr = 1;
sw_blending = true; // Enable sw blending for the HDR algo sw_blending = true; // Enable sw blending for the HDR algo
} }
else if (sw_blending && g_gs_device->Features().texture_barrier) else if (sw_blending && g_gs_device->Features().texture_barrier)
{ {
@ -646,9 +656,10 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
if (g_gs_device->Features().texture_barrier) if (g_gs_device->Features().texture_barrier)
{ {
// Disable hw/sw blend and do pure sw blend with reading the framebuffer. // Disable hw/sw blend and do pure sw blend with reading the framebuffer.
color_dest_blend = false;
accumulation_blend = false; accumulation_blend = false;
blend_mix = false; blend_mix = false;
m_conf.ps.pabe = 1; m_conf.ps.pabe = 1;
} }
else else
{ {
@ -668,7 +679,13 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)", GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)",
ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending);
#endif #endif
if (sw_blending) if (color_dest_blend)
{
// Blend output will be Cd, no need to set Af.
m_conf.blend = {blend_index, 0, ALPHA.C == 2, false, false};
sw_blending = false; // DATE_PRIMID
}
else if (sw_blending)
{ {
m_conf.ps.blend_a = ALPHA.A; m_conf.ps.blend_a = ALPHA.A;
m_conf.ps.blend_b = ALPHA.B; m_conf.ps.blend_b = ALPHA.B;