From d0b34dc04cd1b9388e69fdce084187f0ac26521a Mon Sep 17 00:00:00 2001 From: lightningterror <18107717+lightningterror@users.noreply.github.com> Date: Tue, 18 Jan 2022 14:30:06 +0100 Subject: [PATCH] GS-hw: Implement BLEND CD. Allow only hw blend to do the math: the output will be Cd, so there is no need to run costly barriers. This will help Vulkan/OpenGL. Disable colclip on BLEND CD as there is no wrapping; this will help all renderers, especially in HDR cases. --- pcsx2/GS/Renderers/Common/GSDevice.cpp | 18 ++++----- pcsx2/GS/Renderers/Common/GSDevice.h | 1 + pcsx2/GS/Renderers/HW/GSRendererNew.cpp | 49 +++++++++++++++++-------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index b317f79366..d12c8ea4c6 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -512,13 +512,13 @@ u16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; } std::array GSDevice::m_blendMap = {{ { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 0022: (Cs - 
Cs)*F + 0 ==> 0 { BLEND_A_MAX | BLEND_MIX2 , OP_SUBTRACT , CONST_ONE , SRC1_ALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As { BLEND_MIX1 , OP_ADD , SRC1_ALPHA , INV_SRC1_ALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) @@ -548,13 +548,13 @@ std::array GSDevice::m_blendMap = { BLEND_A_MAX | BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_ONE} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F { BLEND_MIX1 , OP_REV_SUBTRACT , CONST_COLOR , CONST_COLOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 { 0 , OP_ADD , CONST_ONE , SRC1_ALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As { BLEND_C_CLR , OP_ADD , DST_COLOR , SRC1_ALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background @@ -584,13 +584,13 @@ std::array GSDevice::m_blendMap = { 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) { 0 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , 
// 2200: (0 - 0)*As + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0 { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs - { 0 , OP_ADD , CONST_ZERO , CONST_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd + { BLEND_CD , OP_ADD , CONST_ZERO , CONST_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd { BLEND_NO_REC , OP_ADD , CONST_ZERO , CONST_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0 { 0 , OP_ADD , SRC_ALPHA , INV_SRC_ALPHA} , // extra for merge operation }}; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 7d70b32c92..40cdd53209 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -115,6 +115,7 @@ public: enum HWBlendFlags { // A couple of flag to determine the blending behavior + BLEND_CD = 0x10, // Output is Cd, hw blend can handle it BLEND_MIX1 = 0x20, // Mix of hw and sw, do Cs*F or Cs*As in shader BLEND_MIX2 = 0x40, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader BLEND_MIX3 = 0x80, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader diff --git a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp index 537d2e492f..e51b317685 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp @@ -532,6 +532,9 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D); const int 
blend_flag = g_gs_device->GetBlendFlags(blend_index); + // HW blend can handle Cd output. + bool color_dest_blend = !!(blend_flag & BLEND_CD); + // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd bool accumulation_blend = !!(blend_flag & BLEND_ACCU); @@ -544,18 +547,18 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) const bool blend_mix3 = !!(blend_flag & BLEND_MIX3); bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3); - // SW Blend is (nearly) free. Let's use it. - const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending - || blend_non_recursive // Free sw blending, doesn't require barriers or reading fb - || accumulation_blend // Mix of hw/sw blending - || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw - || (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier - // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. bool sw_blending = false; if (g_gs_device->Features().texture_barrier) { + // SW Blend is (nearly) free. Let's use it. 
+ const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending + || blend_non_recursive // Free sw blending, doesn't require barriers or reading fb + || accumulation_blend // Mix of hw/sw blending + || (m_prim_overlap == PRIM_OVERLAP_NO) // Blend can be done in a single draw + || (m_conf.require_full_barrier); // Another effect (for example fbmask) already requires a full barrier + switch (GSConfig.AccurateBlendingUnit) { case AccBlendLevel::Ultra: @@ -606,21 +609,28 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) free_colclip = blend_non_recursive; GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap); - if (free_colclip) + + if (color_dest_blend) + { + // No overflow, disable colclip. + GL_INS("COLCLIP mode DISABLED"); + } + else if (free_colclip) { // The fastest algo that requires a single pass GL_INS("COLCLIP Free mode ENABLED"); - m_conf.ps.colclip = 1; - sw_blending = true; - accumulation_blend = false; // disable the HDR algo - blend_mix = false; + m_conf.ps.colclip = 1; + sw_blending = true; + // Disable the HDR algo + accumulation_blend = false; + blend_mix = false; } else if (accumulation_blend || blend_mix) { // A fast algo that requires 2 passes GL_INS("COLCLIP Fast HDR mode ENABLED"); m_conf.ps.hdr = 1; - sw_blending = true; // Enable sw blending for the HDR algo + sw_blending = true; // Enable sw blending for the HDR algo } else if (sw_blending && g_gs_device->Features().texture_barrier) { @@ -646,9 +656,10 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) if (g_gs_device->Features().texture_barrier) { // Disable hw/sw blend and do pure sw blend with reading the framebuffer. 
+ color_dest_blend = false; accumulation_blend = false; - blend_mix = false; - m_conf.ps.pabe = 1; + blend_mix = false; + m_conf.ps.pabe = 1; } else { @@ -668,7 +679,13 @@ void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER) GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); #endif - if (sw_blending) + if (color_dest_blend) + { + // Blend output will be Cd, no need to set Af. + m_conf.blend = {blend_index, 0, ALPHA.C == 2, false, false}; + sw_blending = false; // DATE_PRIMID + } + else if (sw_blending) { m_conf.ps.blend_a = ALPHA.A; m_conf.ps.blend_b = ALPHA.B;