GS/HW: Improve Basic blending

 - Don't assume no overlap = free. This reduces the number of barriers.
 - Prefer SW blend for Ad blends, except when it's constant. It's
   impossible to emulate accurately in HW, because an alpha value of 128
   means 1.0, not 255.
 - Prefer SW blend for AFIX > 128 on no-overlap sprites.
This commit is contained in:
Stenzek 2023-07-08 22:09:58 +10:00 committed by Connor McLaughlin
parent bf96ceeacc
commit fd1b1d0c24
2 changed files with 51 additions and 16 deletions
pcsx2/GS/Renderers/HW

View File

@ -3345,7 +3345,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
return true;
}
void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass)
void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass)
{
// AA1: Don't enable blending on AA1, not yet implemented on hardware mode,
// it requires coverage sample so it's safer to turn it off instead.
@ -3377,6 +3377,13 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
m_conf.ps.blend_c = ALPHA.C;
m_conf.ps.blend_d = ALPHA.D;
#ifdef ENABLE_OGL_DEBUG
static constexpr const char* col[3] = {"Cs", "Cd", "0"};
static constexpr const char* alpha[3] = {"As", "Ad", "Af"};
GL_INS("EmulateBlending(): (%s - %s) * %s + %s", col[ALPHA.A], col[ALPHA.B], alpha[ALPHA.C], col[ALPHA.D]);
GL_INS("Draw AlphaMinMax: %d-%d, RT AlphaMinMax: %d-%d", GetAlphaMinMax().min, GetAlphaMinMax().max, rt_alpha_min, rt_alpha_max);
#endif
// When AA1 is enabled and Alpha Blending is disabled, alpha blending done with coverage instead of alpha.
// We use a COV value of 128 (full coverage) in triangles (except the edge geometry, which we can't do easily).
if (IsCoverageAlpha())
@ -3396,10 +3403,12 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
const bool alpha_c0_one = (m_conf.ps.blend_c == 0 && (GetAlphaMinMax().min == 128) && (GetAlphaMinMax().max == 128));
const bool alpha_c0_high_min_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().min > 128);
const bool alpha_c0_high_max_one = (m_conf.ps.blend_c == 0 && GetAlphaMinMax().max > 128);
const bool alpha_c1_zero = (m_conf.ps.blend_c == 1 && rt_alpha_min == 0 && rt_alpha_max == 0);
const bool alpha_c1_one = (m_conf.ps.blend_c == 1 && rt_alpha_min == 128 && rt_alpha_max == 128);
const bool alpha_c2_zero = (m_conf.ps.blend_c == 2 && AFIX == 0u);
const bool alpha_c2_one = (m_conf.ps.blend_c == 2 && AFIX == 128u);
const bool alpha_c2_high_one = (m_conf.ps.blend_c == 2 && AFIX > 128u);
const bool alpha_one = alpha_c0_one || alpha_c2_one;
const bool alpha_one = alpha_c0_one || alpha_c1_one || alpha_c2_one;
// Optimize blending equations, must be done before index calculation
if ((m_conf.ps.blend_a == m_conf.ps.blend_b) || ((m_conf.ps.blend_b == m_conf.ps.blend_d) && alpha_one))
@ -3418,7 +3427,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
m_conf.ps.blend_b = 0;
m_conf.ps.blend_c = 0;
}
else if (alpha_c0_zero || alpha_c2_zero)
else if (alpha_c0_zero || alpha_c1_zero || alpha_c2_zero)
{
// C == 0.0f
// (A - B) * C, result will be 0.0f so set A B to Cs
@ -3444,7 +3453,8 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// Ad cases, alpha write is masked, one barrier is enough, for d3d11 read the fb
// Replace Ad with As; blend flags will be used from As since we are changing the blend_index value.
// Must be done before index calculation, after blending equation optimizations
bool blend_ad_alpha_masked = (m_conf.ps.blend_c == 1) && (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
const bool blend_ad = m_conf.ps.blend_c == 1;
bool blend_ad_alpha_masked = blend_ad && (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic) || (COLCLAMP.CLAMP == 0))
&& g_gs_device->Features().texture_barrier && blend_ad_alpha_masked)
m_conf.ps.blend_c = 0;
@ -3506,13 +3516,15 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
const bool prefer_sw_blend = m_conf.require_full_barrier || (one_barrier && m_prim_overlap == PRIM_OVERLAP_NO);
// SW Blend is (nearly) free. Let's use it.
const bool no_prim_overlap = features.framebuffer_fetch ? (m_vt.m_primclass == GS_SPRITE_CLASS) : (m_prim_overlap == PRIM_OVERLAP_NO);
const bool no_prim_overlap = (m_prim_overlap == PRIM_OVERLAP_NO);
const bool impossible_or_free_blend = (blend_flag & BLEND_A_MAX) // Impossible blending
|| blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend // Mix of hw/sw blending
|| no_prim_overlap // Blend can be done in a single draw
|| (m_conf.require_full_barrier) // Another effect (for example fbmask) already requires a full barrier
|| (one_barrier && features.framebuffer_fetch); // On fbfetch, one barrier is like full barrier
|| (m_conf.require_full_barrier) // Another effect (for example fbmask) already requires a full barrier
// Blend can be done in a single draw, and we already need a barrier
// On fbfetch, one barrier is like full barrier
|| (one_barrier && (no_prim_overlap || features.framebuffer_fetch))
|| ((alpha_c2_high_one || alpha_c0_high_max_one) && no_prim_overlap);
switch (GSConfig.AccurateBlendingUnit)
{
@ -3540,6 +3552,8 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// If prims don't overlap prefer full sw blend on blend_ad_alpha_masked cases.
accumulation_blend &= !(prefer_sw_blend || (blend_ad_alpha_masked && m_prim_overlap == PRIM_OVERLAP_NO));
sw_blending |= impossible_or_free_blend;
// Ad blends are completely wrong without sw blend (Ad is 0.5 not 1 for 128). We can spare a barrier for it.
sw_blending |= blend_ad && no_prim_overlap;
// Try to do hw blend for clr2 case.
sw_blending &= !clr_blend1_2;
// Do not run BLEND MIX if sw blending is already present, it's less accurate
@ -4607,14 +4621,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// Upscaling hack to avoid various line/grid issues
MergeSprite(tex);
const GSDevice::FeatureSupport features(g_gs_device->Features());
if (!features.framebuffer_fetch)
m_prim_overlap = PrimitiveOverlap();
else
m_prim_overlap = PRIM_OVERLAP_UNKNOW;
m_prim_overlap = PrimitiveOverlap();
EmulateTextureShuffleAndFbmask(rt);
const GSDevice::FeatureSupport features = g_gs_device->Features();
// DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending
if (DATE)
{
@ -4711,8 +4723,20 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
}
// Blend
int blend_alpha_min = 0, blend_alpha_max = 255;
if (rt)
{
blend_alpha_min = rt->m_alpha_min;
blend_alpha_max = rt->m_alpha_max;
const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24);
if (is_24_bit)
{
// C24/Z24 - alpha is 1.
blend_alpha_min = 128;
blend_alpha_max = 128;
}
if (!m_channel_shuffle && !m_texture_shuffle)
{
const int fba_value = m_prev_env.CTXT[m_prev_env.PRIM.CTXT].FBA.FBA * 128;
@ -4734,7 +4758,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
rt->m_alpha_max |= std::max(GetAlphaMinMax().max | fba_value, rt->m_alpha_max);
rt->m_alpha_min = std::min(GetAlphaMinMax().min | fba_value, rt->m_alpha_min);
}
else
else if (!is_24_bit)
{
// If both are zero then we probably don't know what the alpha is.
if (rt->m_alpha_max == 0 && rt->m_alpha_min == 0)
@ -4749,6 +4773,16 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
rt->m_alpha_max = 255;
rt->m_alpha_min = 0;
}
GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt->m_alpha_min, rt->m_alpha_max);
// If there's no overlap, the values in the RT before FB write will be the old values.
if (m_prim_overlap != PRIM_OVERLAP_NO)
{
// Otherwise, it may be a mix of the old/new values.
blend_alpha_min = std::min(blend_alpha_min, rt->m_alpha_min);
blend_alpha_max = std::max(blend_alpha_max, rt->m_alpha_max);
}
}
// Not gonna spend too much time with this, it's not likely to be used much, can't be less accurate than it was.
@ -4756,12 +4790,13 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
ds->m_alpha_max = std::max(ds->m_alpha_max, static_cast<int>(m_vt.m_max.p.z) >> 24);
ds->m_alpha_min = std::min(ds->m_alpha_min, static_cast<int>(m_vt.m_min.p.z) >> 24);
GL_INS("New DS Alpha Range: %d-%d", ds->m_alpha_min, ds->m_alpha_max);
}
bool blending_alpha_pass = false;
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && (m_conf.colormask.wrgba & 0x7))
{
EmulateBlending(DATE_PRIMID, DATE_BARRIER, blending_alpha_pass);
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass);
}
else
{

View File

@ -86,7 +86,7 @@ private:
void SetupIA(float target_scale, float sx, float sy);
void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt);
bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only);
void EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
void EmulateTextureSampler(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds,
GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTexture*& src_copy);