GS/HW: Fix up some RTA behaviour and reduce copies

This commit is contained in:
refractionpcsx2 2024-03-31 00:48:28 +00:00
parent 8a73f98b1f
commit effdfd5a22
5 changed files with 55 additions and 36 deletions

View File

@ -139,7 +139,7 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input)
{ {
PS_OUTPUT output; PS_OUTPUT output;
float4 value = sample_c(input.t); float4 value = sample_c(input.t);
output.c = float4(value.rgb, value.a / (127.5f / 255.0f)); output.c = float4(value.rgb, value.a / (128.25f / 255.0f));
return output; return output;
} }

View File

@ -309,7 +309,7 @@ void ps_datm0_rta_correction()
void ps_rta_correction() void ps_rta_correction()
{ {
vec4 value = sample_c(); vec4 value = sample_c();
SV_Target0 = vec4(value.rgb, value.a / (127.5f / 255.0f)); SV_Target0 = vec4(value.rgb, value.a / (128.25f / 255.0f));
} }
#endif #endif

View File

@ -114,7 +114,7 @@ void ps_datm0_rta_correction()
void ps_rta_correction() void ps_rta_correction()
{ {
vec4 value = sample_c(v_tex); vec4 value = sample_c(v_tex);
o_col0 = vec4(value.rgb, value.a / (127.5f / 255.0f)); o_col0 = vec4(value.rgb, value.a / (128.25f / 255.0f));
} }
#endif #endif

View File

@ -4369,9 +4369,19 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX); const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX);
if (rta_correction) if (rta_correction)
{ {
rt->RTACorrect(); const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE_PRIMID || DATE_BARRIER || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle);
if (!full_cover)
{
rt->RTACorrect();
m_conf.rt = rt->m_texture;
}
else
rt->m_rt_alpha_scale = true;
m_conf.ps.rta_correction = rt->m_rt_alpha_scale; m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
m_conf.rt = rt->m_texture;
} }
// Care for hw blend value, 6 is for hw/sw, sw blending used. // Care for hw blend value, 6 is for hw/sw, sw blending used.
@ -4452,7 +4462,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
float scale = tex->GetScale(); float scale = tex->GetScale();
HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy); HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy);
if (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale) if ((m_conf.ps.tex_is_fb && rt->m_rt_alpha_scale) || (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale))
m_conf.ps.rta_source_correction = 1; m_conf.ps.rta_source_correction = 1;
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
@ -5179,10 +5189,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// Blend // Blend
int blend_alpha_min = 0, blend_alpha_max = 255; int blend_alpha_min = 0, blend_alpha_max = 255;
int rt_new_alpha_min = 0, rt_new_alpha_max = 255;
if (rt) if (rt)
{ {
blend_alpha_min = rt->m_alpha_min; rt_new_alpha_min = rt->m_alpha_min;
blend_alpha_max = rt->m_alpha_max; rt_new_alpha_max = rt->m_alpha_max;
blend_alpha_min = rt_new_alpha_min;
blend_alpha_max = rt_new_alpha_max;
const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24); const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24);
// On DX FBMask emulation can be missing on lower blend levels, so we'll do whatever the API does. // On DX FBMask emulation can be missing on lower blend levels, so we'll do whatever the API does.
@ -5210,26 +5224,26 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{ {
if (full_cover) if (full_cover)
{ {
rt->m_alpha_max = s_alpha_max; rt_new_alpha_max = s_alpha_max;
rt->m_alpha_min = s_alpha_min; rt_new_alpha_min = s_alpha_min;
} }
else else
{ {
rt->m_alpha_max = std::max(s_alpha_max, rt->m_alpha_max); rt_new_alpha_max = std::max(s_alpha_max, rt_new_alpha_max);
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min); rt_new_alpha_min = std::min(s_alpha_min, rt_new_alpha_min);
} }
} }
else if ((fb_mask & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked. else if ((fb_mask & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked.
{ {
// Any number of bits could be set, so let's be paranoid about it // Any number of bits could be set, so let's be paranoid about it
const u32 new_max_alpha = (s_alpha_max != s_alpha_min) ? (std::min(s_alpha_max, ((1 << (32 - std::countl_zero(static_cast<u32>(s_alpha_max)))) - 1)) & ~fb_mask) : (s_alpha_max & ~fb_mask); const u32 new_max_alpha = (s_alpha_max != s_alpha_min) ? (std::min(s_alpha_max, ((1 << (32 - std::countl_zero(static_cast<u32>(s_alpha_max)))) - 1)) & ~fb_mask) : (s_alpha_max & ~fb_mask);
const u32 curr_max = (rt->m_alpha_max != rt->m_alpha_min && rt->m_alpha_range) ? (((1 << (32 - std::countl_zero(static_cast<u32>(rt->m_alpha_max)))) - 1) & fb_mask) : ((rt->m_alpha_max | rt->m_alpha_min) & fb_mask); const u32 curr_max = (rt_new_alpha_max != rt_new_alpha_min && rt->m_alpha_range) ? (((1 << (32 - std::countl_zero(static_cast<u32>(rt_new_alpha_max)))) - 1) & fb_mask) : ((rt_new_alpha_max | rt_new_alpha_min) & fb_mask);
if (full_cover) if (full_cover)
rt->m_alpha_max = new_max_alpha | curr_max; rt_new_alpha_max = new_max_alpha | curr_max;
else else
rt->m_alpha_max = std::max(static_cast<int>(new_max_alpha | curr_max), rt->m_alpha_max); rt_new_alpha_max = std::max(static_cast<int>(new_max_alpha | curr_max), rt_new_alpha_max);
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min); rt_new_alpha_min = std::min(s_alpha_min, rt_new_alpha_min);
} }
if (full_cover && (fb_mask & alpha_mask) == 0) if (full_cover && (fb_mask & alpha_mask) == 0)
rt->m_alpha_range = s_alpha_max != s_alpha_min; rt->m_alpha_range = s_alpha_max != s_alpha_min;
@ -5242,33 +5256,33 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
const GSVector4i shuffle_rect = GSVector4i(m_vt.m_min.p.x, m_vt.m_min.p.y, m_vt.m_max.p.x, m_vt.m_max.p.y); const GSVector4i shuffle_rect = GSVector4i(m_vt.m_min.p.x, m_vt.m_min.p.y, m_vt.m_max.p.x, m_vt.m_max.p.y);
if (!rt->m_valid.rintersect(shuffle_rect).eq(rt->m_valid) || (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000)) if (!rt->m_valid.rintersect(shuffle_rect).eq(rt->m_valid) || (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000))
{ {
rt->m_alpha_max = std::max(static_cast<int>((std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127), rt->m_alpha_max) | fba_value; rt_new_alpha_max = std::max(static_cast<int>((std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127), rt_new_alpha_max) | fba_value;
rt->m_alpha_min = std::min(static_cast<int>(std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80), rt->m_alpha_min); rt_new_alpha_min = std::min(static_cast<int>(std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80), rt_new_alpha_min);
} }
else else
{ {
rt->m_alpha_max = (std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127 | fba_value; rt_new_alpha_max = (std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127 | fba_value;
rt->m_alpha_min = (std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) | fba_value; rt_new_alpha_min = (std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) | fba_value;
} }
rt->m_alpha_range = true; rt->m_alpha_range = true;
} }
GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt->m_alpha_min, rt->m_alpha_max); GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt_new_alpha_min, rt_new_alpha_max);
// If there's no overlap, the values in the RT before FB write will be the old values. // If there's no overlap, the values in the RT before FB write will be the old values.
if (m_prim_overlap != PRIM_OVERLAP_NO) if (m_prim_overlap != PRIM_OVERLAP_NO)
{ {
// Otherwise, it may be a mix of the old/new values. // Otherwise, it may be a mix of the old/new values.
blend_alpha_min = std::min(blend_alpha_min, rt->m_alpha_min); blend_alpha_min = std::min(blend_alpha_min, rt_new_alpha_min);
blend_alpha_max = std::max(blend_alpha_max, rt->m_alpha_max); blend_alpha_max = std::max(blend_alpha_max, rt_new_alpha_max);
} }
if (!rt->m_32_bits_fmt) if (!rt->m_32_bits_fmt)
{ {
rt->m_alpha_max &= 128; rt_new_alpha_max &= 128;
rt->m_alpha_min &= 128; rt_new_alpha_min &= 128;
if (rt->m_alpha_max == rt->m_alpha_min) if (rt_new_alpha_max == rt_new_alpha_min)
rt->m_alpha_range = false; rt->m_alpha_range = false;
} }
} }
@ -5397,12 +5411,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference // If we Correct/Decorrect and tex is rt, we will need to update the texture reference
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture; const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
m_can_correct_alpha = !needs_ad && (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8); m_can_correct_alpha = !needs_ad && (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8) && rt_new_alpha_max <= 128;
if (rt) if (rt)
{ {
const bool partial_fbmask = (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0); const bool partial_fbmask = (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0);
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (m_conf.colormask.wa && (rt->m_alpha_max > 128 || partial_fbmask)); const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (m_conf.colormask.wa && (rt_new_alpha_max > 128 || partial_fbmask));
if (rta_decorrection) if (rta_decorrection)
{ {
@ -5438,7 +5452,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
} }
else if (m_channel_shuffle) else if (m_channel_shuffle)
{ {
if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || partial_fbmask || rt->m_alpha_max > 128) if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || partial_fbmask || rt_new_alpha_max > 128)
{ {
m_can_correct_alpha = false; m_can_correct_alpha = false;
rt->RTADecorrect(); rt->RTADecorrect();
@ -5464,7 +5478,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
} }
} }
else if (!rt->m_rt_alpha_scale) else if (!rt->m_rt_alpha_scale)
m_can_correct_alpha = rt->m_alpha_max <= 128 && m_can_correct_alpha; m_can_correct_alpha = m_can_correct_alpha;
m_conf.ps.rta_correction = rt->m_rt_alpha_scale; m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
} }
@ -5472,9 +5486,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle))) if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
{ {
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, rt); EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, rt);
if (req_src_update && tex->m_texture != rt->m_texture)
tex->m_texture = rt->m_texture;
} }
else else
{ {
@ -5482,11 +5493,11 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.ps.no_color1 = true; m_conf.ps.no_color1 = true;
// Try to avoid palette draws // Try to avoid palette draws
if (rt && m_can_correct_alpha && !rt->m_rt_alpha_scale && rt->m_alpha_max == rt->m_alpha_min) if (rt && m_can_correct_alpha && !rt->m_rt_alpha_scale && rt->m_alpha_max == rt->m_alpha_min )
{ {
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32); const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS); const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle); const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS));
if (!full_cover) if (!full_cover)
{ {
@ -5500,6 +5511,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
} }
} }
if (req_src_update && tex->m_texture != rt->m_texture)
tex->m_texture = rt->m_texture;
if (rt)
{
rt->m_alpha_max = rt_new_alpha_max;
rt->m_alpha_min = rt_new_alpha_min;
}
// Warning must be done after EmulateZbuffer // Warning must be done after EmulateZbuffer
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color. // Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd // The idea is to compute first the color which is independent of the alpha test. And then do a 2nd

View File

@ -129,7 +129,7 @@ fragment float4 ps_primid_rta_init_datm0(float4 p [[position]], DirectReadTextur
fragment float4 ps_rta_correction(ConvertShaderData data [[stage_in]], ConvertPSRes res) fragment float4 ps_rta_correction(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{ {
float4 in = res.sample(data.t); float4 in = res.sample(data.t);
return float4(in.rgb, in.a / (127.5f / 255.0f)); return float4(in.rgb, in.a / (128.25f / 255.0f));
} }
fragment float4 ps_rta_decorrection(ConvertShaderData data [[stage_in]], ConvertPSRes res) fragment float4 ps_rta_decorrection(ConvertShaderData data [[stage_in]], ConvertPSRes res)