mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Fix up some RTA behaviour and reduce copies
This commit is contained in:
parent
8a73f98b1f
commit
effdfd5a22
|
@ -139,7 +139,7 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input)
|
||||||
{
|
{
|
||||||
PS_OUTPUT output;
|
PS_OUTPUT output;
|
||||||
float4 value = sample_c(input.t);
|
float4 value = sample_c(input.t);
|
||||||
output.c = float4(value.rgb, value.a / (127.5f / 255.0f));
|
output.c = float4(value.rgb, value.a / (128.25f / 255.0f));
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -309,7 +309,7 @@ void ps_datm0_rta_correction()
|
||||||
void ps_rta_correction()
|
void ps_rta_correction()
|
||||||
{
|
{
|
||||||
vec4 value = sample_c();
|
vec4 value = sample_c();
|
||||||
SV_Target0 = vec4(value.rgb, value.a / (127.5f / 255.0f));
|
SV_Target0 = vec4(value.rgb, value.a / (128.25f / 255.0f));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -114,7 +114,7 @@ void ps_datm0_rta_correction()
|
||||||
void ps_rta_correction()
|
void ps_rta_correction()
|
||||||
{
|
{
|
||||||
vec4 value = sample_c(v_tex);
|
vec4 value = sample_c(v_tex);
|
||||||
o_col0 = vec4(value.rgb, value.a / (127.5f / 255.0f));
|
o_col0 = vec4(value.rgb, value.a / (128.25f / 255.0f));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -4369,9 +4369,19 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
|
||||||
const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX);
|
const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX);
|
||||||
if (rta_correction)
|
if (rta_correction)
|
||||||
{
|
{
|
||||||
rt->RTACorrect();
|
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
|
||||||
|
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
|
||||||
|
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE_PRIMID || DATE_BARRIER || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle);
|
||||||
|
|
||||||
|
if (!full_cover)
|
||||||
|
{
|
||||||
|
rt->RTACorrect();
|
||||||
|
m_conf.rt = rt->m_texture;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
rt->m_rt_alpha_scale = true;
|
||||||
|
|
||||||
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
|
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
|
||||||
m_conf.rt = rt->m_texture;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Care for hw blend value, 6 is for hw/sw, sw blending used.
|
// Care for hw blend value, 6 is for hw/sw, sw blending used.
|
||||||
|
@ -4452,7 +4462,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
|
||||||
float scale = tex->GetScale();
|
float scale = tex->GetScale();
|
||||||
HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy);
|
HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy);
|
||||||
|
|
||||||
if (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale)
|
if ((m_conf.ps.tex_is_fb && rt->m_rt_alpha_scale) || (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale))
|
||||||
m_conf.ps.rta_source_correction = 1;
|
m_conf.ps.rta_source_correction = 1;
|
||||||
|
|
||||||
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
|
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
|
||||||
|
@ -5179,10 +5189,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
|
|
||||||
// Blend
|
// Blend
|
||||||
int blend_alpha_min = 0, blend_alpha_max = 255;
|
int blend_alpha_min = 0, blend_alpha_max = 255;
|
||||||
|
int rt_new_alpha_min = 0, rt_new_alpha_max = 255;
|
||||||
if (rt)
|
if (rt)
|
||||||
{
|
{
|
||||||
blend_alpha_min = rt->m_alpha_min;
|
rt_new_alpha_min = rt->m_alpha_min;
|
||||||
blend_alpha_max = rt->m_alpha_max;
|
rt_new_alpha_max = rt->m_alpha_max;
|
||||||
|
|
||||||
|
blend_alpha_min = rt_new_alpha_min;
|
||||||
|
blend_alpha_max = rt_new_alpha_max;
|
||||||
|
|
||||||
const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24);
|
const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24);
|
||||||
// On DX FBMask emulation can be missing on lower blend levels, so we'll do whatever the API does.
|
// On DX FBMask emulation can be missing on lower blend levels, so we'll do whatever the API does.
|
||||||
|
@ -5210,26 +5224,26 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
{
|
{
|
||||||
if (full_cover)
|
if (full_cover)
|
||||||
{
|
{
|
||||||
rt->m_alpha_max = s_alpha_max;
|
rt_new_alpha_max = s_alpha_max;
|
||||||
rt->m_alpha_min = s_alpha_min;
|
rt_new_alpha_min = s_alpha_min;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
rt->m_alpha_max = std::max(s_alpha_max, rt->m_alpha_max);
|
rt_new_alpha_max = std::max(s_alpha_max, rt_new_alpha_max);
|
||||||
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min);
|
rt_new_alpha_min = std::min(s_alpha_min, rt_new_alpha_min);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ((fb_mask & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked.
|
else if ((fb_mask & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked.
|
||||||
{
|
{
|
||||||
// Any number of bits could be set, so let's be paranoid about it
|
// Any number of bits could be set, so let's be paranoid about it
|
||||||
const u32 new_max_alpha = (s_alpha_max != s_alpha_min) ? (std::min(s_alpha_max, ((1 << (32 - std::countl_zero(static_cast<u32>(s_alpha_max)))) - 1)) & ~fb_mask) : (s_alpha_max & ~fb_mask);
|
const u32 new_max_alpha = (s_alpha_max != s_alpha_min) ? (std::min(s_alpha_max, ((1 << (32 - std::countl_zero(static_cast<u32>(s_alpha_max)))) - 1)) & ~fb_mask) : (s_alpha_max & ~fb_mask);
|
||||||
const u32 curr_max = (rt->m_alpha_max != rt->m_alpha_min && rt->m_alpha_range) ? (((1 << (32 - std::countl_zero(static_cast<u32>(rt->m_alpha_max)))) - 1) & fb_mask) : ((rt->m_alpha_max | rt->m_alpha_min) & fb_mask);
|
const u32 curr_max = (rt_new_alpha_max != rt_new_alpha_min && rt->m_alpha_range) ? (((1 << (32 - std::countl_zero(static_cast<u32>(rt_new_alpha_max)))) - 1) & fb_mask) : ((rt_new_alpha_max | rt_new_alpha_min) & fb_mask);
|
||||||
if (full_cover)
|
if (full_cover)
|
||||||
rt->m_alpha_max = new_max_alpha | curr_max;
|
rt_new_alpha_max = new_max_alpha | curr_max;
|
||||||
else
|
else
|
||||||
rt->m_alpha_max = std::max(static_cast<int>(new_max_alpha | curr_max), rt->m_alpha_max);
|
rt_new_alpha_max = std::max(static_cast<int>(new_max_alpha | curr_max), rt_new_alpha_max);
|
||||||
|
|
||||||
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min);
|
rt_new_alpha_min = std::min(s_alpha_min, rt_new_alpha_min);
|
||||||
}
|
}
|
||||||
if (full_cover && (fb_mask & alpha_mask) == 0)
|
if (full_cover && (fb_mask & alpha_mask) == 0)
|
||||||
rt->m_alpha_range = s_alpha_max != s_alpha_min;
|
rt->m_alpha_range = s_alpha_max != s_alpha_min;
|
||||||
|
@ -5242,33 +5256,33 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
const GSVector4i shuffle_rect = GSVector4i(m_vt.m_min.p.x, m_vt.m_min.p.y, m_vt.m_max.p.x, m_vt.m_max.p.y);
|
const GSVector4i shuffle_rect = GSVector4i(m_vt.m_min.p.x, m_vt.m_min.p.y, m_vt.m_max.p.x, m_vt.m_max.p.y);
|
||||||
if (!rt->m_valid.rintersect(shuffle_rect).eq(rt->m_valid) || (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000))
|
if (!rt->m_valid.rintersect(shuffle_rect).eq(rt->m_valid) || (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000))
|
||||||
{
|
{
|
||||||
rt->m_alpha_max = std::max(static_cast<int>((std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127), rt->m_alpha_max) | fba_value;
|
rt_new_alpha_max = std::max(static_cast<int>((std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127), rt_new_alpha_max) | fba_value;
|
||||||
rt->m_alpha_min = std::min(static_cast<int>(std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80), rt->m_alpha_min);
|
rt_new_alpha_min = std::min(static_cast<int>(std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80), rt_new_alpha_min);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
rt->m_alpha_max = (std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127 | fba_value;
|
rt_new_alpha_max = (std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127 | fba_value;
|
||||||
rt->m_alpha_min = (std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) | fba_value;
|
rt_new_alpha_min = (std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) | fba_value;
|
||||||
}
|
}
|
||||||
rt->m_alpha_range = true;
|
rt->m_alpha_range = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt->m_alpha_min, rt->m_alpha_max);
|
GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt_new_alpha_min, rt_new_alpha_max);
|
||||||
|
|
||||||
// If there's no overlap, the values in the RT before FB write will be the old values.
|
// If there's no overlap, the values in the RT before FB write will be the old values.
|
||||||
if (m_prim_overlap != PRIM_OVERLAP_NO)
|
if (m_prim_overlap != PRIM_OVERLAP_NO)
|
||||||
{
|
{
|
||||||
// Otherwise, it may be a mix of the old/new values.
|
// Otherwise, it may be a mix of the old/new values.
|
||||||
blend_alpha_min = std::min(blend_alpha_min, rt->m_alpha_min);
|
blend_alpha_min = std::min(blend_alpha_min, rt_new_alpha_min);
|
||||||
blend_alpha_max = std::max(blend_alpha_max, rt->m_alpha_max);
|
blend_alpha_max = std::max(blend_alpha_max, rt_new_alpha_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rt->m_32_bits_fmt)
|
if (!rt->m_32_bits_fmt)
|
||||||
{
|
{
|
||||||
rt->m_alpha_max &= 128;
|
rt_new_alpha_max &= 128;
|
||||||
rt->m_alpha_min &= 128;
|
rt_new_alpha_min &= 128;
|
||||||
|
|
||||||
if (rt->m_alpha_max == rt->m_alpha_min)
|
if (rt_new_alpha_max == rt_new_alpha_min)
|
||||||
rt->m_alpha_range = false;
|
rt->m_alpha_range = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5397,12 +5411,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference
|
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference
|
||||||
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
|
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
|
||||||
|
|
||||||
m_can_correct_alpha = !needs_ad && (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8);
|
m_can_correct_alpha = !needs_ad && (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8) && rt_new_alpha_max <= 128;
|
||||||
|
|
||||||
if (rt)
|
if (rt)
|
||||||
{
|
{
|
||||||
const bool partial_fbmask = (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0);
|
const bool partial_fbmask = (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0);
|
||||||
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (m_conf.colormask.wa && (rt->m_alpha_max > 128 || partial_fbmask));
|
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (m_conf.colormask.wa && (rt_new_alpha_max > 128 || partial_fbmask));
|
||||||
|
|
||||||
if (rta_decorrection)
|
if (rta_decorrection)
|
||||||
{
|
{
|
||||||
|
@ -5438,7 +5452,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
}
|
}
|
||||||
else if (m_channel_shuffle)
|
else if (m_channel_shuffle)
|
||||||
{
|
{
|
||||||
if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || partial_fbmask || rt->m_alpha_max > 128)
|
if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || partial_fbmask || rt_new_alpha_max > 128)
|
||||||
{
|
{
|
||||||
m_can_correct_alpha = false;
|
m_can_correct_alpha = false;
|
||||||
rt->RTADecorrect();
|
rt->RTADecorrect();
|
||||||
|
@ -5464,7 +5478,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!rt->m_rt_alpha_scale)
|
else if (!rt->m_rt_alpha_scale)
|
||||||
m_can_correct_alpha = rt->m_alpha_max <= 128 && m_can_correct_alpha;
|
m_can_correct_alpha = m_can_correct_alpha;
|
||||||
|
|
||||||
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
|
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
|
||||||
}
|
}
|
||||||
|
@ -5472,9 +5486,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
|
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
|
||||||
{
|
{
|
||||||
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, rt);
|
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, rt);
|
||||||
|
|
||||||
if (req_src_update && tex->m_texture != rt->m_texture)
|
|
||||||
tex->m_texture = rt->m_texture;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -5482,11 +5493,11 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
m_conf.ps.no_color1 = true;
|
m_conf.ps.no_color1 = true;
|
||||||
|
|
||||||
// Try to avoid palette draws
|
// Try to avoid palette draws
|
||||||
if (rt && m_can_correct_alpha && !rt->m_rt_alpha_scale && rt->m_alpha_max == rt->m_alpha_min)
|
if (rt && m_can_correct_alpha && !rt->m_rt_alpha_scale && rt->m_alpha_max == rt->m_alpha_min )
|
||||||
{
|
{
|
||||||
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
|
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
|
||||||
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
|
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
|
||||||
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle);
|
const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS));
|
||||||
|
|
||||||
if (!full_cover)
|
if (!full_cover)
|
||||||
{
|
{
|
||||||
|
@ -5500,6 +5511,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (req_src_update && tex->m_texture != rt->m_texture)
|
||||||
|
tex->m_texture = rt->m_texture;
|
||||||
|
|
||||||
|
if (rt)
|
||||||
|
{
|
||||||
|
rt->m_alpha_max = rt_new_alpha_max;
|
||||||
|
rt->m_alpha_min = rt_new_alpha_min;
|
||||||
|
}
|
||||||
// Warning must be done after EmulateZbuffer
|
// Warning must be done after EmulateZbuffer
|
||||||
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
|
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
|
||||||
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd
|
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd
|
||||||
|
|
|
@ -129,7 +129,7 @@ fragment float4 ps_primid_rta_init_datm0(float4 p [[position]], DirectReadTextur
|
||||||
fragment float4 ps_rta_correction(ConvertShaderData data [[stage_in]], ConvertPSRes res)
|
fragment float4 ps_rta_correction(ConvertShaderData data [[stage_in]], ConvertPSRes res)
|
||||||
{
|
{
|
||||||
float4 in = res.sample(data.t);
|
float4 in = res.sample(data.t);
|
||||||
return float4(in.rgb, in.a / (127.5f / 255.0f));
|
return float4(in.rgb, in.a / (128.25f / 255.0f));
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment float4 ps_rta_decorrection(ConvertShaderData data [[stage_in]], ConvertPSRes res)
|
fragment float4 ps_rta_decorrection(ConvertShaderData data [[stage_in]], ConvertPSRes res)
|
||||||
|
|
Loading…
Reference in New Issue