GS/HW: RTA improvements and enhancements

This commit is contained in:
refractionpcsx2 2024-03-22 17:44:14 +00:00
parent d0f070bf97
commit 059ec49389
13 changed files with 128 additions and 56 deletions

View File

@ -139,7 +139,7 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input)
{ {
PS_OUTPUT output; PS_OUTPUT output;
float4 value = sample_c(input.t); float4 value = sample_c(input.t);
output.c = float4(value.rgb, value.a / (128.5f / 255.0f)); output.c = float4(value.rgb, value.a / (127.5f / 255.0f));
return output; return output;
} }
@ -147,7 +147,7 @@ PS_OUTPUT ps_rta_decorrection(PS_INPUT input)
{ {
PS_OUTPUT output; PS_OUTPUT output;
float4 value = sample_c(input.t); float4 value = sample_c(input.t);
output.c = float4(value.rgb, value.a * (128.5f / 255.0f)); output.c = float4(value.rgb, value.a * (128.25f / 255.0f));
return output; return output;
} }

View File

@ -341,7 +341,7 @@ uint4 sample_4_index(float4 uv, float uv_w)
if (PS_RTA_SRC_CORRECTION) if (PS_RTA_SRC_CORRECTION)
{ {
i = uint4(c * 128.25f); // Denormalize value i = uint4(c * 128.55f); // Denormalize value
} }
else else
{ {

View File

@ -337,7 +337,7 @@ void ps_datm0_rta_correction()
void ps_rta_correction() void ps_rta_correction()
{ {
vec4 value = sample_c(); vec4 value = sample_c();
SV_Target0 = vec4(value.rgb, value.a / (128.5f / 255.0f)); SV_Target0 = vec4(value.rgb, value.a / (127.5f / 255.0f));
} }
#endif #endif
@ -345,7 +345,7 @@ void ps_rta_correction()
void ps_rta_decorrection() void ps_rta_decorrection()
{ {
vec4 value = sample_c(); vec4 value = sample_c();
SV_Target0 = vec4(value.rgb, value.a * (128.5f / 255.0f)); SV_Target0 = vec4(value.rgb, value.a * (128.25f / 255.0f));
} }
#endif #endif

View File

@ -287,7 +287,7 @@ uvec4 sample_4_index(vec4 uv)
c.w = sample_c(uv.zw).a; c.w = sample_c(uv.zw).a;
#if PS_RTA_SRC_CORRECTION #if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f); // Denormalize value uvec4 i = uvec4(c * 128.55f); // Denormalize value
#else #else
uvec4 i = uvec4(c * 255.5f); // Denormalize value uvec4 i = uvec4(c * 255.5f); // Denormalize value
#endif #endif

View File

@ -114,7 +114,7 @@ void ps_datm0_rta_correction()
void ps_rta_correction() void ps_rta_correction()
{ {
vec4 value = sample_c(v_tex); vec4 value = sample_c(v_tex);
o_col0 = vec4(value.rgb, value.a / (128.5f / 255.0f)); o_col0 = vec4(value.rgb, value.a / (127.5f / 255.0f));
} }
#endif #endif
@ -122,7 +122,7 @@ void ps_rta_correction()
void ps_rta_decorrection() void ps_rta_decorrection()
{ {
vec4 value = sample_c(v_tex); vec4 value = sample_c(v_tex);
o_col0 = vec4(value.rgb, value.a * (128.5f / 255.0f)); o_col0 = vec4(value.rgb, value.a * (128.25f / 255.0f));
} }
#endif #endif

View File

@ -531,7 +531,7 @@ uvec4 sample_4_index(vec4 uv)
// Denormalize value // Denormalize value
#if PS_RTA_SRC_CORRECTION #if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f); uvec4 i = uvec4(c * 128.55f);
#else #else
uvec4 i = uvec4(c * 255.5f); uvec4 i = uvec4(c * 255.5f);
#endif #endif

View File

@ -733,7 +733,7 @@ bool GSHwHack::GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip)
// Alpha is unknown, since it comes from RGB. // Alpha is unknown, since it comes from RGB.
dst->m_alpha_min = 0; dst->m_alpha_min = 0;
dst->m_alpha_max = 255; dst->m_alpha_max = 255;
dst->m_alpha_range = true;
dst->UpdateValidChannels(PSMCT32, fbmsk); dst->UpdateValidChannels(PSMCT32, fbmsk);
dst->UpdateValidity(GSVector4i::loadh(size)); dst->UpdateValidity(GSVector4i::loadh(size));
@ -1016,6 +1016,7 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
tmp_rt->UpdateDrawn(tmp_rt->m_valid); tmp_rt->UpdateDrawn(tmp_rt->m_valid);
tmp_rt->m_alpha_max = 0; tmp_rt->m_alpha_max = 0;
tmp_rt->m_alpha_min = 0; tmp_rt->m_alpha_min = 0;
tmp_rt->m_alpha_range = false;
} }
return false; return false;

View File

@ -2976,7 +2976,7 @@ void GSRendererHW::Draw()
{ {
const u32 alpha = m_cached_ctx.FRAME.FBMSK >> 24; const u32 alpha = m_cached_ctx.FRAME.FBMSK >> 24;
const u32 alpha_mask = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk >> 24; const u32 alpha_mask = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk >> 24;
rt->Update(m_texture_shuffle || m_channel_shuffle || (alpha != 0 && (alpha & alpha_mask) != alpha_mask) || (!alpha && GetAlphaMinMax().max > 128)); rt->Update(m_texture_shuffle || (alpha != 0 && (alpha & alpha_mask) != alpha_mask) || (!alpha && GetAlphaMinMax().max > 128));
} }
else else
rt->m_age = 0; rt->m_age = 0;
@ -5158,17 +5158,19 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
const int aref = static_cast<int>(m_cached_ctx.TEST.AREF); const int aref = static_cast<int>(m_cached_ctx.TEST.AREF);
if (m_cached_ctx.TEST.ATE && ((fail_type != AFAIL_FB_ONLY && fail_type != AFAIL_RGB_ONLY) || !PRIM->ABE || !IsUsingAsInBlend())) if (m_cached_ctx.TEST.ATE && ((fail_type != AFAIL_FB_ONLY && fail_type != AFAIL_RGB_ONLY) || !PRIM->ABE || !IsUsingAsInBlend()))
CorrectATEAlphaMinMax(m_cached_ctx.TEST.ATST, aref); CorrectATEAlphaMinMax(m_cached_ctx.TEST.ATST, aref);
const bool needs_ad = rt && m_context->ALPHA.C == 1 && rt->m_alpha_min != rt->m_alpha_max && rt->m_alpha_max > 128;
// Blend // Blend
int blend_alpha_min = 0, blend_alpha_max = 255; int blend_alpha_min = 0, blend_alpha_max = 255;
if (rt) if (rt)
{ {
blend_alpha_min = rt->m_alpha_min; blend_alpha_min = rt->m_alpha_min;
blend_alpha_max = rt->m_alpha_max; blend_alpha_max = rt->m_alpha_max;
const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24); const bool is_24_bit = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].trbpp == 24);
const u32 alpha_mask = GSLocalMemory::m_psm[rt->m_TEX0.PSM].fmsk & 0xFF000000; // On DX FBMask emulation can be missing on lower blend levels, so we'll do whatever the API does.
const u32 fb_mask = m_conf.colormask.wa ? (m_conf.ps.fbmask ? m_conf.cb_ps.FbMask.a : 0) : 0xFF;
const u32 alpha_mask = (GSLocalMemory::m_psm[rt->m_TEX0.PSM].fmsk & 0xFF000000) >> 24;
const int fba_value = m_draw_env->CTXT[m_draw_env->PRIM.CTXT].FBA.FBA * 128; const int fba_value = m_draw_env->CTXT[m_draw_env->PRIM.CTXT].FBA.FBA * 128;
if (is_24_bit) if (is_24_bit)
@ -5178,15 +5180,18 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
blend_alpha_max = 128; blend_alpha_max = 128;
} }
if (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8 && !m_channel_shuffle && !m_texture_shuffle) if (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & 0x8 && !m_texture_shuffle)
{ {
const int s_alpha_max = GetAlphaMinMax().max | fba_value; const int s_alpha_max = GetAlphaMinMax().max | fba_value;
const int s_alpha_min = GetAlphaMinMax().min | fba_value; const int s_alpha_min = GetAlphaMinMax().min | fba_value;
if ((m_cached_ctx.FRAME.FBMSK & alpha_mask) == 0)
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle);
if ((fb_mask & alpha_mask) == 0)
{ {
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32); if (full_cover)
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
if (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)))
{ {
rt->m_alpha_max = s_alpha_max; rt->m_alpha_max = s_alpha_max;
rt->m_alpha_min = s_alpha_min; rt->m_alpha_min = s_alpha_min;
@ -5197,16 +5202,38 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min); rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min);
} }
} }
else if ((m_cached_ctx.FRAME.FBMSK & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked. else if ((fb_mask & alpha_mask) != alpha_mask) // We can't be sure of the alpha if it's partially masked.
{ {
rt->m_alpha_max |= std::max(s_alpha_max, rt->m_alpha_max); // Any number of bits could be set, so let's be paranoid about it
const u32 new_max_alpha = (s_alpha_max != s_alpha_min) ? (std::min(s_alpha_max, ((1 << (32 - std::countl_zero(static_cast<u32>(s_alpha_max)))) - 1)) & ~fb_mask) : (s_alpha_max & ~fb_mask);
const u32 curr_max = (rt->m_alpha_max != rt->m_alpha_min && rt->m_alpha_range) ? (((1 << (32 - std::countl_zero(static_cast<u32>(rt->m_alpha_max)))) - 1) & fb_mask) : ((rt->m_alpha_max | rt->m_alpha_min) & fb_mask);
if (full_cover)
rt->m_alpha_max = new_max_alpha | curr_max;
else
rt->m_alpha_max = std::max(static_cast<int>(new_max_alpha | curr_max), rt->m_alpha_max);
rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min); rt->m_alpha_min = std::min(s_alpha_min, rt->m_alpha_min);
} }
if (full_cover && (fb_mask & alpha_mask) == 0)
rt->m_alpha_range = s_alpha_max != s_alpha_min;
else
rt->m_alpha_range |= (s_alpha_max & ~fb_mask) != (s_alpha_min & ~fb_mask);
} }
else if ((m_texture_shuffle && m_conf.colormask.wa) || (m_channel_shuffle && (m_cached_ctx.FRAME.FBMSK & alpha_mask) != alpha_mask)) else if ((m_texture_shuffle && m_conf.colormask.wa))
{ {
rt->m_alpha_max = 255; // in shuffles, the alpha top bit values are set according to TEXA
rt->m_alpha_min = 0; const GSVector4i shuffle_rect = GSVector4i(m_vt.m_min.p.x, m_vt.m_min.p.y, m_vt.m_max.p.x, m_vt.m_max.p.y);
if (!rt->m_valid.rintersect(shuffle_rect).eq(rt->m_valid) || (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000))
{
rt->m_alpha_max = std::max(static_cast<int>((std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127), rt->m_alpha_max) | fba_value;
rt->m_alpha_min = std::min(static_cast<int>(std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80), rt->m_alpha_min);
}
else
{
rt->m_alpha_max = (std::max(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) + 127 | fba_value;
rt->m_alpha_min = (std::min(m_draw_env->TEXA.TA1, m_draw_env->TEXA.TA0) & 0x80) | fba_value;
}
rt->m_alpha_range = true;
} }
GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt->m_alpha_min, rt->m_alpha_max); GL_INS("RT Alpha Range: %d-%d => %d-%d", blend_alpha_min, blend_alpha_max, rt->m_alpha_min, rt->m_alpha_max);
@ -5223,6 +5250,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{ {
rt->m_alpha_max &= 128; rt->m_alpha_max &= 128;
rt->m_alpha_min &= 128; rt->m_alpha_min &= 128;
if (rt->m_alpha_max == rt->m_alpha_min)
rt->m_alpha_range = false;
} }
} }
@ -5350,11 +5380,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference // If we Correct/Decorrect and tex is rt, we will need to update the texture reference
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture; const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
m_can_correct_alpha = true; m_can_correct_alpha = !needs_ad;
if (rt) if (rt)
{ {
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || ((m_conf.colormask.wrgba & 0x8) && (std::max(blend_alpha_max, rt->m_alpha_max) > 128) || (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0)); const bool partial_fbmask = (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0);
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (m_conf.colormask.wa && (rt->m_alpha_max > 128 || partial_fbmask));
if (rta_decorrection) if (rta_decorrection)
{ {
@ -5390,7 +5421,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
} }
else if (m_channel_shuffle) else if (m_channel_shuffle)
{ {
if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || ((m_cached_ctx.FRAME.FBMSK & 0xFF000000) != 0xFF000000)) if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || partial_fbmask || rt->m_alpha_max > 128)
{ {
m_can_correct_alpha = false; m_can_correct_alpha = false;
rt->RTADecorrect(rt); rt->RTADecorrect(rt);
@ -5416,7 +5447,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
} }
} }
else if (!rt->m_rt_alpha_scale) else if (!rt->m_rt_alpha_scale)
m_can_correct_alpha = std::max(blend_alpha_max, rt->m_alpha_max) <= 128; m_can_correct_alpha = rt->m_alpha_max <= 128 && !needs_ad;
m_conf.ps.rta_correction = rt->m_rt_alpha_scale; m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
} }
@ -5433,6 +5464,24 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{ {
m_conf.blend = {}; // No blending please m_conf.blend = {}; // No blending please
m_conf.ps.no_color1 = true; m_conf.ps.no_color1 = true;
// Try to avoid palette draws
if (rt && m_can_correct_alpha && !rt->m_rt_alpha_scale && rt->m_alpha_max == rt->m_alpha_min)
{
const bool afail_always_fb_alpha = m_cached_ctx.TEST.AFAIL == AFAIL_FB_ONLY || (m_cached_ctx.TEST.AFAIL == AFAIL_RGB_ONLY && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp != 32);
const bool always_passing_alpha = !m_cached_ctx.TEST.ATE || afail_always_fb_alpha || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_ALWAYS);
const bool full_cover = (rt->m_valid.rintersect(m_r).eq(rt->m_valid) && PrimitiveCoversWithoutGaps() && !(DATE || !always_passing_alpha || (m_cached_ctx.TEST.ZTE && m_cached_ctx.TEST.ZTST != ZTST_ALWAYS)) || m_channel_shuffle);
if (!full_cover)
{
rt->RTACorrect(rt);
m_conf.rt = rt->m_texture;
}
else
rt->m_rt_alpha_scale = true;
m_conf.ps.rta_correction = rt->m_rt_alpha_scale;
}
} }
// No point outputting colours if we're just writing depth. // No point outputting colours if we're just writing depth.
@ -5891,10 +5940,11 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
if (HasEEUpload(r)) if (HasEEUpload(r))
return CLUTDrawTestResult::CLUTDrawOnCPU; return CLUTDrawTestResult::CLUTDrawOnCPU;
const GSTextureCache::Target* tgt = g_texture_cache->FindOverlappingTarget( GSTextureCache::Target* tgt = g_texture_cache->FindOverlappingTarget(
m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW, m_cached_ctx.TEX0.PSM, r); m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW, m_cached_ctx.TEX0.PSM, r);
if (tgt) if (tgt)
{ {
tgt->RTADecorrect(tgt);
bool is_dirty = false; bool is_dirty = false;
for (const GSDirtyRect& rc : tgt->m_dirty) for (const GSDirtyRect& rc : tgt->m_dirty)
{ {
@ -6376,6 +6426,7 @@ bool GSRendererHW::TryTargetClear(GSTextureCache::Target* rt, GSTextureCache::Ta
rt->m_alpha_max &= 128; rt->m_alpha_max &= 128;
rt->m_alpha_min &= 128; rt->m_alpha_min &= 128;
} }
rt->m_alpha_range = false;
} }
else else
{ {

View File

@ -931,6 +931,10 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c
CopyRGBFromDepthToColor(t, dst); CopyRGBFromDepthToColor(t, dst);
dst = t; dst = t;
if (GSUtil::GetChannelMask(TEX0.PSM) & 0x8)
t->RTADecorrect(t);
inside_target = false; inside_target = false;
break; break;
} }
@ -2664,20 +2668,24 @@ void GSTextureCache::Target::RTACorrect(Target* rt)
{ {
if (rt && !rt->m_rt_alpha_scale && rt->m_type == RenderTarget) if (rt && !rt->m_rt_alpha_scale && rt->m_type == RenderTarget)
{ {
const GSVector2i rtsize(rt->m_texture->GetSize()); if (rt->m_alpha_max > 0)
const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
{ {
// Only copy up the valid area, since there's no point in "correcting" nothing. const GSVector2i rtsize(rt->m_texture->GetSize());
const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect)); const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_CORRECTION, false); if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
g_perfmon.Put(GSPerfMon::TextureCopies, 1); {
g_gs_device->Recycle(rt->m_texture); // Only copy up the valid area, since there's no point in "correcting" nothing.
rt->m_texture = temp_rt; const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect));
rt->m_rt_alpha_scale = true; const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_CORRECTION, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_gs_device->Recycle(rt->m_texture);
rt->m_texture = temp_rt;
}
} }
rt->m_rt_alpha_scale = true;
} }
} }
@ -2685,20 +2693,24 @@ void GSTextureCache::Target::RTADecorrect(Target* rt)
{ {
if (rt->m_rt_alpha_scale && rt->m_type == RenderTarget) if (rt->m_rt_alpha_scale && rt->m_type == RenderTarget)
{ {
const GSVector2i rtsize(rt->m_texture->GetSize()); if (rt->m_alpha_max > 0)
const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
{ {
// Only copy up the valid area, since there's no point in "correcting" nothing. const GSVector2i rtsize(rt->m_texture->GetSize());
const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect)); const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_DECORRECTION, false); if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
g_perfmon.Put(GSPerfMon::TextureCopies, 1); {
g_gs_device->Recycle(rt->m_texture); // Only copy up the valid area, since there's no point in "correcting" nothing.
rt->m_texture = temp_rt; const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect));
rt->m_rt_alpha_scale = false; const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_DECORRECTION, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_gs_device->Recycle(rt->m_texture);
rt->m_texture = temp_rt;
}
} }
rt->m_rt_alpha_scale = false;
} }
} }
@ -3782,6 +3794,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
dst->m_valid_alpha_high |= src->m_valid_alpha_high; dst->m_valid_alpha_high |= src->m_valid_alpha_high;
dst->m_alpha_max = src->m_alpha_max; dst->m_alpha_max = src->m_alpha_max;
dst->m_alpha_min = src->m_alpha_min; dst->m_alpha_min = src->m_alpha_min;
dst->m_alpha_range |= src->m_alpha_range;
} }
if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, GSVector4i(sx, sy, sx + w, sy + h)) && (w == GSLocalMemory::m_psm[src->m_TEX0.PSM].pgs.x || h == GSLocalMemory::m_psm[src->m_TEX0.PSM].pgs.y)) if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, GSVector4i(sx, sy, sx + w, sy + h)) && (w == GSLocalMemory::m_psm[src->m_TEX0.PSM].pgs.x || h == GSLocalMemory::m_psm[src->m_TEX0.PSM].pgs.y))
@ -3890,6 +3903,7 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
// Because we don't know the new alpha value which came from green, just go full paranoid. // Because we don't know the new alpha value which came from green, just go full paranoid.
tgt->m_alpha_min = 0; tgt->m_alpha_min = 0;
tgt->m_alpha_max = 255; tgt->m_alpha_max = 255;
tgt->m_alpha_range = true;
} }
return true; return true;
@ -5393,6 +5407,9 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec
offset = this_offset; offset = this_offset;
*scale = t->m_scale; *scale = t->m_scale;
t->RTADecorrect(t);
return t->m_texture; return t->m_texture;
} }
@ -6146,6 +6163,8 @@ void GSTextureCache::Target::Update(bool cannot_scale)
m_alpha_min = alpha_minmax.first; m_alpha_min = alpha_minmax.first;
m_alpha_max = alpha_minmax.second; m_alpha_max = alpha_minmax.second;
} }
m_alpha_range |= alpha_minmax.first != alpha_minmax.second;
} }
g_gs_device->Recycle(t); g_gs_device->Recycle(t);
m_dirty.clear(); m_dirty.clear();

View File

@ -210,6 +210,7 @@ public:
const int m_type = 0; const int m_type = 0;
int m_alpha_max = 0; int m_alpha_max = 0;
int m_alpha_min = 0; int m_alpha_min = 0;
bool m_alpha_range = false;
// Valid alpha means "we have rendered to the alpha channel of this target". // Valid alpha means "we have rendered to the alpha channel of this target".
// A false value means that the alpha in local memory is still valid/up-to-date. // A false value means that the alpha in local memory is still valid/up-to-date.

View File

@ -129,13 +129,13 @@ fragment float4 ps_primid_rta_init_datm0(float4 p [[position]], DirectReadTextur
fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn<float> tex) fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn<float> tex)
{ {
float4 in = tex.read(p); float4 in = tex.read(p);
return float4(in.rgb, in.a / (128.5f / 255.0f)); return float4(in.rgb, in.a / (127.5f / 255.0f));
} }
fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn<float> tex) fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn<float> tex)
{ {
float4 in = tex.read(p); float4 in = tex.read(p);
return float4(in.rgb, in.a * (128.5f / 255.0f)); return float4(in.rgb, in.a * (128.25f / 255.0f));
} }
fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex) fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex)

View File

@ -492,7 +492,7 @@ struct PSMain
if (PS_RTA_SRC_CORRECTION) if (PS_RTA_SRC_CORRECTION)
{ {
i = uint4(c * 128.25f); // Denormalize value i = uint4(c * 128.55f); // Denormalize value
} }
else else
{ {

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the /// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache. /// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 42; static constexpr u32 SHADER_CACHE_VERSION = 43;