GS-HW: Fix Download readbacks and limit FB resizing

This commit is contained in:
refractionpcsx2 2023-02-22 01:10:40 +00:00
parent a716e69dc0
commit f70a140f42
3 changed files with 76 additions and 39 deletions

View File

@ -1723,7 +1723,7 @@ void GSRendererHW::Draw()
m_vt.m_max.t = tmax;
}
}
if (rt)
{
// Be sure texture shuffle detection is properly propagated
@ -1732,6 +1732,8 @@ void GSRendererHW::Draw()
rt->m_32_bits_fmt = m_texture_shuffle || (GSLocalMemory::m_psm[context->FRAME.PSM].bpp != 16);
}
const bool is_mem_clear = IsConstantDirectWriteMemClear(false);
const bool can_update_size = !is_mem_clear && !m_texture_shuffle && !m_channel_shuffle;
{
// We still need to make sure the dimensions of the targets match.
const GSVector2 up_s(GetTextureScaleFactor());
@ -1746,13 +1748,18 @@ void GSRendererHW::Draw()
pxAssert(rt->m_texture->GetScale() == up_s);
rt->ResizeTexture(new_w, new_h, up_s);
const GSVector2i tex_size = rt->m_texture->GetSize();
if (!m_texture_shuffle && !m_channel_shuffle)
// Avoid temporary format changes, as this will change the end block and could break things.
if ((old_height != tex_size.y) && can_update_size)
{
const GSVector4i new_valid = GSVector4i(0, 0, new_w / up_s.x, new_h / up_s.y);
const GSVector2 tex_scale = rt->m_texture->GetScale();
const GSVector4i new_valid = GSVector4i(0, 0, tex_size.x / tex_scale.x, tex_size.y / tex_scale.y);
rt->ResizeValidity(new_valid);
}
rt->UpdateValidity(m_r);
GSVector2i resolution = PCRTCDisplays.GetResolution();
// Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
// Probably changing to double buffering, so invalidate any old target that was next to it.
// This resolves an issue where the PCRTC will find the old target in FMVs, causing flashing.
@ -1782,13 +1789,18 @@ void GSRendererHW::Draw()
pxAssert(ds->m_texture->GetScale() == up_s);
ds->ResizeTexture(new_w, new_h, up_s);
const GSVector2i tex_size = ds->m_texture->GetSize();
if (!m_texture_shuffle && !m_channel_shuffle)
if ((old_height != tex_size.y) && can_update_size)
{
const GSVector4i new_valid = GSVector4i(0, 0, new_w / up_s.x, new_h / up_s.y);
const GSVector2 tex_scale = ds->m_texture->GetScale();
const GSVector4i new_valid = GSVector4i(0, 0, tex_size.x / tex_scale.x, tex_size.y / tex_scale.y);
ds->ResizeValidity(new_valid);
}
ds->UpdateValidity(m_r);
GSVector2i resolution = PCRTCDisplays.GetResolution();
// Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
if (new_height && old_end_block != ds->m_end_block)
{
@ -1938,12 +1950,13 @@ void GSRendererHW::Draw()
// Temporary source *must* be invalidated before normal, because otherwise it'll be double freed.
m_tc->InvalidateTemporarySource();
//
// If it's a mem clear or shuffle we don't want to resize the texture, it can cause textures to take up the entire
// video memory, and that is not good.
if ((fm & fm_mask) != fm_mask && rt)
{
//rt->m_valid = rt->m_valid.runion(r);
rt->UpdateValidity(m_r);
if(can_update_size)
rt->UpdateValidity(m_r);
m_tc->InvalidateVideoMem(context->offset.fb, m_r, false, false);
@ -1953,7 +1966,9 @@ void GSRendererHW::Draw()
if (zm != 0xffffffff && ds)
{
//ds->m_valid = ds->m_valid.runion(r);
ds->UpdateValidity(m_r);
// Shouldn't be a problem as Z will be masked.
if (can_update_size)
ds->UpdateValidity(m_r);
m_tc->InvalidateVideoMem(context->offset.zb, m_r, false, false);

View File

@ -1449,18 +1449,20 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
for (auto it = dss.rbegin(); it != dss.rend(); ++it) // Iterate targets from LRU to MRU.
{
Target* t = *it;
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
const GSVector4i draw_rect = (t->readbacks_since_draw > 0) ? t->m_drawn_since_read : r.rintersect(t->m_drawn_since_read);
Read(t, draw_rect);
t->readbacks_since_draw++;
if(draw_rect.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read))
t->m_drawn_since_read = GSVector4i::zero();
}
}
if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
continue;
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
const bool format_match = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW && bpp_match);
// Calculate the rect offset if the BP doesn't match.
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset;
const GSVector4i draw_rect = (t->readbacks_since_draw > 0) ? t->m_drawn_since_read : targetr.rintersect(t->m_drawn_since_read);
Read(t, draw_rect);
t->readbacks_since_draw++;
if(draw_rect.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read))
t->m_drawn_since_read = GSVector4i::zero();
}
}
return;
@ -1476,16 +1478,22 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
Target* t = *it;
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
{
if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
continue;
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
const bool format_match = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW && bpp_match);
// Calculate the rect offset if the BP doesn't match.
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset;
// Some games like to offset their GS download memory addresses by
// using overly big source Y position values.
// Checking for targets that overlap with the requested memory region
// instead of just comparing TBPs should fix that.
// For example, this fixes Judgement ring rendering in Shadow Hearts 2.
// Be wary of old targets being misdetected, set a sensible range of 30 frames (like Display source lookups).
if (t->Overlaps(bp, bw, psm, r) && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->m_age <= 30)
if (!targetr.rempty())
{
// Calculate the rect offset if the BP doesn't match.
const GSVector4i targetr = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW) ? r : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset;
// GH Note: Read will do a StretchRect and then will swizzle data to the GS memory
// t->m_valid will do the full target texture whereas r.intersect(t->m_valid) will be limited
// to the useful part for the transfer.
@ -1529,10 +1537,10 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
}
else if(!targetr.rintersect(t->m_drawn_since_read).rempty()) // Block level read?
{
// Read full width of drawn area, it's not much slower and makes invalidation easier.
GSVector4i full_lines = GSVector4i(0, targetr.y, t->m_valid.z, targetr.w);
full_lines = full_lines.rintersect(t->m_drawn_since_read);
// Read the width of the draw, reading too much could wipe out dirty memory.
GSVector4i full_lines = GSVector4i(0, targetr.y, t->m_drawn_since_read.z, targetr.w);
full_lines = targetr.rintersect(t->m_drawn_since_read);
Read(t, full_lines);
// After reading, try to cut down our "dirty" rect.
@ -1541,13 +1549,23 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
else
{
// Try to cut down how much we read next, if we can.
if (full_lines.w >= t->m_drawn_since_read.y)
// Fatal Frame reads in vertical strips, SOCOM 2 does horizontal, so we can handle that below.
if (full_lines.width() == t->m_drawn_since_read.width()
&& full_lines.w >= t->m_drawn_since_read.y)
{
if (full_lines.y <= t->m_drawn_since_read.y)
t->m_drawn_since_read.y = full_lines.w;
else if (full_lines.w >= t->m_drawn_since_read.w)
t->m_drawn_since_read.w = full_lines.y;
}
else if (full_lines.height() == t->m_drawn_since_read.height()
&& full_lines.z >= t->m_drawn_since_read.x)
{
if (full_lines.x <= t->m_drawn_since_read.x)
t->m_drawn_since_read.x = full_lines.z;
else if (full_lines.z >= t->m_drawn_since_read.z)
t->m_drawn_since_read.z = full_lines.x;
}
}
t->readbacks_since_draw++;
}
@ -3285,7 +3303,7 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
}
void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize)
{
if (m_valid.runion(rect).eq(m_valid))
{
@ -3293,13 +3311,18 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
return;
}
if (m_valid.eq(GSVector4i::zero()))
m_valid = rect;
else
m_valid = m_valid.runion(rect);
if (can_resize)
{
if (m_valid.eq(GSVector4i::zero()))
m_valid = rect;
else
m_valid = m_valid.runion(rect);
}
if (m_drawn_since_read.eq(GSVector4i::zero()))
m_drawn_since_read = rect;
if (m_drawn_since_read.eq(GSVector4i::zero()) || !can_resize)
{
m_drawn_since_read = rect.rintersect(m_valid);
}
else
m_drawn_since_read = m_drawn_since_read.runion(rect);
// Block of the bottom right texel of the validity rectangle, last valid block of the texture
@ -3307,7 +3330,6 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
// at the moment, we blow the valid rect out to twice the size. The only thing stopping everything breaking is the fact
// that we clamp the draw rect to the target size in GSRendererHW::Draw().
m_end_block = GSLocalMemory::m_psm[m_TEX0.PSM].info.bn(m_valid.z - 1, m_valid.w - 1, m_TEX0.TBP0, m_TEX0.TBW); // Valid only for color formats
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
}

View File

@ -249,7 +249,7 @@ public:
~Target();
void ResizeValidity(const GSVector4i& rect);
void UpdateValidity(const GSVector4i& rect);
void UpdateValidity(const GSVector4i& rect, bool can_resize = true);
void Update(bool reset_age);