mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Don't align dirty rectangles to block sizes when updating
Don't align the area we write to the target to the block size. If the format matches, the writes don't need to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process the move in hardware, local memory never gets updated, and thus is stale.
This commit is contained in:
parent
b90c2ec385
commit
3e8e1ed596
|
@ -60,7 +60,7 @@ def compare_frames(path1, path2):
|
|||
def extract_stats(file):
|
||||
stats = {}
|
||||
try:
|
||||
with open(file, "r") as f:
|
||||
with open(file, "r", errors="ignore") as f:
|
||||
for line in f.readlines():
|
||||
m = re.match(".*@HWSTAT@ ([^:]+): (.*) \(avg ([^)]+)\)$", line)
|
||||
if m is None:
|
||||
|
|
|
@ -35,7 +35,7 @@ GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req
|
|||
{
|
||||
}
|
||||
|
||||
GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
|
||||
GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0, bool align) const
|
||||
{
|
||||
GSVector4i _r;
|
||||
|
||||
|
@ -48,14 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
|
|||
_r.top = (r.top * dst.y) / src.y;
|
||||
_r.right = (r.right * dst.x) / src.x;
|
||||
_r.bottom = (r.bottom * dst.y) / src.y;
|
||||
_r = _r.ralign<Align_Outside>(src);
|
||||
}
|
||||
else
|
||||
{
|
||||
_r = r.ralign<Align_Outside>(src);
|
||||
_r = r;
|
||||
}
|
||||
|
||||
return _r;
|
||||
return align ? _r.ralign<Align_Outside>(src) : _r;
|
||||
}
|
||||
|
||||
GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const
|
||||
|
@ -66,7 +65,7 @@ GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size
|
|||
|
||||
for (auto& dirty_rect : *this)
|
||||
{
|
||||
r = r.runion(dirty_rect.GetDirtyRect(TEX0));
|
||||
r = r.runion(dirty_rect.GetDirtyRect(TEX0, true));
|
||||
}
|
||||
|
||||
const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
|
||||
|
@ -92,12 +91,13 @@ u32 GSDirtyRectList::GetDirtyChannels()
|
|||
return channels;
|
||||
}
|
||||
|
||||
GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const
|
||||
GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const
|
||||
{
|
||||
const GSVector4i r = (*this)[index].GetDirtyRect(TEX0);
|
||||
GSVector4i r = (*this)[index].GetDirtyRect(TEX0, align);
|
||||
const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
|
||||
if (align)
|
||||
r = r.ralign<Align_Outside>(bs);
|
||||
|
||||
GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
|
||||
|
||||
return r.ralign<Align_Outside>(bs).rintersect(clamp);
|
||||
return r.rintersect(clamp);
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ public:
|
|||
|
||||
GSDirtyRect();
|
||||
GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req_linear);
|
||||
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0) const;
|
||||
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0, bool align) const;
|
||||
};
|
||||
|
||||
class GSDirtyRectList : public std::vector<GSDirtyRect>
|
||||
|
@ -49,5 +49,5 @@ public:
|
|||
GSDirtyRectList() {}
|
||||
GSVector4i GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const;
|
||||
u32 GetDirtyChannels();
|
||||
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const;
|
||||
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const;
|
||||
};
|
||||
|
|
|
@ -5497,7 +5497,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
|
|||
bool is_dirty = false;
|
||||
for (const GSDirtyRect& rc : tgt->m_dirty)
|
||||
{
|
||||
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(r).rempty())
|
||||
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(r).rempty())
|
||||
{
|
||||
is_dirty = true;
|
||||
break;
|
||||
|
@ -5597,7 +5597,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
|
|||
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage);
|
||||
for (GSDirtyRect& rc : src_target->m_dirty)
|
||||
{
|
||||
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(tr).rempty())
|
||||
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1169,7 +1169,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
{
|
||||
for (auto& dirty : t->m_dirty)
|
||||
{
|
||||
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0);
|
||||
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0, true);
|
||||
if (!dirty_rect.rintersect(new_rect).rempty())
|
||||
{
|
||||
rect_clean = false;
|
||||
|
@ -2000,7 +2000,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
|
||||
// Don't bother copying the old target in if the whole thing is dirty.
|
||||
if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 ||
|
||||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect()))
|
||||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect(), false).eq(dst->GetUnscaledRect()))
|
||||
{
|
||||
// If the old target was cleared, simply propagate that through.
|
||||
if (dst_match->m_texture->GetState() == GSTexture::State::Cleared)
|
||||
|
@ -2508,7 +2508,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src)
|
|||
for (u32 i = 0; i < dst->m_dirty.size(); i++)
|
||||
{
|
||||
GSDirtyRect& dr = dst->m_dirty[i];
|
||||
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0);
|
||||
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0, false);
|
||||
if (!drc.rintersect(clear_dirty_rc).rempty())
|
||||
{
|
||||
if ((dr.rgba._u32 &= ~0x7) == 0)
|
||||
|
@ -4785,7 +4785,7 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec
|
|||
bool is_dirty = false;
|
||||
for (GSDirtyRect& dirty : t->m_dirty)
|
||||
{
|
||||
if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty())
|
||||
if (!dirty.GetDirtyRect(t->m_TEX0, false).rintersect(clut_rc).rempty())
|
||||
{
|
||||
GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping");
|
||||
is_dirty = true;
|
||||
|
@ -5414,34 +5414,41 @@ void GSTextureCache::Target::Update()
|
|||
const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM));
|
||||
for (size_t i = 0; i < m_dirty.size(); i++)
|
||||
{
|
||||
const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect));
|
||||
if (r.rempty())
|
||||
// Don't align the area we write to the target to the block size. If the format matches, the writes don't need
|
||||
// to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture
|
||||
// operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it
|
||||
// uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process
|
||||
// the move in hardware, local memory never gets updated, and thus is stale.
|
||||
const GSVector4i update_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, false);
|
||||
if (update_r.rempty())
|
||||
continue;
|
||||
|
||||
const GSVector4i t_r(r - t_offset);
|
||||
const GSVector4i read_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, true);
|
||||
const GSVector4i t_r(read_r - t_offset);
|
||||
if (mapped)
|
||||
{
|
||||
g_gs_renderer->m_mem.ReadTexture(
|
||||
off, r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
|
||||
off, read_r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
|
||||
}
|
||||
else
|
||||
{
|
||||
const int pitch = VectorAlign(r.width() * sizeof(u32));
|
||||
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
|
||||
const int pitch = VectorAlign(read_r.width() * sizeof(u32));
|
||||
g_gs_renderer->m_mem.ReadTexture(off, read_r, s_unswizzle_buffer, pitch, TEXA);
|
||||
|
||||
t->Update(t_r, s_unswizzle_buffer, pitch);
|
||||
}
|
||||
|
||||
GSDevice::MultiStretchRect& drect = drects[ndrects++];
|
||||
drect.src = t;
|
||||
drect.src_rect = GSVector4(r - t_offset) / t_sizef;
|
||||
drect.dst_rect = GSVector4(r) * GSVector4(m_scale);
|
||||
drect.src_rect = GSVector4(update_r - t_offset) / t_sizef;
|
||||
drect.dst_rect = GSVector4(update_r) * GSVector4(m_scale);
|
||||
drect.linear = linear && (m_dirty[i].req_linear || override_linear);
|
||||
|
||||
// Copy the new GS memory content into the destination texture.
|
||||
if (m_type == RenderTarget)
|
||||
{
|
||||
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
|
||||
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW,
|
||||
update_r.x, update_r.y, update_r.z, update_r.w);
|
||||
drect.wmask = static_cast<u8>(m_dirty[i].rgba._u32);
|
||||
}
|
||||
else if (m_type == DepthStencil)
|
||||
|
@ -5498,7 +5505,7 @@ void GSTextureCache::Target::UpdateIfDirtyIntersects(const GSVector4i& rc)
|
|||
|
||||
for (auto& dirty : m_dirty)
|
||||
{
|
||||
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0));
|
||||
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0, false));
|
||||
if (dirty_rc.rintersect(rc).rempty())
|
||||
continue;
|
||||
|
||||
|
|
Loading…
Reference in New Issue