GS: Improve upload TEX overwrite detection

This commit is contained in:
refractionpcsx2 2022-10-20 18:33:17 +01:00
parent d84d4cec4f
commit 32d1105833
4 changed files with 57 additions and 26 deletions

View File

@ -110,7 +110,15 @@ void GSClut::Invalidate()
void GSClut::InvalidateRange(u32 start_block, u32 end_block) void GSClut::InvalidateRange(u32 start_block, u32 end_block)
{ {
if (m_write.TEX0.CBP >= start_block && m_write.TEX0.CBP <= end_block) int blocks = 4;
if (GSLocalMemory::m_psm[m_write.TEX0.CPSM].bpp == 16)
blocks >>= 1;
if (GSLocalMemory::m_psm[m_write.TEX0.PSM].bpp == 4)
blocks >>= 1;
if ((m_write.TEX0.CBP + blocks) >= start_block && m_write.TEX0.CBP <= end_block)
{ {
m_write.dirty = true; m_write.dirty = true;
} }

View File

@ -531,6 +531,17 @@ public:
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0); std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
// Estimates the block pointer just past a w x h pixel rectangle that starts at
// block pointer `bp` in a buffer of width `bw`, using the page dimensions of
// pixel storage mode `psm`.
//
//   bp  - starting block pointer of the region.
//   bw  - buffer width; values below 1 are treated as 1. It is scaled by 64
//         before being divided by the page width (presumably bw is expressed
//         in units of 64 pixels - TODO confirm against callers).
//   w,h - extent of the region in pixels; each is rounded down to whole pages
//         with a minimum of one page.
//   psm - index into GSLocalMemory::m_psm used to look up the page size.
//
// Returns bp plus the estimated span, where page counts are scaled by 32
// (<< 5) to convert them to blocks.
static u32 GetEndBlock(int bp, int bw, int w, int h, int psm)
{
	const auto& page_size = GSLocalMemory::m_psm[psm].pgs;

	// Whole pages covered in each direction, never less than one.
	const int pages_across = std::max(1, w / page_size.x);
	const int pages_down = std::max(1, h / page_size.y);

	// Number of pages that make up one row of the buffer.
	const int pages_per_row = (std::max(1, bw) * 64) / page_size.x;

	// Full rows of pages, converted to blocks.
	int block_span = (pages_down * pages_per_row) << 5;

	// NOTE(review): this compares a page *count* against the page height in
	// pixels; it may have been meant to test the pixel height `h` for a
	// partial trailing page - confirm the intent before changing it.
	if ((pages_down % page_size.y) != 0)
		block_span += pages_across << 5;

	return bp + block_span;
}
// address // address
static u32 BlockNumber32(int x, int y, u32 bp, u32 bw) static u32 BlockNumber32(int x, int y, u32 bp, u32 bw)

View File

@ -653,6 +653,9 @@ void GSState::DumpVertices(const std::string& filename)
case GSFlushReason::UPLOADDIRTYTEX: case GSFlushReason::UPLOADDIRTYTEX:
file << "GS UPLOAD OVERWRITES CURRENT TEXTURE OR CLUT"; file << "GS UPLOAD OVERWRITES CURRENT TEXTURE OR CLUT";
break; break;
case GSFlushReason::LOCALTOLOCALMOVE:
file << "GS LOCAL TO LOCAL OVERWRITES CURRENT TEXTURE OR CLUT";
break;
case GSFlushReason::DOWNLOADFIFO: case GSFlushReason::DOWNLOADFIFO:
file << "DOWNLOAD FIFO"; file << "DOWNLOAD FIFO";
break; break;
@ -2002,13 +2005,20 @@ void GSState::Write(const u8* mem, int len)
if (!m_tr.Update(w, h, psm.trbpp, len)) if (!m_tr.Update(w, h, psm.trbpp, len))
return; return;
// TODO: Not really sufficient if a partial texture update is done outside the block.
// No need to check CLUT here, we can invalidate it below, no need to flush it since TEX0 needs to update, then we can flush.
// Only flush on a NEW transfer if a pending one is using the same address. GIFRegTEX0& prev_tex0 = m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0;
// Check Fast & Furious (Hardare mode) and Assault Suits Valken (either renderer).
if (m_tr.end == 0 && m_index.tail > 0 && m_prev_env.PRIM.TME && const u32 write_end_bp = GSLocalMemory::GetEndBlock(blit.DBP, blit.DBW, w + static_cast<int>(m_env.TRXPOS.DSAX), h + static_cast<int>(m_env.TRXPOS.DSAY), blit.DPSM);
(blit.DBP == m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.TBP0 || blit.DBP == m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.CBP)) const u32 tex_end_bp = GSLocalMemory::GetEndBlock(prev_tex0.TBP0, prev_tex0.TBW, 1 << prev_tex0.TW, 1 << prev_tex0.TH, prev_tex0.PSM);
// Only flush on a NEW transfer if a pending one is using the same address or overlap.
// Check Fast & Furious (Hardware mode) and Assault Suits Valken (either renderer) and Tomb Raider - Angel of Darkness menu (TBP != DBP but overlaps).
if (m_tr.end == 0 && m_index.tail > 0 && m_prev_env.PRIM.TME && write_end_bp >= prev_tex0.TBP0 && blit.DBP <= tex_end_bp)
{
Flush(GSFlushReason::UPLOADDIRTYTEX); Flush(GSFlushReason::UPLOADDIRTYTEX);
}
// Invalidate the CLUT if it crosses paths.
m_mem.m_clut.InvalidateRange(blit.DBP, write_end_bp);
GL_CACHE("Write! ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)", GL_CACHE("Write! ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)",
blit.DBP, blit.DBW, psm_str(blit.DPSM), blit.DBP, blit.DBW, psm_str(blit.DPSM),
@ -2042,13 +2052,6 @@ void GSState::Write(const u8* mem, int len)
if (m_tr.end >= m_tr.total) if (m_tr.end >= m_tr.total)
FlushWrite(); FlushWrite();
} }
const int page_width = std::max(1, ((w + static_cast<int>(m_env.TRXPOS.DSAX)) / psm.pgs.x));
const int page_height = std::max(1, ((h + static_cast<int>(m_env.TRXPOS.DSAY)) / psm.pgs.y));
const int pitch = (std::max(1U, blit.DBW) * 64) / psm.pgs.x;
const u32 end_bp = blit.DBP + ((((page_height % psm.pgs.y) != 0) ? (page_width << 5) : 0) + ((page_height * pitch) << 5));
// Try to avoid flushing draws if it doesn't cross paths
m_mem.m_clut.InvalidateRange(blit.DBP, end_bp);
} }
void GSState::InitReadFIFO(u8* mem, int len) void GSState::InitReadFIFO(u8* mem, int len)
@ -2151,6 +2154,20 @@ void GSState::Move()
const GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM); const GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
const GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM); const GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
GIFRegTEX0& prev_tex0 = m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0;
const u32 end_bp = GSLocalMemory::GetEndBlock(dbp, dbw, w + static_cast<int>(m_env.TRXPOS.DSAX), h + static_cast<int>(m_env.TRXPOS.DSAY), m_env.BITBLTBUF.DPSM);
const u32 tex_end_bp = GSLocalMemory::GetEndBlock(prev_tex0.TBP0, prev_tex0.TBW, 1 << prev_tex0.TW, 1 << prev_tex0.TH, prev_tex0.PSM);
// Only flush on a NEW transfer if a pending one is using the same address or overlap.
// Unknown if games use this one, but best to be safe.
if (m_index.tail > 0 && m_prev_env.PRIM.TME && end_bp >= prev_tex0.TBP0 && dbp <= static_cast<int>(tex_end_bp))
{
Flush(GSFlushReason::LOCALTOLOCALMOVE);
}
// Invalidate the CLUT if it crosses paths.
m_mem.m_clut.InvalidateRange(dbp, end_bp);
auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn) auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn)
{ {
int _sy = sy, _dy = dy; // Faster with local copied variables, compiler optimizations are dumb int _sy = sy, _dy = dy; // Faster with local copied variables, compiler optimizations are dumb
@ -2292,12 +2309,6 @@ void GSState::Move()
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff)); (m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
}); });
} }
const int page_width = std::max(1, ((w + static_cast<int>(m_env.TRXPOS.DSAX)) / dpsm.pgs.x));
const int page_height = std::max(1, ((h + static_cast<int>(m_env.TRXPOS.DSAY)) / dpsm.pgs.y));
const int pitch = (std::max(1, dbw) * 64) / dpsm.pgs.x;
const u32 end_bp = dbp + ((((page_height % dpsm.pgs.y) != 0) ? (page_width << 5) : 0) + ((page_height * pitch) << 5));
// Try to avoid flushing draws if it doesn't cross paths
m_mem.m_clut.InvalidateRange(dbp, end_bp);
} }
void GSState::SoftReset(u32 mask) void GSState::SoftReset(u32 mask)

View File

@ -283,12 +283,13 @@ public:
TEXFLUSH = 1 << 4, TEXFLUSH = 1 << 4,
GSTRANSFER = 1 << 5, GSTRANSFER = 1 << 5,
UPLOADDIRTYTEX = 1 << 6, UPLOADDIRTYTEX = 1 << 6,
DOWNLOADFIFO = 1 << 7, LOCALTOLOCALMOVE = 1 << 7,
SAVESTATE = 1 << 8, DOWNLOADFIFO = 1 << 8,
LOADSTATE = 1 << 9, SAVESTATE = 1 << 9,
AUTOFLUSH = 1 << 10, LOADSTATE = 1 << 10,
VSYNC = 1 << 11, AUTOFLUSH = 1 << 11,
GSREOPEN = 1 << 12, VSYNC = 1 << 12,
GSREOPEN = 1 << 13,
}; };
GSFlushReason m_state_flush_reason; GSFlushReason m_state_flush_reason;