GS-HW: Improve GS read target detection, avoid reading dirty targets.

This commit is contained in:
refractionpcsx2 2023-02-23 10:12:35 +00:00
parent 264086e0aa
commit 5bb3d8e60d
3 changed files with 71 additions and 4 deletions

View File

@ -130,16 +130,23 @@ bool GSUtil::HasSharedBits(u32 spsm, const u32* RESTRICT ptr)
return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
}
// Pixels can NOT coexist in the same 32bits of space.
// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check.
// Looks up the bit for spsm in the SharedBitsField row of dpsm and
// returns true when that bit is clear.
bool GSUtil::HasSharedBits(u32 spsm, u32 dpsm)
{
	// Each row is indexed in 32-bit words: the upper bits of spsm pick the word, the low 5 bits pick the bit.
	const auto field_word = s_maps.SharedBitsField[dpsm][spsm >> 5];
	const auto psm_bit = 1 << (spsm & 0x1f);

	return (field_word & psm_bit) == 0;
}
// Pixels can NOT coexist in the same 32bits of space.
// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check.
// SBP and DBP must match: any difference between the two base pointers
// makes this return false regardless of the formats.
bool GSUtil::HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm)
{
	// Different base pointers can never pass the check.
	if (sbp != dbp)
		return false;

	// Same lookup as the two-argument overload: word selected by the upper bits of spsm, bit by the low 5 bits.
	const auto field_word = s_maps.SharedBitsField[dpsm][spsm >> 5];
	const auto psm_bit = 1 << (spsm & 0x1f);

	return (field_word & psm_bit) == 0;
}
// Shares bit depths, only detects 16/24/32 bit formats.
// 24/32bit cross compatible, 16bit compatible with 16bit.
bool GSUtil::HasCompatibleBits(u32 spsm, u32 dpsm)
{
return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;

View File

@ -795,7 +795,37 @@ void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", static_cast<int>(g_perfmon.GetFrame()), r.left, r.top, r.right, r.bottom, static_cast<int>(BITBLTBUF.DBP), static_cast<int>(BITBLTBUF.DPSM));
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite);
// This is gross, but if the EE write loops, we need to split it on the 2048 border.
GSVector4i rect = r;
bool loop_h = false;
bool loop_w = false;
if (r.w > 2048)
{
rect.w = 2048;
loop_h = true;
}
if (r.z > 2048)
{
rect.z = 2048;
loop_w = true;
}
if (loop_h || loop_w)
{
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite);
if (loop_h)
{
rect.y = 0;
rect.w = r.w - 2048;
}
if (loop_w)
{
rect.x = 0;
rect.z = r.w - 2048;
}
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite);
}
else
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite);
}
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
@ -805,6 +835,23 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if (clut)
return; // FIXME
u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW);
std::vector<GSState::GSUploadQueue>::iterator iter = GSRendererHW::GetInstance()->m_draw_transfers.end();
// If the EE write overlaps the readback and was done since the last draw, there's no need to read it back.
// Dog's life and Ratchet Gladiator do this.
while (iter != GSRendererHW::GetInstance()->m_draw_transfers.begin())
{
--iter;
u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW);
// If the format, and location doesn't match, but also the upload is at least the size of the target, don't preload.
if (iter->blit.DBP < incoming_end && GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) && ee_write_end > BITBLTBUF.SBP && iter->draw == s_n)
{
DevCon.Warning("Download from same draw as write address %x, skipping invalidation", BITBLTBUF.SBP);
return;
}
}
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
}

View File

@ -1494,18 +1494,31 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
// (Busen0: Wizardry and Chaos Legion).
// Also in a few games the below code ran the Grandia3 case when it shouldn't :p
auto& rts = m_dst[RenderTarget];
for (auto it = rts.rbegin(); it != rts.rend(); ++it) // Iterate targets from LRU to MRU.
for (auto it = rts.rbegin(); it != rts.rend(); it++) // Iterate targets from LRU to MRU.
{
Target* t = *it;
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
{
if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw);
// Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want.
const bool expecting_this_tex = (bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0;
if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
continue;
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
const bool format_match = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW && bpp_match);
SurfaceOffsetKey sok;
sok.elems[0].bp = bp;
sok.elems[0].bw = bw;
sok.elems[0].psm = psm;
sok.elems[0].rect = r;
sok.elems[1].bp = t->m_TEX0.TBP0;
sok.elems[1].bw = t->m_TEX0.TBW;
sok.elems[1].psm = t->m_TEX0.PSM;
sok.elems[1].rect = t->m_valid;
// Calculate the rect offset if the BP doesn't match.
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset;
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset;
// Some games like to offset their GS download memory addresses by
// using overly big source Y position values.