mirror of https://github.com/PCSX2/pcsx2.git
GS-HW: Improve GS read target detection, avoid reading dirty targets.
This commit is contained in:
parent
264086e0aa
commit
5bb3d8e60d
|
@ -130,16 +130,23 @@ bool GSUtil::HasSharedBits(u32 spsm, const u32* RESTRICT ptr)
|
|||
return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
||||
}
|
||||
|
||||
// Pixels can NOT coexist in the same 32bits of space.
|
||||
// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check.
|
||||
bool GSUtil::HasSharedBits(u32 spsm, u32 dpsm)
{
	// The row for dpsm packs one flag per spsm value into 32-bit words:
	// word index is spsm / 32, bit index is spsm % 32.
	const auto word = s_maps.SharedBitsField[dpsm][spsm >> 5];
	const auto mask = 1 << (spsm & 0x1f);

	// The check passes only when the flag for this pair is clear.
	return (word & mask) == 0;
}
|
||||
|
||||
// Pixels can NOT coexist in the same 32bits of space.
|
||||
// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check.
|
||||
// SBP and DBP must match.
|
||||
bool GSUtil::HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm)
{
	// Differing base pointers fail the check outright.
	if (sbp != dbp)
		return false;

	// Flag lookup: row selected by dpsm, word by spsm / 32, bit by spsm % 32.
	const auto word = s_maps.SharedBitsField[dpsm][spsm >> 5];
	const auto mask = 1 << (spsm & 0x1f);

	// With matching base pointers, the check passes only when the flag is clear.
	return (word & mask) == 0;
}
|
||||
|
||||
// Shares bit depths, only detects 16/24/32 bit formats.
|
||||
// 24/32bit cross compatible, 16bit compatible with 16bit.
|
||||
bool GSUtil::HasCompatibleBits(u32 spsm, u32 dpsm)
|
||||
{
|
||||
return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;
|
||||
|
|
|
@ -795,7 +795,37 @@ void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
{
	// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", static_cast<int>(g_perfmon.GetFrame()), r.left, r.top, r.right, r.bottom, static_cast<int>(BITBLTBUF.DBP), static_cast<int>(BITBLTBUF.DPSM));

	// Destination offset is the same for every invalidation below, so resolve it once.
	const auto& off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);

	// This is gross, but if the EE write loops, we need to split it on the 2048 border.
	const bool loop_h = r.w > 2048; // r.w drives the vertical split.
	const bool loop_w = r.z > 2048; // r.z drives the horizontal split.

	// Common case: the write stays inside the border, invalidate it as-is.
	if (!loop_h && !loop_w)
	{
		m_tc->InvalidateVideoMem(off, r, eewrite);
		return;
	}

	// First pass: the part of the write up to the 2048 border.
	GSVector4i rect = r;
	if (loop_h)
		rect.w = 2048;
	if (loop_w)
		rect.z = 2048;

	m_tc->InvalidateVideoMem(off, rect, eewrite);

	// Second pass: the remainder past the border, restarted from zero on each axis that looped.
	if (loop_h)
	{
		rect.y = 0;
		rect.w = r.w - 2048;
	}
	if (loop_w)
	{
		rect.x = 0;
		// The horizontal remainder must come from r.z (the value that triggered loop_w),
		// not from r.w, which belongs to the vertical split above.
		rect.z = r.z - 2048;
	}

	m_tc->InvalidateVideoMem(off, rect, eewrite);
}
|
||||
|
||||
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
|
@ -805,6 +835,23 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
if (clut)
|
||||
return; // FIXME
|
||||
|
||||
u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW);
|
||||
std::vector<GSState::GSUploadQueue>::iterator iter = GSRendererHW::GetInstance()->m_draw_transfers.end();
|
||||
|
||||
// If the EE write overlaps the readback and was done since the last draw, there's no need to read it back.
|
||||
// Dog's life and Ratchet Gladiator do this.
|
||||
while (iter != GSRendererHW::GetInstance()->m_draw_transfers.begin())
|
||||
{
|
||||
--iter;
|
||||
u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW);
|
||||
// If an EE upload recorded for the current draw (s_n) overlaps the download range and passes the shared-bits check, skip the readback.
|
||||
if (iter->blit.DBP < incoming_end && GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) && ee_write_end > BITBLTBUF.SBP && iter->draw == s_n)
|
||||
{
|
||||
DevCon.Warning("Download from same draw as write address %x, skipping invalidation", BITBLTBUF.SBP);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||
}
|
||||
|
||||
|
|
|
@ -1494,18 +1494,31 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
// (Busen0: Wizardry and Chaos Legion).
|
||||
// Also in a few games the below code ran the Grandia3 case when it shouldn't :p
|
||||
auto& rts = m_dst[RenderTarget];
|
||||
for (auto it = rts.rbegin(); it != rts.rend(); ++it) // Iterate targets from LRU to MRU.
|
||||
for (auto it = rts.rbegin(); it != rts.rend(); it++) // Iterate targets from LRU to MRU.
|
||||
{
|
||||
Target* t = *it;
|
||||
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
|
||||
{
|
||||
if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
|
||||
const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw);
|
||||
// Check the offset of the read; if it's not pointing at or inside this texture, it's probably not what we want.
|
||||
const bool expecting_this_tex = (bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0;
|
||||
|
||||
if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
|
||||
continue;
|
||||
|
||||
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
|
||||
const bool format_match = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW && bpp_match);
|
||||
SurfaceOffsetKey sok;
|
||||
sok.elems[0].bp = bp;
|
||||
sok.elems[0].bw = bw;
|
||||
sok.elems[0].psm = psm;
|
||||
sok.elems[0].rect = r;
|
||||
sok.elems[1].bp = t->m_TEX0.TBP0;
|
||||
sok.elems[1].bw = t->m_TEX0.TBW;
|
||||
sok.elems[1].psm = t->m_TEX0.PSM;
|
||||
sok.elems[1].rect = t->m_valid;
|
||||
// Calculate the rect offset if the BP doesn't match.
|
||||
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset;
|
||||
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset;
|
||||
|
||||
// Some games like to offset their GS download memory addresses by
|
||||
// using overly big source Y position values.
|
||||
|
|
Loading…
Reference in New Issue