diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index b243b41d6b..ac49c08f4f 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1766,7 +1766,15 @@ void GSState::Write(const u8* mem, int len) r.bottom = r.top + m_env.TRXREG.RRH; // Store the transfer for preloading new RT's. - if (m_draw_transfers.size() == 0 || (m_draw_transfers.size() > 0 && blit.DBP != m_draw_transfers.back().blit.DBP)) + if ((m_draw_transfers.size() > 0 && blit.DBP == m_draw_transfers.back().blit.DBP)) + { + // Same BP, let's update the rect. + GSUploadQueue transfer = m_draw_transfers.back(); + m_draw_transfers.pop_back(); + transfer.rect = transfer.rect.runion(r); + m_draw_transfers.push_back(transfer); + } + else { GSUploadQueue new_transfer = { blit, r, s_n }; m_draw_transfers.push_back(new_transfer); @@ -1915,16 +1923,22 @@ void GSState::Move() Flush(GSFlushReason::LOCALTOLOCALMOVE); } + GSVector4i r; + r.left = m_env.TRXPOS.DSAX; + r.top = m_env.TRXPOS.DSAY; + r.right = r.left + m_env.TRXREG.RRW; + r.bottom = r.top + m_env.TRXREG.RRH; // Store the transfer for preloading new RT's. - if (m_draw_transfers.size() == 0 || (m_draw_transfers.size() > 0 && dbp != m_draw_transfers.back().blit.DBP)) + if ((m_draw_transfers.size() > 0 && m_env.BITBLTBUF.DBP == m_draw_transfers.back().blit.DBP)) + { + // Same BP, let's update the rect. + GSUploadQueue transfer = m_draw_transfers.back(); + m_draw_transfers.pop_back(); + transfer.rect = transfer.rect.runion(r); + m_draw_transfers.push_back(transfer); + } + else { - GSVector4i r; - - r.left = m_env.TRXPOS.DSAX; - r.top = m_env.TRXPOS.DSAY; - r.right = r.left + m_env.TRXREG.RRW; - r.bottom = r.top + m_env.TRXREG.RRH; - GSUploadQueue new_transfer = { m_env.BITBLTBUF, r, s_n }; m_draw_transfers.push_back(new_transfer); } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index c6f3ec4ef3..a49475a96b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -834,24 +834,51 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS if (clut) return; // FIXME - u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW); + const u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW); std::vector::iterator iter = GSRendererHW::GetInstance()->m_draw_transfers.end(); + bool skip = false; // If the EE write overlaps the readback and was done since the last draw, there's no need to read it back. // Dog's life and Ratchet Gladiator do this. while (iter != GSRendererHW::GetInstance()->m_draw_transfers.begin()) { --iter; - u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW); + + if (!GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) || iter->draw != s_n) + continue; + + // Make sure write covers the read area. + const u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW); + if (!(iter->blit.DBP < incoming_end && ee_write_end > BITBLTBUF.SBP)) + continue; + + GSTextureCache::SurfaceOffsetKey sok; + sok.elems[0].bp = BITBLTBUF.SBP; + sok.elems[0].bw = BITBLTBUF.SBW; + sok.elems[0].psm = BITBLTBUF.SPSM; + sok.elems[0].rect = r; + sok.elems[1].bp = iter->blit.DBP; + sok.elems[1].bw = iter->blit.DBW; + sok.elems[1].psm = iter->blit.DPSM; + sok.elems[1].rect = iter->rect; + + // Calculate the rect offset if the BP doesn't match. + const GSVector4i targetr = GSUtil::HasCompatibleBits(iter->blit.DPSM, BITBLTBUF.SPSM) ? r : m_tc->ComputeSurfaceOffset(sok).b2a_offset; + + // Possibly incompatible or missed, we don't know, so let's assume it's a fail. + if (targetr.rempty()) + continue; + + //u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW); // If the format, and location doesn't match, but also the upload is at least the size of the target, don't preload. - if (iter->blit.DBP < incoming_end && GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) && ee_write_end > BITBLTBUF.SBP && iter->draw == s_n) + if (iter->rect.rintersect(targetr).eq(targetr)) { - //DevCon.Warning("Download from same draw as write address %x, skipping invalidation", BITBLTBUF.SBP); - return; + skip = true; } } - m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); + if(!skip) + m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); } void GSRendererHW::Move() diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 7b26fb7cb4..921c3cba70 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -833,27 +833,56 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con { const bool forced_preload = GSRendererHW::GetInstance()->m_force_preload > 0; const GSVector4i newrect = GSVector4i(0, 0, real_w, real_h); + const u32 rect_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(newrect.z - 1, newrect.w - 1, TEX0.TBP0, TEX0.TBW); if (!is_frame && !forced_preload && !preload) { std::vector::iterator iter; + GSVector4i eerect = GSVector4i::zero(); for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); ) { - // If the format, and location doesn't match, but also the upload is at least the size of the target, don't preload. - if (iter->blit.DBP == TEX0.TBP0 && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM) && iter->rect.rintersect(newrect).eq(newrect)) + // If the format, and location doesn't overlap + if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) { - GSRendererHW::GetInstance()->m_draw_transfers.erase(iter); - GL_INS("Preloading the RT DATA"); - AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW); - dst->Update(true); - break; + GSTextureCache::SurfaceOffsetKey sok; + sok.elems[0].bp = iter->blit.DBP; + sok.elems[0].bw = iter->blit.DBW; + sok.elems[0].psm = iter->blit.DPSM; + sok.elems[0].rect = iter->rect; + sok.elems[1].bp = TEX0.TBP0; + sok.elems[1].bw = TEX0.TBW; + sok.elems[1].psm = TEX0.PSM; + sok.elems[1].rect = newrect; + + // Calculate the rect offset if the BP doesn't match. + const GSVector4i targetr = (iter->blit.DBP == TEX0.TBP0 && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset; + if (eerect.rempty()) + eerect = targetr; + else + eerect = eerect.runion(targetr); + + iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter); + + if (eerect.rintersect(newrect).eq(newrect)) + break; + else + continue; } iter++; } + + if (!eerect.rempty()) + { + GL_INS("Preloading the RT DATA"); + dst->UpdateValidity(eerect); + AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW); + dst->Update(true); + } } else { GL_INS("Preloading the RT DATA"); + dst->UpdateValidity(newrect); AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW); dst->Update(true); } @@ -1441,6 +1470,9 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r const u32 bp = off.bp(); const u32 psm = off.psm(); [[maybe_unused]] const u32 bw = off.bw(); + const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw); + const u32 read_end = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw); + const bool read_paltex = GSLocalMemory::m_psm[psm].pal > 0; GL_CACHE("TC: InvalidateLocalMem off(0x%x, %u, %s) r(%d, %d => %d, %d)", bp, @@ -1468,7 +1500,9 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r { Target* t = *it; - if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30) + // Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want. + const bool expecting_this_tex = ((bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0) && read_end <= t->m_end_block; + if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || ((bp != t->m_TEX0.TBP0) && !GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))) continue; const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp; @@ -1496,11 +1530,29 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r Target* t = *it; if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) { - const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw); - // Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want. - const bool expecting_this_tex = (bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0; + // propagate the format from the result of a channel effect + // texture is 16/8 bit but the real data is 32 + // common use for shuffling is moving data into the alpha channel + // the game can then draw using 8H format + // in the case of silent hill blit 8H -> 8P + // this will matter later when the data ends up in GS memory in the wrong format + // Be careful to avoid 24 bit textures which are technically 32bit, as you could lose alpha (8H) data. + if (t->m_32_bits_fmt && t->m_TEX0.PSM > PSM_PSMCT24) + t->m_TEX0.PSM = PSM_PSMCT32; - if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30) + // Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want. + const bool expecting_this_tex = ((bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0) && read_end <= t->m_end_block; + const bool target_paltex = GSLocalMemory::m_psm[t->m_TEX0.PSM].pal > 0; + // Only allow an indexed format on a 32bit colour, if it's alpha channel. + const bool alpha_read = t->m_TEX0.PSM == PSM_PSMCT32 && psm >= PSM_PSMT8H; + + // Okay this is a nightmare of a check, so these are the conditions: + // 1. Check if it's expecting this texture, so the read must be inside this texture, even if the BP doesn't match. + // 2. It must overlap (okay maybe redundant). + // 3. If it's a paltex (indexed format), the target must also be paltex, unless it's only reading the alpha channel. + // 4. They share bits in some capacity. + // 5. If the BP doesn't match, make sure the formats are compatible, at very least (like CT32 + CT24 for example). + if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || (read_paltex != target_paltex && !alpha_read) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || (bp != t->m_TEX0.TBP0 && !GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))) continue; const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp; @@ -1514,15 +1566,10 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r sok.elems[1].bw = t->m_TEX0.TBW; sok.elems[1].psm = t->m_TEX0.PSM; sok.elems[1].rect = t->m_valid; - // Calculate the rect offset if the BP doesn't match. - const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset; - // Some games like to offset their GS download memory addresses by - // using overly big source Y position values. - // Checking for targets that overlap with the requested memory region - // instead of just comparing TBPs should fix that. - // For example, this fixes Judgement ring rendering in Shadow Hearts 2. - // Be wary of old targets being misdetected, set a sensible range of 30 frames (like Display source lookups). + // Calculate the rect offset if the BP doesn't match. + const GSVector4i targetr = GSVector4i((format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset).rintersect(t->m_drawn_since_read); + if (!targetr.rempty()) { // GH Note: Read will do a StretchRect and then will sizzle data to the GS memory @@ -1537,21 +1584,10 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r // Read(t, t->m_valid) works in all tested games but is very slow in GUST titles >< // Update: 18/02/2023: Chaos legion breaks because it reads the width at half of the real width. // Surface offset deals with this. - // If the game has been spamming downloads, we've already read the whole texture back at this point. - if (t->m_drawn_since_read.rempty()) + if (t->m_drawn_since_read.rempty() || !t->m_dirty.empty()) continue; - // propagate the format from the result of a channel effect - // texture is 16/8 bit but the real data is 32 - // common use for shuffling is moving data into the alpha channel - // the game can then draw using 8H format - // in the case of silent hill blit 8H -> 8P - // this will matter later when the data ends up in GS memory in the wrong format - // Be careful to avoid 24 bit textures which are technically 32bit, as you could lose alpha (8H) data. - if (t->m_32_bits_fmt && t->m_TEX0.PSM > PSM_PSMCT24) - t->m_TEX0.PSM = PSM_PSMCT32; - if (GSConfig.HWDownloadMode != GSHardwareDownloadMode::Enabled) { const GSVector4i rb_rc((!GSConfig.UserHacks_DisablePartialInvalidation && targetr.x == 0 && targetr.y == 0) ? t->m_valid : targetr.rintersect(t->m_valid)); @@ -1569,33 +1605,30 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r else if(!targetr.rintersect(t->m_drawn_since_read).rempty()) // Block level read? { // Read the width of the draw, reading too much could wipe out dirty memory. - GSVector4i full_lines = GSVector4i(0, targetr.y, t->m_drawn_since_read.z, targetr.w); - full_lines = targetr.rintersect(t->m_drawn_since_read); - - Read(t, full_lines); + Read(t, targetr); // After reading, try to cut down our "dirty" rect. - if (full_lines.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read)) + if (targetr.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read)) t->m_drawn_since_read = GSVector4i::zero(); else { // Try to cut down how much we read next, if we can. // Fatal Frame reads in vertical strips, SOCOM 2 does horizontal, so we can handle that below. - if (full_lines.width() == t->m_drawn_since_read.width() - && full_lines.w >= t->m_drawn_since_read.y) + if (targetr.width() == t->m_drawn_since_read.width() + && targetr.w >= t->m_drawn_since_read.y) { - if (full_lines.y <= t->m_drawn_since_read.y) - t->m_drawn_since_read.y = full_lines.w; - else if (full_lines.w >= t->m_drawn_since_read.w) - t->m_drawn_since_read.w = full_lines.y; + if (targetr.y <= t->m_drawn_since_read.y) + t->m_drawn_since_read.y = targetr.w; + else if (targetr.w >= t->m_drawn_since_read.w) + t->m_drawn_since_read.w = targetr.y; } - else if (full_lines.height() == t->m_drawn_since_read.height() - && full_lines.z >= t->m_drawn_since_read.x) + else if (targetr.height() == t->m_drawn_since_read.height() + && targetr.z >= t->m_drawn_since_read.x) { - if (full_lines.x <= t->m_drawn_since_read.x) - t->m_drawn_since_read.x = full_lines.z; - else if (full_lines.z >= t->m_drawn_since_read.z) - t->m_drawn_since_read.z = full_lines.x; + if (targetr.x <= t->m_drawn_since_read.x) + t->m_drawn_since_read.x = targetr.z; + else if (targetr.z >= t->m_drawn_since_read.z) + t->m_drawn_since_read.z = targetr.x; } } t->readbacks_since_draw++;