diff --git a/pcsx2/GS/GSUtil.cpp b/pcsx2/GS/GSUtil.cpp index b195f0b196..0a1e31c9e5 100644 --- a/pcsx2/GS/GSUtil.cpp +++ b/pcsx2/GS/GSUtil.cpp @@ -152,6 +152,22 @@ bool GSUtil::HasCompatibleBits(u32 spsm, u32 dpsm) return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0; } +u32 GSUtil::GetChannelMask(u32 spsm) +{ + switch (spsm) + { + case PSM_PSMCT24: + case PSM_PSMZ24: + return 0x7; + case PSM_PSMT8H: + case PSM_PSMT4HH: // This sucks, I'm sorry, but we don't have a way to do half channels + case PSM_PSMT4HL: // So uuhh TODO I guess. + return 0x8; + default: + return 0xf; + } +} + CRCHackLevel GSUtil::GetRecommendedCRCHackLevel(GSRendererType type) { return (type == GSRendererType::DX11 || type == GSRendererType::DX12) ? CRCHackLevel::Full : CRCHackLevel::Partial; diff --git a/pcsx2/GS/GSUtil.h b/pcsx2/GS/GSUtil.h index 3c1e228112..47d4e7f80b 100644 --- a/pcsx2/GS/GSUtil.h +++ b/pcsx2/GS/GSUtil.h @@ -32,6 +32,7 @@ public: static bool HasSharedBits(u32 spsm, u32 dpsm); static bool HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm); static bool HasCompatibleBits(u32 spsm, u32 dpsm); + static u32 GetChannelMask(u32 spsm); static CRCHackLevel GetRecommendedCRCHackLevel(GSRendererType type); static GSRendererType GetPreferredRenderer(); diff --git a/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp b/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp index b7ff27d06f..2a19ee08b2 100644 --- a/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp +++ b/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp @@ -20,14 +20,16 @@ GSDirtyRect::GSDirtyRect() : r(GSVector4i::zero()), psm(PSM_PSMCT32), - bw(1) + bw(1), + rgba({}) { } -GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw) : +GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba) : r(r), psm(psm), - bw(bw) + bw(bw), + rgba(rgba) { } @@ -73,6 +75,21 @@ GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size return GSVector4i::zero(); } +u32 GSDirtyRectList::GetDirtyChannels() +{ + u32 channels = 0; + + if (!empty()) + { + for (auto& dirty_rect : *this) + { + channels |= dirty_rect.rgba._u32; + } + } + + return channels; +} + GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const { const GSVector4i r = (*this)[index].GetDirtyRect(TEX0); diff --git a/pcsx2/GS/Renderers/Common/GSDirtyRect.h b/pcsx2/GS/Renderers/Common/GSDirtyRect.h index 150fa9606b..1933582f1c 100644 --- a/pcsx2/GS/Renderers/Common/GSDirtyRect.h +++ b/pcsx2/GS/Renderers/Common/GSDirtyRect.h @@ -17,15 +17,28 @@ #include "GS/GSLocalMemory.h" +union RGBAMask +{ + struct + { + u32 r : 1; + u32 g : 1; + u32 b : 1; + u32 a : 1; + } c; + u32 _u32; +}; + class GSDirtyRect { public: GSVector4i r; u32 psm; u32 bw; + RGBAMask rgba; GSDirtyRect(); - GSDirtyRect(GSVector4i& r, u32 psm, u32 bw); + GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba); GSVector4i GetDirtyRect(GIFRegTEX0 TEX0) const; }; @@ -34,5 +47,6 @@ class GSDirtyRectList : public std::vector public: GSDirtyRectList() {} GSVector4i GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const; + u32 GetDirtyChannels(); GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const; }; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 921c3cba70..6c78f5f0b6 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -83,15 +83,16 @@ void GSTextureCache::RemoveAll() m_surface_offset_cache.clear(); } -void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw) +void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw, RGBAMask rgba) { bool skipdirty = false; bool canskip = true; + std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { --it; - if (it[0].bw == bw && it[0].psm == psm) + if (it[0].bw == bw && it[0].psm == psm && it[0].rgba._u32 == rgba._u32) { if (it[0].r.rintersect(rect).eq(rect) && canskip) { @@ -112,7 +113,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm } if (!skipdirty) - target->m_dirty.push_back(GSDirtyRect(rect, psm, bw)); + target->m_dirty.push_back(GSDirtyRect(rect, psm, bw, rgba)); } GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette) @@ -382,7 +383,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // Solution: consider the RT as 32 bits if the alpha was used in the past const u32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; - const bool t_clean = t->m_dirty.empty(); + const u32 channels = t->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(psm); + // If not all channels are clean/dirty, we need to update the target. + if (channels != 0 && channels != GSUtil::GetChannelMask(psm)) + t->Update(false); + const bool t_clean = (t->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(psm)) == 0; const bool t_wraps = t->m_end_block > GSTextureCache::MAX_BP; // Match if we haven't already got a tex in rt @@ -473,6 +478,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if (tex_in_rt) { GSVector2i size_delta = { ((x_offset + (1 << TEX0.TW)) - dst->m_valid.z), ((y_offset + (1 << TEX0.TH)) - dst->m_valid.w) }; + RGBAMask rgba; + rgba._u32 = GSUtil::GetChannelMask(psm); if (size_delta.x > 0) { @@ -483,7 +490,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { dst->UpdateValidity(dirty_rect); - AddDirtyRectTarget(dst, dirty_rect, dst->m_TEX0.PSM, dst->m_TEX0.TBW); + AddDirtyRectTarget(dst, dirty_rect, dst->m_TEX0.PSM, dst->m_TEX0.TBW, rgba); } } @@ -496,7 +503,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { dst->UpdateValidity(dirty_rect); - AddDirtyRectTarget(dst, dirty_rect, dst->m_TEX0.PSM, dst->m_TEX0.TBW); + AddDirtyRectTarget(dst, dirty_rect, dst->m_TEX0.PSM, dst->m_TEX0.TBW, rgba); } } const GSVector2 up_s(dst->m_texture->GetScale()); @@ -834,11 +841,14 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con const bool forced_preload = GSRendererHW::GetInstance()->m_force_preload > 0; const GSVector4i newrect = GSVector4i(0, 0, real_w, real_h); const u32 rect_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(newrect.z - 1, newrect.w - 1, TEX0.TBP0, TEX0.TBW); + RGBAMask rgba; + rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM); if (!is_frame && !forced_preload && !preload) { std::vector::iterator iter; GSVector4i eerect = GSVector4i::zero(); + for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); ) { // If the format, and location doesn't overlap @@ -853,9 +863,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con sok.elems[1].bw = TEX0.TBW; sok.elems[1].psm = TEX0.PSM; sok.elems[1].rect = newrect; - + // Calculate the rect offset if the BP doesn't match. - const GSVector4i targetr = (iter->blit.DBP == TEX0.TBP0 && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset; + const GSVector4i targetr = (iter->blit.DBP == TEX0.TBP0) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset; if (eerect.rempty()) eerect = targetr; else @@ -874,8 +884,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con if (!eerect.rempty()) { GL_INS("Preloading the RT DATA"); - dst->UpdateValidity(eerect); - AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW); + eerect = eerect.rintersect(newrect); + dst->UpdateValidity(newrect); + AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba); dst->Update(true); } } @@ -883,7 +894,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con { GL_INS("Preloading the RT DATA"); dst->UpdateValidity(newrect); - AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW); + AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW, rgba); dst->Update(true); } } @@ -961,21 +972,23 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, g_gs_device->Recycle(old_texture); t->m_texture = new_texture; + RGBAMask rgba; + rgba._u32 = GSUtil::GetChannelMask(t->m_TEX0.PSM); // We unconditionally preload the frame here, because otherwise we'll end up with blackness for one frame (when the expand happens). const int preload_width = t->m_TEX0.TBW * 64; if (old_width < preload_width && old_height < needed_height) { const GSVector4i right(old_width, 0, preload_width, needed_height); const GSVector4i bottom(0, old_height, old_width, needed_height); - AddDirtyRectTarget(t, right, t->m_TEX0.PSM, t->m_TEX0.TBW); - AddDirtyRectTarget(t, bottom, t->m_TEX0.PSM, t->m_TEX0.TBW); + AddDirtyRectTarget(t, right, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba); + AddDirtyRectTarget(t, bottom, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba); } else { const GSVector4i newrect = GSVector4i((old_height < scaled_needed_height) ? 0 : old_width, (old_width < preload_width) ? 0 : old_height, preload_width, needed_height); - AddDirtyRectTarget(t, newrect, t->m_TEX0.PSM, t->m_TEX0.TBW); + AddDirtyRectTarget(t, newrect, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba); } // Inject the new height back into the cache. @@ -1011,6 +1024,8 @@ void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVect TEX0.PSM = BITBLTBUF.DPSM; Target* dst = nullptr; auto& list = m_dst[RenderTarget]; + RGBAMask rgba; + rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM); for (auto i = list.begin(); i != list.end(); ++i) { @@ -1043,7 +1058,7 @@ void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVect // which are being expanded one-line-at-a-time. if (dst->ResizeTexture(upsc_width, upsc_height, false)) { - AddDirtyRectTarget(dst, r, TEX0.PSM, TEX0.TBW); + AddDirtyRectTarget(dst, r, TEX0.PSM, TEX0.TBW, rgba); GetTargetHeight(TEX0.TBP0, TEX0.TBW, TEX0.PSM, aligned_height); dst->UpdateValidity(r); dst->UpdateValidBits(GSLocalMemory::m_psm[TEX0.PSM].fmsk); @@ -1055,7 +1070,7 @@ void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVect const GSVector4i clamped_r( r.rintersect(GSVector4i(0, 0, static_cast(dst->m_texture->GetWidth() / dst->m_texture->GetScale().x), static_cast(dst->m_texture->GetHeight() / dst->m_texture->GetScale().y)))); - AddDirtyRectTarget(dst, clamped_r, TEX0.PSM, TEX0.TBW); + AddDirtyRectTarget(dst, clamped_r, TEX0.PSM, TEX0.TBW, rgba); dst->UpdateValidity(clamped_r); dst->UpdateValidBits(GSLocalMemory::m_psm[TEX0.PSM].fmsk); } @@ -1241,6 +1256,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // Ideally in the future we can turn this on unconditionally, but for now it breaks too much. const bool check_inside_target = (GSConfig.UserHacks_TargetPartialInvalidation || GSConfig.UserHacks_TextureInsideRt); + RGBAMask rgba; + rgba._u32 = GSUtil::GetChannelMask(psm); for (int type = 0; type < 2; type++) { @@ -1275,7 +1292,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (eewrite) t->m_age = 0; - AddDirtyRectTarget(t, r, psm, bw); + AddDirtyRectTarget(t, r, psm, bw, rgba); } else { @@ -1290,7 +1307,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // Compatible formats and same width, probably updating the same texture, so just mark it as dirty. if (bw == t->m_TEX0.TBW && GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) { - AddDirtyRectTarget(t, rect, psm, bw); + AddDirtyRectTarget(t, rect, psm, bw, rgba); GL_CACHE("TC: Direct Dirty in the middle [aggressive] of Target(%s) %d [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", to_string(type), t->m_texture ? t->m_texture->GetID() : 0, @@ -1324,7 +1341,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r SurfaceOffset so = ComputeSurfaceOffset(off, r, t); if (so.is_valid) { - AddDirtyRectTarget(t, so.b2a_offset, psm, bw); + AddDirtyRectTarget(t, so.b2a_offset, psm, bw, rgba); GL_CACHE("TC: Dirty in the middle [aggressive] of Target(%s) %d [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", to_string(type), t->m_texture ? t->m_texture->GetID() : 0, @@ -1401,7 +1418,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r t->m_age = 0; const GSVector4i dirty_r = GSVector4i(r.left, r.top - y, r.right, r.bottom - y); - AddDirtyRectTarget(t, dirty_r, psm, bw); + AddDirtyRectTarget(t, dirty_r, psm, bw, rgba); continue; } } @@ -1432,7 +1449,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r t->m_age = 0; const GSVector4i dirty_r = GSVector4i(r.left, r.top + y, r.right, r.bottom + y); - AddDirtyRectTarget(t, dirty_r, psm, bw); + AddDirtyRectTarget(t, dirty_r, psm, bw, rgba); continue; } } @@ -1454,7 +1471,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (eewrite) t->m_age = 0; - AddDirtyRectTarget(t, so.b2a_offset, t->m_TEX0.PSM, t->m_TEX0.TBW); + AddDirtyRectTarget(t, so.b2a_offset, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba); } } #endif @@ -1809,7 +1826,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - + GL_CACHE("HW Move 0x%x to 0x%x <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, DBP, sx, sy, sx + w, sy, h, dx, dy, dx + w, dy + h); g_gs_device->CopyRect(src->m_texture, dst->m_texture, GSVector4i(scaled_sx, scaled_sy, scaled_sx + scaled_w, scaled_sy + scaled_h), scaled_dx, scaled_dy); @@ -3315,19 +3332,25 @@ void GSTextureCache::Target::Update(bool reset_age) for (size_t i = 0; i < m_dirty.size(); i++) { const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect)); + if (r.rempty()) continue; const GSVector4 sRect(GSVector4(r - t_offset) / t_sizef); const GSVector4 dRect(GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); - // Copy the new GS memory content into the destination texture. if (m_type == RenderTarget) { GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w); - - // Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy. - g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::COPY, g_gs_renderer->CanUpscale()); + if (m_dirty[i].rgba._u32 != 0xf) + { + g_gs_device->StretchRect(t, sRect, m_texture, dRect, m_dirty[i].rgba.c.r, m_dirty[i].rgba.c.g, m_dirty[i].rgba.c.b, m_dirty[i].rgba.c.a); + } + else + { + // Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy. + g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::COPY, g_gs_renderer->CanUpscale()); + } } else if (m_type == DepthStencil) { diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index c869c1d36d..b7d236b54f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -408,7 +408,7 @@ public: void Read(Source* t, const GSVector4i& r); void RemoveAll(); void ReadbackAll(); - void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw); + void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw, RGBAMask rgba); GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);