GS/HW: let the hardware renderer service Move() from the texture cache.

GSState::Move() becomes virtual. GSRendererHW overrides it and first asks
GSTextureCache::Move() to copy between two exactly-matching cached targets;
when the cache declines, the base-class implementation runs as before.

diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h
index 000e751dc5..ffd54a8f2c 100644
--- a/pcsx2/GS/GSState.h
+++ b/pcsx2/GS/GSState.h
@@ -313,7 +313,8 @@ public:
 	virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
 	virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}
 
-	void Move();
+	virtual void Move();
+
 	void Write(const u8* mem, int len);
 	void Read(u8* mem, int len);
 	void InitReadFIFO(u8* mem, int len);
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
index 239e4f7cb3..dd32f1671a 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
@@ -799,6 +799,28 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
 	m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
 }
 
+// Hardware-renderer override of Move(): offers the transfer described by TRXPOS/TRXREG/BITBLTBUF
+// to the texture cache first, and falls back to GSRenderer::Move() when the cache declines it.
+void GSRendererHW::Move()
+{
+	const int sx = m_env.TRXPOS.SSAX;
+	const int sy = m_env.TRXPOS.SSAY;
+	const int dx = m_env.TRXPOS.DSAX;
+	const int dy = m_env.TRXPOS.DSAY;
+
+	const int w = m_env.TRXREG.RRW;
+	const int h = m_env.TRXREG.RRH;
+
+	if (m_tc->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy,
+			m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h))
+	{
+		// Handled entirely in TC, no need to update local memory.
+		return;
+	}
+
+	GSRenderer::Move();
+}
+
 u16 GSRendererHW::Interpolate_UV(float alpha, int t0, int t1)
 {
 	const float t = (1.0f - alpha) * t0 + alpha * t1;
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h
index 1734150d95..68fbea4798 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.h
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h
@@ -190,6 +190,7 @@ public:
 	GSTexture* GetFeedbackOutput() override;
 	void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) override;
 	void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) override;
+	void Move() override;
 	void Draw() override;
 
 	void PurgeTextureCache() override;
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
index 26ecf116e6..be79c44464 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
@@ -1045,6 +1045,73 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
 	// TODO: ds
 }
 
+// Tries to perform a move as a g_gs_device->CopyRect() between two cached targets.
+// sx/sy/dx/dy/w/h are unscaled; they are multiplied by the source texture's scale before the copy.
+// Returns true when the copy was issued (sources overlapping the destination rectangle are then
+// invalidated), false when the caller has to handle the move itself.
+bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u32 DBW, u32 DPSM, int dx, int dy, int w, int h)
+{
+	// TODO: In theory we could do channel swapping on the GPU, but we haven't found anything which needs it so far.
+	// Same with SBP == DBP, but this behavior could change based on direction?
+	if (SPSM != DPSM || SBP == DBP)
+	{
+		GL_CACHE("Skipping HW move from 0x%X to 0x%X with SPSM=%u DPSM=%u", SBP, DBP, SPSM, DPSM);
+		return false;
+	}
+
+	// DX11/12 is a bit lame and can't partial copy depth targets. We could do this with a blit instead,
+	// but so far haven't seen anything which needs it.
+	if (GSConfig.Renderer == GSRendererType::DX11 || GSConfig.Renderer == GSRendererType::DX12)
+	{
+		if (GSLocalMemory::m_psm[SPSM].depth || GSLocalMemory::m_psm[DPSM].depth)
+			return false;
+	}
+
+	// Look for an exact match on the targets.
+	GSTextureCache::Target* src = GetExactTarget(SBP, SBW, SPSM);
+	GSTextureCache::Target* dst = GetExactTarget(DBP, DBW, DPSM);
+	if (!src || !dst || src->m_texture->GetScale() != dst->m_texture->GetScale())
+		return false;
+
+	// Scale coordinates.
+	const GSVector2 scale(src->m_texture->GetScale());
+	const int scaled_sx = static_cast<int>(sx * scale.x);
+	const int scaled_sy = static_cast<int>(sy * scale.y);
+	const int scaled_dx = static_cast<int>(dx * scale.x);
+	const int scaled_dy = static_cast<int>(dy * scale.y);
+	const int scaled_w = static_cast<int>(w * scale.x);
+	const int scaled_h = static_cast<int>(h * scale.y);
+
+	// Make sure the copy doesn't go out of bounds (it shouldn't).
+	if ((scaled_sx + scaled_w) > src->m_texture->GetWidth() || (scaled_sy + scaled_h) > src->m_texture->GetHeight() ||
+		(scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight())
+	{
+		return false;
+	}
+
+	g_gs_device->CopyRect(src->m_texture, dst->m_texture,
+		GSVector4i(scaled_sx, scaled_sy, scaled_sx + scaled_w, scaled_sy + scaled_h),
+		scaled_dx, scaled_dy);
+
+	// Invalidate any sources that overlap with the target (since they're now stale).
+	InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false);
+	return true;
+}
+
+// Returns the first target in the list selected by PSM's depth flag whose TBP0/TBW/PSM equal BP/BW/PSM, or nullptr.
+GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, u32 PSM) const
+{
+	auto& rts = m_dst[GSLocalMemory::m_psm[PSM].depth ? DepthStencil : RenderTarget];
+	for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU.
+	{
+		Target* t = *it;
+		if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->m_TEX0.PSM == PSM)
+			return t;
+	}
+
+	return nullptr;
+}
+
 // Hack: remove Target that are strictly included in current rt. Typically uses for FMV
 // For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800
 // FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h
index 4843880c09..32c4fcc691 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.h
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h
@@ -283,6 +283,9 @@ protected:
 	Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, const GSVector2i* lod = nullptr, const GSVector4i* src_range = nullptr);
 	Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
 
+	/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
+	Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const;
+
 	HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod);
 
 	static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
@@ -312,6 +315,8 @@ public:
 	void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
 	void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
 	void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
+	/// Copies a rectangle between two exactly-matching cached targets. Returns false if the move was not handled.
+	bool Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u32 DBW, u32 DPSM, int dx, int dy, int w, int h);
 
 	void IncAge();
 