From 1ef9bc464dbb5cbca327f5ca91460b5eaf5e4e68 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 3 Jun 2023 18:24:37 +0100 Subject: [PATCH] GS-HW: Only preload targets when data is needed --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 26 +++-- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 133 ++++++++++++----------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 3 files changed, 88 insertions(+), 73 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 9cdd58eb9c..7424e30699 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1667,8 +1667,15 @@ void GSRendererHW::Draw() return; } + // The rectangle of the draw rounded up. + const GSVector4 rect = m_vt.m_min.p.upld(m_vt.m_max.p + GSVector4::cxpr(0.5f)); + m_r = GSVector4i(rect).rintersect(context->scissor.in); + + const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC); + const u32 frame_end_bp = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); // SW CLUT Render enable. bool force_preload = GSConfig.PreloadFrameWithGSData; + bool preload_uploads = false; if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled) { const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw(); @@ -1693,10 +1700,12 @@ void GSRendererHW::Draw() } } } - - // The rectangle of the draw rounded up. - const GSVector4 rect = m_vt.m_min.p.upld(m_vt.m_max.p + GSVector4::cxpr(0.5f)); - m_r = GSVector4i(rect).rintersect(context->scissor.in); + else if (((fm & fm_mask) != 0) || // Some channels masked + !IsDiscardingDstColor() || !PrimitiveCoversWithoutGaps() || // Using Dst Color or draw has gaps + (process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() && m_cached_ctx.TEX0.TBP0 < frame_end_bp)) // Tex is RT + { + preload_uploads = true; + } if (!m_channel_shuffle && m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && IsPossibleChannelShuffle()) @@ -1843,7 +1852,6 @@ void GSRendererHW::Draw() GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC); // Disable texture mapping if the blend is black and using alpha from vertex. if (process_texture) { @@ -1964,9 +1972,13 @@ void GSRendererHW::Draw() // Estimate size based on the scissor rectangle and height cache. const GSVector2i t_size = GetTargetSize(src); + const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. - m_r = m_r.rintersect(GSVector4i::loadh(t_size)); + m_r = m_r.rintersect(t_size_rect); + + // Ensure areas not drawn to are filled in by preloads. Test case: Okami + preload_uploads |= !m_r.eq(t_size_rect); float target_scale = GetTextureScaleFactor(); @@ -1996,7 +2008,7 @@ void GSRendererHW::Draw() const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && PrimitiveCoversWithoutGaps(); const bool is_clear = is_possible_mem_clear && is_square; rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, target_scale, GSTextureCache::RenderTarget, true, - fm, false, is_clear, force_preload); + fm, false, is_clear, force_preload, preload_uploads); // Draw skipped because it was a clear and there was no target. if (!rt) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index d388c4fb1f..066542d80a 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1199,7 +1199,7 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block, } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool is_clear, bool preload) + bool used, u32 fbmask, bool is_frame, bool is_clear, bool preload, bool preload_uploads) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1470,81 +1470,84 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (!is_frame && !forced_preload && !preload) { - std::vector::iterator iter; - GSVector4i eerect = GSVector4i::zero(); - - for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); ) + if (preload_uploads) { - // If the format, and location doesn't overlap - if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) - { - GSVector4i targetr = {}; - const bool can_translate = CanTranslate(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect, TEX0.TBP0, TEX0.PSM, TEX0.TBW); - const bool swizzle_match = GSLocalMemory::m_psm[iter->blit.DPSM].depth == GSLocalMemory::m_psm[TEX0.PSM].depth; - if (can_translate) - { - if (swizzle_match) - { - targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP, iter->blit.DPSM, iter->blit.DBW, iter->rect, true); - } - else - { - // If it's not page aligned, grab the whole pages it covers, to be safe. - GSVector4i new_rect = iter->rect; - const GSVector2i page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs; + std::vector::iterator iter; + GSVector4i eerect = GSVector4i::zero(); - if (GSLocalMemory::m_psm[iter->blit.DPSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp) + for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); ) + { + // If the format, and location doesn't overlap + if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) + { + GSVector4i targetr = {}; + const bool can_translate = CanTranslate(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect, TEX0.TBP0, TEX0.PSM, TEX0.TBW); + const bool swizzle_match = GSLocalMemory::m_psm[iter->blit.DPSM].depth == GSLocalMemory::m_psm[TEX0.PSM].depth; + if (can_translate) + { + if (swizzle_match) { - const GSVector2i dst_page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs; - new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y); - new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y); + targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP, iter->blit.DPSM, iter->blit.DBW, iter->rect, true); } else { - new_rect.x &= ~(page_size.x - 1); - new_rect.y &= ~(page_size.y - 1); - new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1); - new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1); + // If it's not page aligned, grab the whole pages it covers, to be safe. + GSVector4i new_rect = iter->rect; + const GSVector2i page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs; + + if (GSLocalMemory::m_psm[iter->blit.DPSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp) + { + const GSVector2i dst_page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs; + new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y); + new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y); + } + else + { + new_rect.x &= ~(page_size.x - 1); + new_rect.y &= ~(page_size.y - 1); + new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1); + new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1); + } + targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP & ~((1 << 5) - 1), iter->blit.DPSM, iter->blit.DBW, new_rect, true); } - targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP & ~((1 << 5) - 1), iter->blit.DPSM, iter->blit.DBW, new_rect, true); } + else + { + GSTextureCache::SurfaceOffsetKey sok; + sok.elems[0].bp = iter->blit.DBP; + sok.elems[0].bw = iter->blit.DBW; + sok.elems[0].psm = iter->blit.DPSM; + sok.elems[0].rect = iter->rect; + sok.elems[1].bp = TEX0.TBP0; + sok.elems[1].bw = TEX0.TBW; + sok.elems[1].psm = TEX0.PSM; + sok.elems[1].rect = newrect; + + // Calculate the rect offset if the BP doesn't match. + targetr = (iter->blit.DBP == TEX0.TBP0) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset; + } + + if (eerect.rempty()) + eerect = targetr; + else + eerect = eerect.runion(targetr); + + iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter); + + if (eerect.rintersect(newrect).eq(newrect)) + break; + else + continue; } - else - { - GSTextureCache::SurfaceOffsetKey sok; - sok.elems[0].bp = iter->blit.DBP; - sok.elems[0].bw = iter->blit.DBW; - sok.elems[0].psm = iter->blit.DPSM; - sok.elems[0].rect = iter->rect; - sok.elems[1].bp = TEX0.TBP0; - sok.elems[1].bw = TEX0.TBW; - sok.elems[1].psm = TEX0.PSM; - sok.elems[1].rect = newrect; - - // Calculate the rect offset if the BP doesn't match. - targetr = (iter->blit.DBP == TEX0.TBP0) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset; - } - - if (eerect.rempty()) - eerect = targetr; - else - eerect = eerect.runion(targetr); - - iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter); - - if (eerect.rintersect(newrect).eq(newrect)) - break; - else - continue; + iter++; } - iter++; - } - if (!eerect.rempty()) - { - GL_INS("Preloading the RT DATA from updated GS Memory"); - eerect = eerect.rintersect(newrect); - AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16); + if (!eerect.rempty()) + { + GL_INS("Preloading the RT DATA from updated GS Memory"); + eerect = eerect.rintersect(newrect); + AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16); + } } } else diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index eb4d0d0f0f..5e8e9106cf 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -452,7 +452,7 @@ public: Target* FindTargetOverlap(u32 bp, u32 end_block, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, - bool is_frame = false, bool is_clear = false, bool preload = GSConfig.PreloadFrameWithGSData); + bool is_frame = false, bool is_clear = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preload_uploads = true); Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale); /// Looks up a target in the cache, and only returns it if the BP/BW match exactly.