GS-HW: Only preload targets when data is needed

This commit is contained in:
refractionpcsx2 2023-06-03 18:24:37 +01:00
parent ba3a7fc11a
commit 1ef9bc464d
3 changed files with 88 additions and 73 deletions

View File

@ -1667,8 +1667,15 @@ void GSRendererHW::Draw()
return;
}
// The rectangle of the draw rounded up.
const GSVector4 rect = m_vt.m_min.p.upld(m_vt.m_max.p + GSVector4::cxpr(0.5f));
m_r = GSVector4i(rect).rintersect(context->scissor.in);
const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
const u32 frame_end_bp = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
// SW CLUT Render enable.
bool force_preload = GSConfig.PreloadFrameWithGSData;
bool preload_uploads = false;
if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
{
const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw();
@ -1693,10 +1700,12 @@ void GSRendererHW::Draw()
}
}
}
// The rectangle of the draw rounded up.
const GSVector4 rect = m_vt.m_min.p.upld(m_vt.m_max.p + GSVector4::cxpr(0.5f));
m_r = GSVector4i(rect).rintersect(context->scissor.in);
else if (((fm & fm_mask) != 0) || // Some channels masked
!IsDiscardingDstColor() || !PrimitiveCoversWithoutGaps() || // Using Dst Color or draw has gaps
(process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() && m_cached_ctx.TEX0.TBP0 < frame_end_bp)) // Tex is RT
{
preload_uploads = true;
}
if (!m_channel_shuffle && m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 &&
IsPossibleChannelShuffle())
@ -1843,7 +1852,6 @@ void GSRendererHW::Draw()
GSTextureCache::Source* src = nullptr;
TextureMinMaxResult tmm;
const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
// Disable texture mapping if the blend is black and using alpha from vertex.
if (process_texture)
{
@ -1964,9 +1972,13 @@ void GSRendererHW::Draw()
// Estimate size based on the scissor rectangle and height cache.
const GSVector2i t_size = GetTargetSize(src);
const GSVector4i t_size_rect = GSVector4i::loadh(t_size);
// Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area.
m_r = m_r.rintersect(GSVector4i::loadh(t_size));
m_r = m_r.rintersect(t_size_rect);
// Ensure areas not drawn to are filled in by preloads. Test case: Okami
preload_uploads |= !m_r.eq(t_size_rect);
float target_scale = GetTextureScaleFactor();
@ -1996,7 +2008,7 @@ void GSRendererHW::Draw()
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && PrimitiveCoversWithoutGaps();
const bool is_clear = is_possible_mem_clear && is_square;
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, target_scale, GSTextureCache::RenderTarget, true,
fm, false, is_clear, force_preload);
fm, false, is_clear, force_preload, preload_uploads);
// Draw skipped because it was a clear and there was no target.
if (!rt)

View File

@ -1199,7 +1199,7 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block,
}
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool is_clear, bool preload)
bool used, u32 fbmask, bool is_frame, bool is_clear, bool preload, bool preload_uploads)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const u32 bp = TEX0.TBP0;
@ -1470,81 +1470,84 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
if (!is_frame && !forced_preload && !preload)
{
std::vector<GSState::GSUploadQueue>::iterator iter;
GSVector4i eerect = GSVector4i::zero();
for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); )
if (preload_uploads)
{
// If the transfer's destination falls between TBP0 and rect_end, and its format has compatible bits
if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM))
{
GSVector4i targetr = {};
const bool can_translate = CanTranslate(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect, TEX0.TBP0, TEX0.PSM, TEX0.TBW);
const bool swizzle_match = GSLocalMemory::m_psm[iter->blit.DPSM].depth == GSLocalMemory::m_psm[TEX0.PSM].depth;
if (can_translate)
{
if (swizzle_match)
{
targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP, iter->blit.DPSM, iter->blit.DBW, iter->rect, true);
}
else
{
// If it's not page aligned, grab the whole pages it covers, to be safe.
GSVector4i new_rect = iter->rect;
const GSVector2i page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs;
std::vector<GSState::GSUploadQueue>::iterator iter;
GSVector4i eerect = GSVector4i::zero();
if (GSLocalMemory::m_psm[iter->blit.DPSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp)
for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); )
{
// If the transfer's destination falls between TBP0 and rect_end, and its format has compatible bits
if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM))
{
GSVector4i targetr = {};
const bool can_translate = CanTranslate(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect, TEX0.TBP0, TEX0.PSM, TEX0.TBW);
const bool swizzle_match = GSLocalMemory::m_psm[iter->blit.DPSM].depth == GSLocalMemory::m_psm[TEX0.PSM].depth;
if (can_translate)
{
if (swizzle_match)
{
const GSVector2i dst_page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs;
new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y);
new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y);
targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP, iter->blit.DPSM, iter->blit.DBW, iter->rect, true);
}
else
{
new_rect.x &= ~(page_size.x - 1);
new_rect.y &= ~(page_size.y - 1);
new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
// If it's not page aligned, grab the whole pages it covers, to be safe.
GSVector4i new_rect = iter->rect;
const GSVector2i page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs;
if (GSLocalMemory::m_psm[iter->blit.DPSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp)
{
const GSVector2i dst_page_size = GSLocalMemory::m_psm[iter->blit.DPSM].pgs;
new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y);
new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y);
}
else
{
new_rect.x &= ~(page_size.x - 1);
new_rect.y &= ~(page_size.y - 1);
new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
}
targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP & ~((1 << 5) - 1), iter->blit.DPSM, iter->blit.DBW, new_rect, true);
}
targetr = TranslateAlignedRectByPage(dst, iter->blit.DBP & ~((1 << 5) - 1), iter->blit.DPSM, iter->blit.DBW, new_rect, true);
}
else
{
GSTextureCache::SurfaceOffsetKey sok;
sok.elems[0].bp = iter->blit.DBP;
sok.elems[0].bw = iter->blit.DBW;
sok.elems[0].psm = iter->blit.DPSM;
sok.elems[0].rect = iter->rect;
sok.elems[1].bp = TEX0.TBP0;
sok.elems[1].bw = TEX0.TBW;
sok.elems[1].psm = TEX0.PSM;
sok.elems[1].rect = newrect;
// Calculate the rect offset if the BP doesn't match.
targetr = (iter->blit.DBP == TEX0.TBP0) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset;
}
if (eerect.rempty())
eerect = targetr;
else
eerect = eerect.runion(targetr);
iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter);
if (eerect.rintersect(newrect).eq(newrect))
break;
else
continue;
}
else
{
GSTextureCache::SurfaceOffsetKey sok;
sok.elems[0].bp = iter->blit.DBP;
sok.elems[0].bw = iter->blit.DBW;
sok.elems[0].psm = iter->blit.DPSM;
sok.elems[0].rect = iter->rect;
sok.elems[1].bp = TEX0.TBP0;
sok.elems[1].bw = TEX0.TBW;
sok.elems[1].psm = TEX0.PSM;
sok.elems[1].rect = newrect;
// Calculate the rect offset if the BP doesn't match.
targetr = (iter->blit.DBP == TEX0.TBP0) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset;
}
if (eerect.rempty())
eerect = targetr;
else
eerect = eerect.runion(targetr);
iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter);
if (eerect.rintersect(newrect).eq(newrect))
break;
else
continue;
iter++;
}
iter++;
}
if (!eerect.rempty())
{
GL_INS("Preloading the RT DATA from updated GS Memory");
eerect = eerect.rintersect(newrect);
AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16);
if (!eerect.rempty())
{
GL_INS("Preloading the RT DATA from updated GS Memory");
eerect = eerect.rintersect(newrect);
AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16);
}
}
}
else

View File

@ -452,7 +452,7 @@ public:
Target* FindTargetOverlap(u32 bp, u32 end_block, int type, int psm);
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool is_clear = false, bool preload = GSConfig.PreloadFrameWithGSData);
bool is_frame = false, bool is_clear = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preload_uploads = true);
Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale);
/// Looks up a target in the cache, and only returns it if the BP/BW match exactly.