GS/HW: Improve detection of clears

- Detect "normal" target clears and HLE accordingly
 - Rewrite double-half clears to mask Z or FRAME, reducing false target creation.
 - Handle split single-page-wide clears, and attempt to get a real FBW.
- Propagate clear values between targets, instead of blitting.
This commit is contained in:
Stenzek 2023-06-03 23:52:26 +10:00 committed by Connor McLaughlin
parent ce7c466041
commit 96fad124ac
4 changed files with 596 additions and 340 deletions

File diff suppressed because it is too large Load Diff

View File

@ -44,9 +44,13 @@ private:
// Require special argument
bool OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* t, const GSVector4i& r_draw);
bool OI_GsMemClear(); // always on
void OI_DoGsMemClear(const GSOffset& off, const GSVector4i& r, u32 vert_color);
void OI_DoubleHalfClear(GSTextureCache::Target*& rt, GSTextureCache::Target*& ds); // always on
bool TryGSMemClear(bool no_rt, bool no_ds);
void ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, u32 vert_color);
bool DetectDoubleHalfClear(bool& no_rt, bool& no_ds);
bool DetectStripedDoubleClear(bool& no_rt, bool& no_ds);
bool TryTargetClear(GSTextureCache::Target* rt, GSTextureCache::Target* ds, bool preserve_rt_color, bool preserve_depth);
void SetNewFRAME(u32 bp, u32 bw, u32 psm);
void SetNewZBUF(u32 bp, u32 psm);
u16 Interpolate_UV(float alpha, int t0, int t1);
float alpha0(int L, int X0, int X1);
@ -54,6 +58,8 @@ private:
void SwSpriteRender();
bool CanUseSwSpriteRender();
bool IsConstantDirectWriteMemClear();
u32 GetConstantDirectWriteMemClearColor() const;
bool IsReallyDithered() const;
bool IsDiscardingDstColor();
bool PrimitiveCoversWithoutGaps();
@ -97,6 +103,9 @@ private:
bool IsSplitTextureShuffle();
GSVector4i GetSplitTextureShuffleDrawRect() const;
static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages);
bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw);
bool IsSplitClearActive() const;
bool CheckNextDrawForSplitClear(const GSVector4i& r, u32* pages_covered_by_this_draw) const;
bool IsStartingSplitClear();
@ -144,6 +153,7 @@ private:
u32 m_last_channel_shuffle_fbmsk = 0;
GIFRegFRAME m_split_clear_start = {};
GIFRegZBUF m_split_clear_start_Z = {};
u32 m_split_clear_pages = 0; // if zero, inactive
u32 m_split_clear_color = 0;

View File

@ -1199,7 +1199,7 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block,
}
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preload_uploads)
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const u32 bp = TEX0.TBP0;
@ -1354,6 +1354,20 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_scale = scale;
dst->m_unscaled_size = new_size;
}
// Drop dirty rect if we're overwriting the whole target.
if (!preserve_target && draw_rect.rintersect(dst->m_valid).eq(dst->m_valid))
{
if (!dst->m_dirty.empty())
{
GL_INS("TC: Clearing dirty list for %s[%x] because we're overwriting the whole target.", to_string(type), dst->m_TEX0.TBP0);
dst->m_dirty.clear();
}
// And invalidate the target, we're drawing over it so we don't care what's there.
GL_INS("TC: Invalidating target %s[%x] because it's completely overwritten.", to_string(type), dst->m_TEX0.TBP0);
g_gs_device->InvalidateRenderTarget(dst->m_texture);
}
}
else if (!is_frame && !GSConfig.UserHacks_DisableDepthSupport)
{
@ -1407,21 +1421,49 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
shader = (fmt_16_bits) ? ShaderConvert::FLOAT16_TO_RGB5A1 : ShaderConvert::FLOAT32_TO_RGBA8;
}
// The old target's going to get invalidated (at least until we handle concurrent frame+depth at the same BP),
// so just move the dirty rects across.
dst->m_dirty = std::move(dst_match->m_dirty);
dst_match->m_dirty = {};
// Don't bother copying the old target in if the whole thing is dirty.
if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 ||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect()))
if (!preserve_target && draw_rect.rintersect(dst_match->m_valid).eq(dst_match->m_valid))
{
g_gs_device->StretchRect(dst_match->m_texture, sRect, dst->m_texture, dRect, shader, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
GL_INS("TC: Not converting existing %s[%x] because it's fully overwritten.", to_string(!type), dst->m_TEX0.TBP0);
}
else
{
// The old target's going to get invalidated (at least until we handle concurrent frame+depth at the same BP),
// so just move the dirty rects across.
dst->m_dirty = std::move(dst_match->m_dirty);
dst_match->m_dirty = {};
// Now pull in any dirty areas in the new format.
dst->Update();
// Don't bother copying the old target in if the whole thing is dirty.
if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 ||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect()))
{
// If the old target was cleared, simply propagate that through.
if (dst_match->m_texture->GetState() == GSTexture::State::Cleared)
{
if (type == DepthStencil)
{
const u32 cc = dst_match->m_texture->GetClearColor();
const float cd = ConvertColorToDepth(cc, shader);
GL_INS("TC: Convert clear color[%08X] to depth[%f]", cc, cd);
g_gs_device->ClearDepth(dst->m_texture, cd);
}
else
{
const float cd = dst_match->m_texture->GetClearDepth();
const u32 cc = ConvertDepthToColor(cd, shader);
GL_INS("TC: Convert clear depth[%f] to color[%08X]", cd, cc);
g_gs_device->ClearRenderTarget(dst->m_texture, cc);
}
}
else if (dst_match->m_texture->GetState() == GSTexture::State::Dirty)
{
g_gs_device->StretchRect(dst_match->m_texture, sRect, dst->m_texture, dRect, shader, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
}
// Now pull in any dirty areas in the new format.
dst->Update();
}
}
}
@ -1441,7 +1483,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
}
GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preload_uploads)
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect)
{
const u32 bp = TEX0.TBP0;
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
@ -1485,7 +1527,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
if (!is_frame && !forced_preload && !preload)
{
if (preload_uploads)
if (preserve_target || !draw_rect.eq(dst->m_valid))
{
std::vector<GSState::GSUploadQueue>::iterator iter;
GSVector4i eerect = GSVector4i::zero();
@ -1680,6 +1722,50 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast<u32>(needed_height));
}
/// Converts a single packed colour value to the floating-point depth value that
/// the given colour-to-depth conversion expression yields for it.
float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert)
{
	// Scale from integer depth to float: 2^-32 when the device reports clip control, 2^-24 otherwise.
	const float scale = std::exp2(g_gs_device->Features().clip_control ? -32.0f : -24.0f);

	// Integer depth bits extracted from the colour, selected by the conversion type.
	u32 depth_bits;
	if (convert == ShaderConvert::RGB5A1_TO_FLOAT16)
	{
		// Keep the top five bits of each of the three low bytes and the top bit of the
		// high byte, then repack them contiguously as a 5:5:5:1 16-bit value.
		const u32 field0 = (c & 0xF8u) >> 3;
		const u32 field1 = ((c >> 8) & 0xF8u) << 2;
		const u32 field2 = ((c >> 16) & 0xF8u) << 7;
		const u32 field3 = ((c >> 24) & 0x80u) << 8;
		depth_bits = field0 | field1 | field2 | field3;
	}
	else if (convert == ShaderConvert::RGBA8_TO_FLOAT16)
	{
		depth_bits = c & 0x0000FFFF;
	}
	else if (convert == ShaderConvert::RGBA8_TO_FLOAT24)
	{
		depth_bits = c & 0x00FFFFFF;
	}
	else
	{
		// RGBA8_TO_FLOAT32 and any other conversion use the whole 32-bit value.
		depth_bits = c;
	}

	return static_cast<float>(depth_bits) * scale;
}
/// Converts a single floating-point depth value to the packed colour value that
/// the given depth-to-colour conversion expression yields for it.
/// Scaled depths outside the u32 range saturate to 0 or 0xFFFFFFFF.
u32 GSTextureCache::ConvertDepthToColor(float d, ShaderConvert convert)
{
	// Scale from float depth to integer: 2^32 when the device reports clip control, 2^24 otherwise.
	const float mult = std::exp2(g_gs_device->Features().clip_control ? 32.0f : 24.0f);

	// Converting a float that does not fit in the destination integer type is undefined behaviour.
	// A depth of exactly 1.0 scaled by 2^32 is one past the largest u32, so saturate instead of
	// casting blindly. In-range values convert exactly as a plain static_cast would.
	const auto to_u32_saturated = [](float v) -> u32 {
		if (!(v > 0.0f)) // zero, negative, or NaN
			return 0u;
		if (v >= 4294967296.0f) // 2^32
			return 0xFFFFFFFFu;
		return static_cast<u32>(v);
	};
	const u32 cc = to_u32_saturated(d * mult);

	switch (convert)
	{
		case ShaderConvert::FLOAT16_TO_RGB5A1:
		{
			// Truly awful.
			// Split the low 16 bits into 5:5:5:1 fields, scale every field (including the
			// single-bit one) by 255/31, and pack the results into one byte each.
			const GSVector4i vcc = GSVector4i(
				GSVector4(GSVector4i(cc & 0x1Fu, (cc >> 5) & 0x1Fu, (cc >> 10) & 0x1Fu, (cc >> 15) & 0x01u)) *
				GSVector4::cxpr(255.0f / 31.0f));
			return (vcc.r | (vcc.g << 8) | (vcc.b << 16) | (vcc.a << 24));
		}

		case ShaderConvert::FLOAT32_TO_RGBA8:
		default:
			return cc;
	}
}
bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex)
{
GSDownloadTexture* ctex = tex->get();

View File

@ -457,9 +457,11 @@ public:
Target* FindTargetOverlap(u32 bp, u32 end_block, int type, int psm);
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preload_uploads = true);
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
const GSVector4i draw_rc = GSVector4i::zero());
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preload_uploads = true);
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
const GSVector4i draw_rc = GSVector4i::zero());
Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale);
/// Looks up a target in the cache, and only returns it if the BP/BW match exactly.
@ -481,6 +483,12 @@ public:
void ReplaceSourceTexture(Source* s, GSTexture* new_texture, float new_scale, const GSVector2i& new_unscaled_size,
HashCacheEntry* hc_entry, bool new_texture_is_shared);
/// Converts single color value to depth using the specified shader expression.
static float ConvertColorToDepth(u32 c, ShaderConvert convert);
/// Converts single depth value to colour using the specified shader expression.
static u32 ConvertDepthToColor(float d, ShaderConvert convert);
bool Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u32 DBW, u32 DPSM, int dx, int dy, int w, int h);
bool ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx, int dy, int w, int h);
bool PageMove(u32 SBP, u32 DBP, u32 BW, u32 PSM, int sx, int sy, int dx, int dy, int w, int h);