mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Detect row/page-based split clears
And turn them into a single mem clear.
This commit is contained in:
parent
830db2b326
commit
38d9aa5e73
|
@ -892,6 +892,112 @@ GSVector4i GSRendererHW::GetSplitTextureShuffleDrawRect() const
|
|||
return r.insert64<0>(0).ralign<Align_Outside>(frame_psm.pgs);
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsSplitClearActive() const
|
||||
{
|
||||
return (m_split_clear_pages != 0);
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsStartingSplitClear()
|
||||
{
|
||||
// Mem clear conditions have already been checked by the caller, except for Z.
|
||||
// We _could_ handle the split for both colour and depth, but nothing I've seen hits it yet.
|
||||
if (!m_cached_ctx.ZBUF.ZMSK)
|
||||
return false;
|
||||
|
||||
u32 pages_covered;
|
||||
if (!CheckNextDrawForSplitClear(m_r, &pages_covered))
|
||||
return false;
|
||||
|
||||
m_split_clear_start = m_cached_ctx.FRAME;
|
||||
m_split_clear_pages = pages_covered;
|
||||
m_split_clear_color = m_vertex.buff[1].RGBAQ.U32[0];
|
||||
if (PRIM->ABE && m_context->ALPHA.IsBlack())
|
||||
m_split_clear_color &= ~0xFF000000;
|
||||
|
||||
GL_INS("Starting split clear at FBP %x FBW %u PSM %s with %dx%d rect covering %u pages",
|
||||
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, psm_str(m_cached_ctx.FRAME.PSM),
|
||||
m_r.width(), m_r.height(), pages_covered);
|
||||
|
||||
// Remove any targets which are directly at the start.
|
||||
const u32 bp = m_cached_ctx.FRAME.Block();
|
||||
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp);
|
||||
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GSRendererHW::ContinueSplitClear()
|
||||
{
|
||||
// Should be a mem clear type draw.
|
||||
if (!IsConstantDirectWriteMemClear())
|
||||
return false;
|
||||
|
||||
// Shouldn't be writing Z, in theory we could track this too and clear both though.
|
||||
if (!m_cached_ctx.ZBUF.ZMSK)
|
||||
return false;
|
||||
|
||||
// Shouldn't have gaps.
|
||||
if (m_vt.m_eq.rgba != 0xFFFF || !PrimitiveCoversWithoutGaps())
|
||||
return false;
|
||||
|
||||
// Remove any targets which are directly at the start, since we checked this draw in the last.
|
||||
const u32 bp = m_cached_ctx.FRAME.Block();
|
||||
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp);
|
||||
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp);
|
||||
|
||||
// Check next draw.
|
||||
u32 pages_covered;
|
||||
const bool skip = CheckNextDrawForSplitClear(m_r, &pages_covered);
|
||||
|
||||
// We might've found the end, but this draw still counts.
|
||||
m_split_clear_pages += pages_covered;
|
||||
return skip;
|
||||
}
|
||||
|
||||
bool GSRendererHW::CheckNextDrawForSplitClear(const GSVector4i& r, u32* pages_covered_by_this_draw) const
|
||||
{
|
||||
const u32 end_block = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, r);
|
||||
if (pages_covered_by_this_draw)
|
||||
*pages_covered_by_this_draw = ((end_block - m_cached_ctx.FRAME.Block()) + (BLOCKS_PER_PAGE)) / BLOCKS_PER_PAGE;
|
||||
|
||||
// must be changing FRAME
|
||||
if (m_backed_up_ctx < 0 || (m_dirty_gs_regs & (1u << DIRTY_REG_FRAME)) == 0)
|
||||
return false;
|
||||
|
||||
// rect width should match the FBW (page aligned)
|
||||
if (r.width() != m_cached_ctx.FRAME.FBW * 64)
|
||||
return false;
|
||||
|
||||
// next FBP should point to the end of the rect
|
||||
const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx];
|
||||
if (next_ctx.FRAME.Block() != ((end_block + 1) % MAX_BLOCKS) || next_ctx.FRAME.FBW != m_cached_ctx.FRAME.FBW ||
|
||||
next_ctx.FRAME.PSM != m_cached_ctx.FRAME.PSM)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GSRendererHW::FinishSplitClear()
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[m_split_clear_start.PSM];
|
||||
const GSOffset clear_off = GSOffset(psm_s.info, m_split_clear_start.Block(), m_split_clear_start.FBW, m_split_clear_start.PSM);
|
||||
const GSVector4i rect = GSVector4i(0, 0, m_split_clear_start.FBW * 64, (m_split_clear_pages * psm_s.pgs.y) / m_split_clear_start.FBW);
|
||||
|
||||
GL_INS("FinishSplitClear(): Start %x FBW %u PSM %s, %u pages, %08X color", m_split_clear_start.Block(), m_split_clear_start.FBW,
|
||||
psm_str(m_split_clear_start.PSM), m_split_clear_pages, m_split_clear_color);
|
||||
|
||||
OI_DoGsMemClear(clear_off, rect, m_split_clear_color);
|
||||
|
||||
// Invalidate any targets in this range.
|
||||
g_texture_cache->InvalidateVideoMem(clear_off, rect, false, true);
|
||||
|
||||
m_split_clear_start.U64 = 0;
|
||||
m_split_clear_pages = 0;
|
||||
m_split_clear_color = 0;
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsTBPFrameOrZ(u32 tbp) const
|
||||
{
|
||||
const bool is_frame = (m_cached_ctx.FRAME.Block() == tbp);
|
||||
|
@ -1600,6 +1706,18 @@ void GSRendererHW::Draw()
|
|||
m_channel_shuffle = true;
|
||||
m_last_channel_shuffle_fbmsk = m_context->FRAME.FBMSK;
|
||||
}
|
||||
else if (IsSplitClearActive())
|
||||
{
|
||||
if (ContinueSplitClear())
|
||||
{
|
||||
GL_INS("Skipping due to continued split clear, FBP %x FBW %u", m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
FinishSplitClear();
|
||||
}
|
||||
}
|
||||
|
||||
m_texture_shuffle = false;
|
||||
m_copy_16bit_to_target_shuffle = false;
|
||||
|
@ -1711,6 +1829,13 @@ void GSRendererHW::Draw()
|
|||
cleanup_draw();
|
||||
return;
|
||||
}
|
||||
else if (!clear_height_valid && is_zero_clear && IsStartingSplitClear())
|
||||
{
|
||||
// Currently we only allow this for zero, because what we clear won't get preloaded back to the RT.
|
||||
// But, if we did, then we could allow it for non-zero clears, also same for the above case.
|
||||
cleanup_draw();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5118,66 +5243,71 @@ bool GSRendererHW::OI_GsMemClear()
|
|||
if (m_r.width() < ((static_cast<int>(m_cached_ctx.FRAME.FBW) - 1) * 64) || r.height() <= 128)
|
||||
return false;
|
||||
|
||||
GL_INS("OI_GsMemClear (%d,%d => %d,%d)", r.x, r.y, r.z, r.w);
|
||||
const int format = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmt;
|
||||
|
||||
// Take the vertex colour, but check if the blending would make it black.
|
||||
u32 vert_color = m_vertex.buff[1].RGBAQ.U32[0];
|
||||
if (PRIM->ABE && m_context->ALPHA.IsBlack())
|
||||
vert_color &= ~0xFF000000;
|
||||
|
||||
const u32 color = (format == 0) ? vert_color : (vert_color & ~0xFF000000);
|
||||
// FIXME: loop can likely be optimized with AVX/SSE. Pixels aren't
|
||||
// linear but the value will be done for all pixels of a block.
|
||||
// FIXME: maybe we could limit the write to the top and bottom row page.
|
||||
if (format == 0)
|
||||
{
|
||||
// Based on WritePixel32
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) = color; // Here the constant color
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (format == 1)
|
||||
{
|
||||
// Based on WritePixel24
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) &= 0xff000000; // Clear the color
|
||||
*pa.value(x) |= color; // OR in our constant
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (format == 2)
|
||||
{
|
||||
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
|
||||
|
||||
// Based on WritePixel16
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(m_mem.vm16(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) = converted_color; // Here the constant color
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OI_DoGsMemClear(off, r, vert_color);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void GSRendererHW::OI_DoGsMemClear(const GSOffset& off, const GSVector4i& r, u32 vert_color)
|
||||
{
|
||||
GL_INS("OI_DoGsMemClear (%d,%d => %d,%d)", r.x, r.y, r.z, r.w);
|
||||
const int format = GSLocalMemory::m_psm[off.psm()].fmt;
|
||||
|
||||
const u32 color = (format == 0) ? vert_color : (vert_color & ~0xFF000000);
|
||||
// FIXME: loop can likely be optimized with AVX/SSE. Pixels aren't
|
||||
// linear but the value will be done for all pixels of a block.
|
||||
// FIXME: maybe we could limit the write to the top and bottom row page.
|
||||
if (format == 0)
|
||||
{
|
||||
// Based on WritePixel32
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) = color; // Here the constant color
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (format == 1)
|
||||
{
|
||||
// Based on WritePixel24
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) &= 0xff000000; // Clear the color
|
||||
*pa.value(x) |= color; // OR in our constant
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (format == 2)
|
||||
{
|
||||
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
|
||||
|
||||
// Based on WritePixel16
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(m_mem.vm16(), 0, y);
|
||||
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
*pa.value(x) = converted_color; // Here the constant color
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
|
||||
{
|
||||
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
|
||||
|
|
|
@ -45,6 +45,7 @@ private:
|
|||
// Require special argument
|
||||
bool OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* t, const GSVector4i& r_draw);
|
||||
bool OI_GsMemClear(); // always on
|
||||
void OI_DoGsMemClear(const GSOffset& off, const GSVector4i& r, u32 vert_color);
|
||||
void OI_DoubleHalfClear(GSTextureCache::Target*& rt, GSTextureCache::Target*& ds); // always on
|
||||
|
||||
u16 Interpolate_UV(float alpha, int t0, int t1);
|
||||
|
@ -96,6 +97,12 @@ private:
|
|||
bool IsSplitTextureShuffle();
|
||||
GSVector4i GetSplitTextureShuffleDrawRect() const;
|
||||
|
||||
bool IsSplitClearActive() const;
|
||||
bool CheckNextDrawForSplitClear(const GSVector4i& r, u32* pages_covered_by_this_draw) const;
|
||||
bool IsStartingSplitClear();
|
||||
bool ContinueSplitClear();
|
||||
void FinishSplitClear();
|
||||
|
||||
GSVector4i m_r = {};
|
||||
|
||||
// We modify some of the context registers to optimize away unnecessary operations.
|
||||
|
@ -136,6 +143,10 @@ private:
|
|||
|
||||
u32 m_last_channel_shuffle_fbmsk = 0;
|
||||
|
||||
GIFRegFRAME m_split_clear_start = {};
|
||||
u32 m_split_clear_pages = 0; // if zero, inactive
|
||||
u32 m_split_clear_color = 0;
|
||||
|
||||
bool m_userhacks_tcoffset = false;
|
||||
float m_userhacks_tcoffset_x = 0.0f;
|
||||
float m_userhacks_tcoffset_y = 0.0f;
|
||||
|
|
Loading…
Reference in New Issue