GS/HW: Remove targets which lie completely within the invalidate range

Fixes part of Burnout 3's car reflections.
This commit is contained in:
Stenzek 2023-06-11 20:02:22 +10:00 committed by Connor McLaughlin
parent fbd837eadb
commit c87dd99824
7 changed files with 69 additions and 16 deletions

View File

@ -453,6 +453,22 @@ bool GSLocalMemory::IsPageAligned(u32 psm, const GSVector4i& rc)
return (rc & pgmsk).eq(GSVector4i::zero()); return (rc & pgmsk).eq(GSVector4i::zero());
} }
// Returns the block address corresponding to the top-left corner (rect.x, rect.y) of rect,
// for a buffer at base pointer bp with width bw and pixel storage mode psm.
u32 GSLocalMemory::GetStartBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect)
{
	// Low five bits of a block address; clearing them rounds the address down.
	constexpr u32 block_in_page_mask = (1 << 5) - 1;

	// The block number lookup on its own is only valid for color formats.
	const u32 corner_block = m_psm[psm].info.bn(rect.x, rect.y, bp, bw);

	// Page dimensions for this format; the masks below test whether the corner sits on a page boundary.
	const GSVector2i pgs = GSLocalMemory::m_psm[psm].pgs;
	const bool x_on_page_boundary = (rect.x & (pgs.x - 1)) == 0;
	const bool y_on_page_boundary = (rect.y & (pgs.y - 1)) == 0;

	// A page-aligned corner is assumed to be the start of its page. Z formats don't put block 0
	// in the top-left of the page, so the looked-up block has to be rounded down in that case.
	if (x_on_page_boundary && y_on_page_boundary)
		return corner_block & ~block_in_page_mask;

	return corner_block;
}
u32 GSLocalMemory::GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect) u32 GSLocalMemory::GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect)
{ {
u32 result = m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats u32 result = m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats

View File

@ -547,6 +547,7 @@ public:
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0); std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
static bool IsPageAligned(u32 psm, const GSVector4i& rc); static bool IsPageAligned(u32 psm, const GSVector4i& rc);
static u32 GetStartBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect);
static u32 GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect); static u32 GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect);
// address // address

View File

@ -135,7 +135,7 @@ bool GSHwHack::GSC_SacredBlaze(GSRendererHW& r, int& skip)
if ((RFBP == 0x2680 || RFBP == 0x26c0 || RFBP == 0x2780 || RFBP == 0x2880 || RFBP == 0x2a80) && RTPSM == PSMCT32 && RFBW <= 2 && if ((RFBP == 0x2680 || RFBP == 0x26c0 || RFBP == 0x2780 || RFBP == 0x2880 || RFBP == 0x2a80) && RTPSM == PSMCT32 && RFBW <= 2 &&
(!RTME || (RTBP0 == 0x0 || RTBP0 == 0xe00 || RTBP0 == 0x3e00))) (!RTME || (RTBP0 == 0x0 || RTBP0 == 0xe00 || RTBP0 == 0x3e00)))
{ {
r.SwPrimRender(r, RTBP0 > 0x1000); r.SwPrimRender(r, RTBP0 > 0x1000, false);
skip = 1; skip = 1;
} }
} }
@ -253,7 +253,7 @@ bool GSHwHack::GSC_BlackAndBurnoutSky(GSRendererHW& r, int& skip)
// the clouds on top of the sky at each frame. // the clouds on top of the sky at each frame.
// Burnout 3 PAL 50Hz: 0x3ba0 => 0x1e80. // Burnout 3 PAL 50Hz: 0x3ba0 => 0x1e80.
GL_INS("OO_BurnoutGames - Readback clouds renderered from TEX0.TBP0 = 0x%04x (TEX0.CBP = 0x%04x) to FBP = 0x%04x", TEX0.TBP0, TEX0.CBP, FRAME.Block()); GL_INS("OO_BurnoutGames - Readback clouds renderered from TEX0.TBP0 = 0x%04x (TEX0.CBP = 0x%04x) to FBP = 0x%04x", TEX0.TBP0, TEX0.CBP, FRAME.Block());
r.SwPrimRender(r, true); r.SwPrimRender(r, true, false);
skip = 1; skip = 1;
} }
if (TEX0.TBW == 2 && TEX0.TW == 7 && ((TEX0.PSM == PSMT4 && FRAME.FBW == 3) || (TEX0.PSM == PSMT8 && FRAME.FBW == 2)) && TEX0.TH == 6 && (FRAME.FBMSK & 0xFFFFFF) == 0xFFFFFF) if (TEX0.TBW == 2 && TEX0.TW == 7 && ((TEX0.PSM == PSMT4 && FRAME.FBW == 3) || (TEX0.PSM == PSMT8 && FRAME.FBW == 2)) && TEX0.TH == 6 && (FRAME.FBMSK & 0xFFFFFF) == 0xFFFFFF)
@ -261,7 +261,7 @@ bool GSHwHack::GSC_BlackAndBurnoutSky(GSRendererHW& r, int& skip)
// Rendering of the glass smashing effect and some chassis decal in to the alpha channel of the FRAME on boot (before the menu). // Rendering of the glass smashing effect and some chassis decal in to the alpha channel of the FRAME on boot (before the menu).
// This gets ejected from the texture cache due to old age, but never gets written back. // This gets ejected from the texture cache due to old age, but never gets written back.
GL_INS("OO_BurnoutGames - Render glass smash from TEX0.TBP0 = 0x%04x (TEX0.CBP = 0x%04x) to FBP = 0x%04x", TEX0.TBP0, TEX0.CBP, FRAME.Block()); GL_INS("OO_BurnoutGames - Render glass smash from TEX0.TBP0 = 0x%04x (TEX0.CBP = 0x%04x) to FBP = 0x%04x", TEX0.TBP0, TEX0.CBP, FRAME.Block());
r.SwPrimRender(r, true); r.SwPrimRender(r, true, false);
skip = 1; skip = 1;
} }
} }
@ -652,7 +652,7 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip)
// Also used for Nicktoons Unite, same engine it appears. // Also used for Nicktoons Unite, same engine it appears.
if ((context->FRAME.PSM == PSMCT16S || context->FRAME.PSM <= PSMCT24) && context->FRAME.FBW <= 5) if ((context->FRAME.PSM == PSMCT16S || context->FRAME.PSM <= PSMCT24) && context->FRAME.FBW <= 5)
{ {
r.SwPrimRender(r, true); r.SwPrimRender(r, true, false);
skip = 1; skip = 1;
return true; return true;
} }
@ -661,7 +661,7 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip)
// rendered on both. // rendered on both.
if (context->FRAME.FBW == 8 && r.m_index.tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1) if (context->FRAME.FBW == 8 && r.m_index.tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1)
{ {
r.SwPrimRender(r, false); r.SwPrimRender(r, false, false);
return false; return false;
} }

View File

@ -1663,7 +1663,7 @@ void GSRendererHW::Draw()
const bool draw_sprite_tex = PRIM->TME && (m_vt.m_primclass == GS_SPRITE_CLASS); const bool draw_sprite_tex = PRIM->TME && (m_vt.m_primclass == GS_SPRITE_CLASS);
// We trigger the sw prim render here super early, to avoid creating superfluous render targets. // We trigger the sw prim render here super early, to avoid creating superfluous render targets.
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true)) if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true))
{ {
GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)",
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW);
@ -1700,7 +1700,7 @@ void GSRendererHW::Draw()
m_mem.m_clut.ClearDrawInvalidity(); m_mem.m_clut.ClearDrawInvalidity();
if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0) if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0)
{ {
if (SwPrimRender(*this, true)) if (SwPrimRender(*this, true, true))
{ {
GL_CACHE("Possible clut draw, drawn with SwPrimRender()"); GL_CACHE("Possible clut draw, drawn with SwPrimRender()");
return; return;

View File

@ -68,7 +68,7 @@ private:
CLUTDrawTestResult PossibleCLUTDraw(); CLUTDrawTestResult PossibleCLUTDraw();
CLUTDrawTestResult PossibleCLUTDrawAggressive(); CLUTDrawTestResult PossibleCLUTDrawAggressive();
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex); bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
bool (*SwPrimRender)(GSRendererHW&, bool invalidate_tc); bool (*SwPrimRender)(GSRendererHW&, bool invalidate_tc, bool add_ee_transfer);
template <bool linear> template <bool linear>
void RoundSpriteOffset(); void RoundSpriteOffset();

View File

@ -21,7 +21,7 @@
class CURRENT_ISA::GSRendererHWFunctions class CURRENT_ISA::GSRendererHWFunctions
{ {
public: public:
static bool SwPrimRender(GSRendererHW& hw, bool invalidate_tc); static bool SwPrimRender(GSRendererHW& hw, bool invalidate_tc, bool add_ee_transfer);
static void Populate(GSRendererHW& renderer) static void Populate(GSRendererHW& renderer)
{ {
@ -40,7 +40,7 @@ void CURRENT_ISA::GSRendererHWPopulateFunctions(GSRendererHW& renderer)
static GSVector4i s_dimx_storage[8]; static GSVector4i s_dimx_storage[8];
static GIFRegDIMX s_last_dimx; static GIFRegDIMX s_last_dimx;
bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc) bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, bool add_ee_transfer)
{ {
GSVertexTrace& vt = hw.m_vt; GSVertexTrace& vt = hw.m_vt;
const GIFRegPRIM* PRIM = hw.PRIM; const GIFRegPRIM* PRIM = hw.PRIM;
@ -68,7 +68,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next); GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next);
GSVector4i scissor = context->scissor.in; GSVector4i scissor = context->scissor.in;
GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil())); GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil())).rintersect(scissor);
// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0) // Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
@ -557,5 +557,18 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
if (invalidate_tc) if (invalidate_tc)
g_texture_cache->InvalidateVideoMem(context->offset.fb, bbox); g_texture_cache->InvalidateVideoMem(context->offset.fb, bbox);
// Jak does sw prim render, then draws to the same target, and it needs to be uploaded.
if (add_ee_transfer)
{
GSRendererHW::GSUploadQueue uq;
uq.blit.U64 = 0;
uq.blit.DBP = hw.m_cached_ctx.FRAME.Block();
uq.blit.DBW = hw.m_cached_ctx.FRAME.FBW;
uq.blit.DPSM = hw.m_cached_ctx.FRAME.PSM;
uq.draw = GSState::s_n;
uq.rect = bbox;
hw.m_draw_transfers.push_back(uq);
}
return true; return true;
} }

View File

@ -1711,9 +1711,9 @@ void GSTextureCache::InvalidateVideoMemType(int type, u32 bp)
// Called each time you want to write to the GS memory // Called each time you want to write to the GS memory
void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool eewrite, bool target) void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool eewrite, bool target)
{ {
u32 bp = off.bp(); const u32 bp = off.bp();
u32 bw = off.bw(); const u32 bw = off.bw();
u32 psm = off.psm(); const u32 psm = off.psm();
if (!target) if (!target)
{ {
@ -1823,8 +1823,11 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
if (!target) if (!target)
return; return;
// Handle the case where the transfer wrapped around the end of GS memory. // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained.
const u32 end_bp = off.bnNoWrap(rect.z - 1, rect.w - 1); // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow,
// so we need to prevent that from happening. Just make it a single block in that case, and hope for the best.
const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect);
const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetEndBlockAddress(off.bp(), off.bw(), off.psm(), rect);
// Ideally in the future we can turn this on unconditionally, but for now it breaks too much. // Ideally in the future we can turn this on unconditionally, but for now it breaks too much.
const bool check_inside_target = (GSConfig.UserHacks_TargetPartialInvalidation || const bool check_inside_target = (GSConfig.UserHacks_TargetPartialInvalidation ||
@ -2063,6 +2066,26 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
// TODO Use ComputeSurfaceOffset below. // TODO Use ComputeSurfaceOffset below.
if (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) if (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{ {
if (t->m_TEX0.TBP0 >= start_bp && t->m_end_block <= end_bp)
{
// If we're clearing C24 but the target is C32, then we need to dirty instead.
if (rgba._u32 != GSUtil::GetChannelMask(t->m_TEX0.PSM))
{
GL_CACHE("TC: Dirty whole target(%s) (0x%x) due to being contained within the invalidate range",
to_string(type), t->m_TEX0.TBP0);
AddDirtyRectTarget(t, t->GetUnscaledRect(), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba);
continue;
}
else
{
i = list.erase(j);
GL_CACHE("TC: Remove Target(%s) (0x%x) due to being contained within the invalidate range",
to_string(type), t->m_TEX0.TBP0);
delete t;
continue;
}
}
if (bp < t->m_TEX0.TBP0) if (bp < t->m_TEX0.TBP0)
{ {
const u32 rowsize = bw * 8192; const u32 rowsize = bw * 8192;