GS-HW: Fix bugs in CLUT draw heuristics, fixes up Driver Parallel Lines

This commit is contained in:
refractionpcsx2 2022-10-30 23:58:11 +00:00
parent fe53a3f52c
commit fc87b54439
4 changed files with 48 additions and 18 deletions

View File

@ -108,11 +108,24 @@ u8 GSClut::IsInvalid()
return m_write.dirty; return m_write.dirty;
} }
// Downgrades a draw-triggered invalidation back to a plain dirty state.
// When the value-2 bit of m_write.dirty is set, the whole field is reset to 1;
// otherwise the field is left exactly as it was.
// NOTE(review): the value-2 bit looks like the "invalidated by a draw" marker
// (set via InvalidateRange with is_draw) - confirm against InvalidateRange.
void GSClut::ClearDrawInvalidity()
{
	const bool draw_flag_set = (m_write.dirty & 2) != 0;
	if (!draw_flag_set)
		return;

	m_write.dirty = 1;
}
u32 GSClut::GetCLUTCBP() u32 GSClut::GetCLUTCBP()
{ {
return m_write.TEX0.CBP; return m_write.TEX0.CBP;
} }
// Returns the CPSM field of the TEX0 value currently stored in m_write.
u32 GSClut::GetCLUTCPSM()
{
	const u32 cpsm = m_write.TEX0.CPSM;
	return cpsm;
}
void GSClut::SetNextCLUTTEX0(u64 TEX0) void GSClut::SetNextCLUTTEX0(u64 TEX0)
{ {
m_write.next_tex0 = TEX0; m_write.next_tex0 = TEX0;
@ -120,7 +133,7 @@ void GSClut::SetNextCLUTTEX0(u64 TEX0)
bool GSClut::InvalidateRange(u32 start_block, u32 end_block, bool is_draw) bool GSClut::InvalidateRange(u32 start_block, u32 end_block, bool is_draw)
{ {
if (m_write.dirty) if (m_write.dirty & 2)
return m_write.dirty; return m_write.dirty;
GIFRegTEX0 next_cbp; GIFRegTEX0 next_cbp;

View File

@ -103,7 +103,9 @@ public:
bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false); bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false);
u8 IsInvalid(); u8 IsInvalid();
void ClearDrawInvalidity();
u32 GetCLUTCBP(); u32 GetCLUTCBP();
u32 GetCLUTCPSM();
void SetNextCLUTTEX0(u64 CBP); void SetNextCLUTTEX0(u64 CBP);
bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);

View File

@ -1071,6 +1071,11 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
if (wt) if (wt)
{ {
m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64); m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64);
if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP())
{
m_mem.m_clut.ClearDrawInvalidity();
CLUTAutoFlush();
}
Flush(GSFlushReason::CLUTCHANGE); Flush(GSFlushReason::CLUTCHANGE);
} }
@ -2998,7 +3003,7 @@ __forceinline void GSState::CLUTAutoFlush()
{ {
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
if ((m_context->FRAME.FBMSK & psm.fmsk) != psm.fmsk) if ((m_context->FRAME.FBMSK & psm.fmsk) != psm.fmsk && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == psm.bpp)
{ {
const u32 startbp = psm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_context->FRAME.Block(), m_context->FRAME.FBW); const u32 startbp = psm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
@ -3008,7 +3013,7 @@ __forceinline void GSState::CLUTAutoFlush()
if (PRIM->PRIM != GS_POINTLIST || (m_index.tail > 1)) if (PRIM->PRIM != GS_POINTLIST || (m_index.tail > 1))
endbp = psm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_context->FRAME.Block(), m_context->FRAME.FBW); endbp = psm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_context->FRAME.Block(), m_context->FRAME.FBW);
m_mem.m_clut.InvalidateRange(startbp, endbp); m_mem.m_clut.InvalidateRange(startbp, endbp, true);
} }
} }
} }

View File

@ -1395,6 +1395,7 @@ void GSRendererHW::Draw()
if (GSConfig.UserHacks_CPUCLUTRender > 0) if (GSConfig.UserHacks_CPUCLUTRender > 0)
{ {
bool result = (GSConfig.UserHacks_CPUCLUTRender == 1) ? PossibleCLUTDraw() : PossibleCLUTDrawAggressive(); bool result = (GSConfig.UserHacks_CPUCLUTRender == 1) ? PossibleCLUTDraw() : PossibleCLUTDrawAggressive();
m_mem.m_clut.ClearDrawInvalidity();
if (result) if (result)
{ {
if (SwPrimRender()) if (SwPrimRender())
@ -3896,31 +3897,32 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
bool GSRendererHW::PossibleCLUTDraw() bool GSRendererHW::PossibleCLUTDraw()
{ {
// No shuffles.
if (m_channel_shuffle || m_texture_shuffle) if (m_channel_shuffle || m_texture_shuffle)
return false; return false;
// Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat. // Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat.
const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER; const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER;
// No Z writes.
if (!m_context->ZBUF.ZMSK && !fb_only) if (!m_context->ZBUF.ZMSK && !fb_only)
return false; return false;
// Make sure it's flat.
if (m_vt.m_eq.z != 0x1) if (m_vt.m_eq.z != 0x1)
return false; return false;
// No mipmapping, please never be any mipmapping...
if (m_context->TEX1.MXL) if (m_context->TEX1.MXL)
return false; return false;
if (m_vt.m_min.p.x < 0 || m_vt.m_min.p.y < 0)
return false;
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway // Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked. // what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
if ((m_regs->DISP[0].DISPFB.Block() == m_context->FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_context->FRAME.Block())) if ((m_regs->DISP[0].DISPFB.Block() == m_context->FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_context->FRAME.Block()))
return false; return false;
// Ignore recursive/shuffle effects. // Ignore recursive/shuffle effects, but possible it will recursively draw, but make sure it's staying in page width
if (PRIM->TME && m_context->TEX0.TBP0 == m_context->FRAME.Block()) if (PRIM->TME && m_context->TEX0.TBP0 == m_context->FRAME.Block() && (m_context->FRAME.FBW != 1 && m_context->TEX0.TBW == m_context->FRAME.FBW))
return false; return false;
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle. // Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
@ -3929,6 +3931,10 @@ bool GSRendererHW::PossibleCLUTDraw()
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
// Make sure the CLUT formats are matching.
if (GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp != psm.bpp)
return false;
// Max size for a CLUT/Current page size. // Max size for a CLUT/Current page size.
constexpr float clut_width = 16.0f; constexpr float clut_width = 16.0f;
constexpr float clut_height = 16.0f; constexpr float clut_height = 16.0f;
@ -3937,23 +3943,23 @@ bool GSRendererHW::PossibleCLUTDraw()
const float page_width = static_cast<float>(psm.pgs.x); const float page_width = static_cast<float>(psm.pgs.x);
const float page_height = static_cast<float>(psm.pgs.y); const float page_height = static_cast<float>(psm.pgs.y);
// If the coordinates aren't starting within the page, it's likely not a CLUT draw.
if (floor(m_vt.m_min.p.x) < 0 || floor(m_vt.m_min.p.y) < 0 || floor(m_vt.m_min.p.x) > page_width || floor(m_vt.m_min.p.y) > page_height)
return false;
// Make sure it's kinda CLUT sized, at least. Be wary, it can draw a line at a time (Guitar Hero - Metallica) // Make sure it's kinda CLUT sized, at least. Be wary, it can draw a line at a time (Guitar Hero - Metallica)
// Driver Parallel Lines draws a bunch of CLUTs at once, ending up as a 64x256 draw, very annoying.
const float draw_width = (m_vt.m_max.p.x - m_vt.m_min.p.x); const float draw_width = (m_vt.m_max.p.x - m_vt.m_min.p.x);
const float draw_height = (m_vt.m_max.p.y - m_vt.m_min.p.y); const float draw_height = (m_vt.m_max.p.y - m_vt.m_min.p.y);
const bool valid_size =((draw_width >= min_clut_width || draw_height >= min_clut_height) && const bool valid_size =((draw_width >= min_clut_width || draw_height >= min_clut_height) &&
m_vt.m_max.p.x <= page_width && m_vt.m_max.p.y <= page_height); (m_vt.m_max.p.x <= page_width));
// Klonoa draws a CLUT with a full page of triangles instead of a sprite, but we need to make sure it doesn't interfere with normal triangle draws.
if (m_vt.m_primclass == GS_TRIANGLE_CLASS)
{
if (draw_width != page_width || draw_height != page_height)
return false;
}
// Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check). // Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check).
// We can also allow draws which are of a sensible size within the page, as they could also be CLUT draws (or gradients for the CLUT). // We can also allow draws which are of a sensible size within the page, as they could also be CLUT draws (or gradients for the CLUT).
if (!(valid_size || (m_mem.m_clut.IsInvalid() & 2))) if (!valid_size)
{
return false; return false;
}
if (PRIM->TME) if (PRIM->TME)
{ {
@ -3967,7 +3973,11 @@ bool GSRendererHW::PossibleCLUTDraw()
InvalidateLocalMem(BITBLTBUF, r); InvalidateLocalMem(BITBLTBUF, r);
} }
//DevCon.Warning("Draw width %f height %f page width %f height %f TPSM %x TBP0 %x FPSM %x FBP %x valid size %d Invalid %d DISPFB0 %x DISPFB1 %x", draw_width, draw_height, page_width, page_height, m_context->TEX0.PSM, m_context->TEX0.TBP0, m_context->FRAME.PSM, m_context->FRAME.Block(), valid_size, m_mem.m_clut.IsInvalid(), m_regs->DISP[0].DISPFB.Block(), m_regs->DISP[1].DISPFB.Block()); // Debugging stuff..
//const u32 startbp = psm.info.bn(m_vt.m_min.p.x, m_vt.m_min.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
//const u32 endbp = psm.info.bn(m_vt.m_max.p.x, m_vt.m_max.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
//DevCon.Warning("Draw width %f height %f page width %f height %f TPSM %x TBP0 %x FPSM %x FBP %x CBP %x valid size %d Invalid %d DISPFB0 %x DISPFB1 %x start %x end %x draw %d", draw_width, draw_height, page_width, page_height, m_context->TEX0.PSM, m_context->TEX0.TBP0, m_context->FRAME.PSM, m_context->FRAME.Block(), m_mem.m_clut.GetCLUTCBP(), valid_size, m_mem.m_clut.IsInvalid(), m_regs->DISP[0].DISPFB.Block(), m_regs->DISP[1].DISPFB.Block(), startbp, endbp, s_n);
return true; return true;
} }