GS: Only flush on PRIM if a new draw is requested

This commit is contained in:
refractionpcsx2 2022-03-25 21:45:22 +00:00
parent 38b83af714
commit 906480b8e2
2 changed files with 98 additions and 42 deletions

View File

@ -134,6 +134,7 @@ GSState::GSState()
PRIM = &m_env.PRIM; PRIM = &m_env.PRIM;
//CSR->rREV = 0x20; //CSR->rREV = 0x20;
m_env.PRMODECONT.AC = 1; m_env.PRMODECONT.AC = 1;
m_last_prim.U32[0] = PRIM->U32[0];
Reset(); Reset();
@ -650,6 +651,15 @@ void GSState::DumpVertices(const std::string& filename)
file.close(); file.close();
} }
// Pre-vertex housekeeping shared by the XYZ2/XYZF2 register handlers, which
// call this before processing a vertex:
//  1. If a PRIM/PRMODE write left a flush pending (m_primflush), perform it
//     now, before new vertices are added.
//  2. Invalidate the CLUT for the current frame block unless FBMSK has every
//     bit of the frame format's fmsk set.
__inline void GSState::CheckFlushes()
{
	if (m_primflush)
	{
		Flush();
	}

	// Read the context only after the flush above, matching the order in which
	// these values were previously evaluated.
	const auto& frame = m_context->FRAME;
	const auto format_mask = GSLocalMemory::m_psm[frame.PSM].fmsk;

	const bool fully_masked = (frame.FBMSK & format_mask) == format_mask;
	if (!fully_masked)
	{
		m_mem.m_clut.Invalidate(frame.Block());
	}
}
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r) void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
{ {
} }
@ -707,6 +717,8 @@ void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
template <u32 prim, u32 adc, bool auto_flush, bool index_swap> template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{ {
CheckFlushes();
GSVector4i xy = GSVector4i::loadl(&r->U64[0]); GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
GSVector4i zf = GSVector4i::loadl(&r->U64[1]); GSVector4i zf = GSVector4i::loadl(&r->U64[1]);
@ -721,6 +733,8 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
template <u32 prim, u32 adc, bool auto_flush, bool index_swap> template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{ {
CheckFlushes();
const GSVector4i xy = GSVector4i::loadl(&r->U64[0]); const GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
const GSVector4i z = GSVector4i::loadl(&r->U64[1]); const GSVector4i z = GSVector4i::loadl(&r->U64[1]);
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
@ -749,6 +763,8 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
{ {
ASSERT(size > 0 && size % 3 == 0); ASSERT(size > 0 && size % 3 == 0);
CheckFlushes();
const GIFPackedReg* RESTRICT r_end = r + size; const GIFPackedReg* RESTRICT r_end = r + size;
while (r < r_end) while (r < r_end)
@ -781,6 +797,8 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32
{ {
ASSERT(size > 0 && size % 3 == 0); ASSERT(size > 0 && size % 3 == 0);
CheckFlushes();
const GIFPackedReg* RESTRICT r_end = r + size; const GIFPackedReg* RESTRICT r_end = r + size;
while (r < r_end) while (r < r_end)
@ -817,19 +835,6 @@ void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
__forceinline void GSState::ApplyPRIM(u32 prim) __forceinline void GSState::ApplyPRIM(u32 prim)
{ {
if (GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists
{
u32 prim_mask = 0x7f8;
if (GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(prim & 7) == GS_TRIANGLE_CLASS)
prim_mask &= ~0x80; // Mask out AA1.
if (m_env.PRMODECONT.AC == 1 && (m_env.PRIM.U32[0] ^ prim) & prim_mask) // all fields except PRIM
Flush();
}
else
{
Flush();
}
if (m_env.PRMODECONT.AC == 1) if (m_env.PRMODECONT.AC == 1)
{ {
@ -842,12 +847,29 @@ __forceinline void GSState::ApplyPRIM(u32 prim)
m_env.PRIM.PRIM = prim & 0x7; m_env.PRIM.PRIM = prim & 0x7;
} }
m_primflush = false;
u32 prim_mask = 0x7ff;
// Same class of draw so we don't need to flush
if (GSUtil::GetPrimClass(m_last_prim.PRIM) == GSUtil::GetPrimClass(m_env.PRIM.PRIM))
prim_mask &= ~0x7;
if (GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(prim & 7) == GS_TRIANGLE_CLASS)
prim_mask &= ~0x80; // Mask out AA1.
if ((m_last_prim.U32[0] ^ m_env.PRIM.U32[0]) & prim_mask)
m_primflush = true;
UpdateVertexKick(); UpdateVertexKick();
ASSERT(m_index.tail == 0 || !g_gs_device->Features().provoking_vertex_last || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next); ASSERT(m_index.tail == 0 || !g_gs_device->Features().provoking_vertex_last || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
if (m_index.tail == 0) if (m_index.tail == 0)
{
m_vertex.next = 0; m_vertex.next = 0;
m_primflush = false;
m_last_prim.U32[0] = m_env.PRIM.U32[0];
}
m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer
} }
@ -898,6 +920,8 @@ void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
template <u32 prim, u32 adc, bool auto_flush, bool index_swap> template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
{ {
CheckFlushes();
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF); GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff())); GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff()));
GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>()); GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>());
@ -910,6 +934,8 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
template <u32 prim, u32 adc, bool auto_flush, bool index_swap> template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{ {
CheckFlushes();
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV); m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
VertexKick<prim, auto_flush, index_swap>(adc); VertexKick<prim, auto_flush, index_swap>(adc);
@ -940,7 +966,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA
if (wt || PRIM->CTXT == i && ((TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask)) if (wt || (PRIM->CTXT == i || m_primflush) && ((TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask))
Flush(); Flush();
TEX0.CPSM &= 0xa; // 1010b TEX0.CPSM &= 0xa; // 1010b
@ -1074,7 +1100,7 @@ void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
{ {
GL_REG("CLAMP_%d = 0x%x_%x", i, r->U32[1], r->U32[0]); GL_REG("CLAMP_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP) if ((PRIM->CTXT == i || m_primflush) && r->CLAMP != m_env.CTXT[i].CLAMP)
Flush(); Flush();
m_env.CTXT[i].CLAMP = (GSVector4i)r->CLAMP; m_env.CTXT[i].CLAMP = (GSVector4i)r->CLAMP;
@ -1094,7 +1120,7 @@ void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
{ {
GL_REG("TEX1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]); GL_REG("TEX1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1) if ((PRIM->CTXT == i || m_primflush) && r->TEX1 != m_env.CTXT[i].TEX1)
Flush(); Flush();
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1; m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
@ -1148,24 +1174,33 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
{ {
GL_REG("PRMODE = 0x%x_%x", r->U32[1], r->U32[0]); GL_REG("PRMODE = 0x%x_%x", r->U32[1], r->U32[0]);
if (!m_env.PRMODECONT.AC) // We're in PRIM mode, need to ignore any writes
{ if (m_env.PRMODECONT.AC)
u32 prim_mask = 0x7f8;
if (GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GS_TRIANGLE_CLASS)
prim_mask &= ~0x80; // Mask out AA1.
if ((m_env.PRIM.U32[0] ^ r->PRMODE.U32[0]) & prim_mask)
Flush();
}
else
{
return; return;
}
const u32 _PRIM = m_env.PRIM.PRIM; const u32 _PRIM = m_env.PRIM.PRIM;
m_env.PRIM = (GSVector4i)r->PRMODE; m_env.PRIM = (GSVector4i)r->PRMODE;
m_env.PRIM.PRIM = _PRIM; m_env.PRIM.PRIM = _PRIM;
u32 prim_mask = 0x7ff;
// Same class of draw so we don't need to flush
if (GSUtil::GetPrimClass(m_last_prim.PRIM) == GSUtil::GetPrimClass(m_env.PRIM.PRIM))
prim_mask &= ~0x7;
if (GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GS_TRIANGLE_CLASS)
prim_mask &= ~0x80; // Mask out AA1.
m_primflush = false;
if ((m_last_prim.U32[0] ^ m_env.PRIM.U32[0]) & prim_mask)
m_primflush = true;
if (m_index.tail == 0)
{
m_primflush = false;
m_last_prim.U32[0] = m_env.PRIM.U32[0];
}
UpdateContext(); UpdateContext();
} }
@ -1194,7 +1229,7 @@ void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
{ {
GL_REG("MIPTBP1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]); GL_REG("MIPTBP1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1) if ((PRIM->CTXT == i || m_primflush) && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
Flush(); Flush();
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1; m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
@ -1205,7 +1240,7 @@ void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
{ {
GL_REG("MIPTBP2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]); GL_REG("MIPTBP2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2) if ((PRIM->CTXT == i || m_primflush) && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
Flush(); Flush();
m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2; m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2;
@ -1244,7 +1279,7 @@ void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
template <int i> template <int i>
void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
{ {
if (PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR) if ((PRIM->CTXT == i || m_primflush) && r->SCISSOR != m_env.CTXT[i].SCISSOR)
Flush(); Flush();
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR; m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
@ -1258,7 +1293,7 @@ template <int i>
void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
{ {
GL_REG("ALPHA = 0x%x_%x", r->U32[1], r->U32[0]); GL_REG("ALPHA = 0x%x_%x", r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA) if ((PRIM->CTXT == i || m_primflush) && r->ALPHA != m_env.CTXT[i].ALPHA)
Flush(); Flush();
m_env.CTXT[i].ALPHA = (GSVector4i)r->ALPHA; m_env.CTXT[i].ALPHA = (GSVector4i)r->ALPHA;
@ -1307,7 +1342,7 @@ void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r)
template <int i> template <int i>
void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r)
{ {
if (PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST) if ((PRIM->CTXT == i || m_primflush) && r->TEST != m_env.CTXT[i].TEST)
Flush(); Flush();
m_env.CTXT[i].TEST = (GSVector4i)r->TEST; m_env.CTXT[i].TEST = (GSVector4i)r->TEST;
@ -1324,7 +1359,7 @@ void GSState::GIFRegHandlerPABE(const GIFReg* RESTRICT r)
template <int i> template <int i>
void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r)
{ {
if (PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA) if ((PRIM->CTXT == i || m_primflush) && r->FBA != m_env.CTXT[i].FBA)
Flush(); Flush();
m_env.CTXT[i].FBA = (GSVector4i)r->FBA; m_env.CTXT[i].FBA = (GSVector4i)r->FBA;
@ -1340,7 +1375,7 @@ void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
// However there is some issues so even software mode is incorrect on PCSX2, but this works better.. // However there is some issues so even software mode is incorrect on PCSX2, but this works better..
NewFrame.FBW = std::clamp(NewFrame.FBW, 1U, 32U); NewFrame.FBW = std::clamp(NewFrame.FBW, 1U, 32U);
if (PRIM->CTXT == i && NewFrame != m_env.CTXT[i].FRAME) if ((PRIM->CTXT == i || m_primflush) && NewFrame != m_env.CTXT[i].FRAME)
Flush(); Flush();
if ((NewFrame.PSM & 0x30) == 0x30) if ((NewFrame.PSM & 0x30) == 0x30)
@ -1399,7 +1434,7 @@ void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
else else
ZBUF.PSM |= 0x30; ZBUF.PSM |= 0x30;
if (PRIM->CTXT == i && ZBUF != m_env.CTXT[i].ZBUF) if ((PRIM->CTXT == i || m_primflush) && ZBUF != m_env.CTXT[i].ZBUF)
Flush(); Flush();
if ((m_env.CTXT[i].ZBUF.U32[0] ^ ZBUF.U32[0]) & 0x3f0001ff) // ZBP PSM if ((m_env.CTXT[i].ZBUF.U32[0] ^ ZBUF.U32[0]) & 0x3f0001ff) // ZBP PSM
@ -1491,7 +1526,6 @@ void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
void GSState::Flush() void GSState::Flush()
{ {
FlushWrite(); FlushWrite();
FlushPrim(); FlushPrim();
} }
@ -1522,6 +1556,15 @@ void GSState::FlushWrite()
void GSState::FlushPrim() void GSState::FlushPrim()
{ {
const u32 new_prim = PRIM->U32[0];
if (m_primflush)
{
// We need to restore the old PRIM and update the Context (in case it changed)
m_env.PRIM.U32[0] = m_last_prim.U32[0];
UpdateContext();
}
if (m_index.tail > 0) if (m_index.tail > 0)
{ {
GL_REG("FlushPrim ctxt %d", PRIM->CTXT); GL_REG("FlushPrim ctxt %d", PRIM->CTXT);
@ -1554,10 +1597,12 @@ void GSState::FlushPrim()
case GS_LINELIST: case GS_LINELIST:
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_SPRITE: case GS_SPRITE:
unused = 1;
buff[0] = m_vertex.buff[tail - 1];
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
unused = tail - head; unused = std::min<size_t>(tail - head, 2);
memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused); memcpy(buff, &m_vertex.buff[tail - unused], sizeof(GSVertex) * 2);
break; break;
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
buff[0] = m_vertex.buff[head]; buff[0] = m_vertex.buff[head];
@ -1626,6 +1671,16 @@ void GSState::FlushPrim()
m_vertex.next = 0; m_vertex.next = 0;
} }
} }
if (m_primflush)
{
// Restore the backup
PRIM->U32[0] = new_prim;
UpdateContext();
}
m_primflush = false;
m_last_prim.U32[0] = new_prim;
} }
void GSState::Write(const u8* mem, int len) void GSState::Write(const u8* mem, int len)
@ -2730,9 +2785,6 @@ __forceinline void GSState::VertexKick(u32 skip)
break; break;
} }
if ((m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) != GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)
m_mem.m_clut.Invalidate(m_context->FRAME.Block());
if (auto_flush && m_index.tail >= n) if (auto_flush && m_index.tail >= n)
HandleAutoFlush(); HandleAutoFlush();

View File

@ -51,6 +51,8 @@ class GSState : public GSAlignedClass<32>
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4]; GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4];
void CheckFlushes();
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
@ -232,6 +234,8 @@ public:
bool m_NTSC_Saturation; bool m_NTSC_Saturation;
bool m_nativeres; bool m_nativeres;
bool m_mipmap; bool m_mipmap;
bool m_primflush;
GIFRegPRIM m_last_prim;
static int s_n; static int s_n;
bool s_dump; bool s_dump;