GS: Improve autoflush detection.

Old way didn't really work with triangles, especially triangle fans.  Some games will be slower, others faster.
This commit is contained in:
refractionpcsx2 2022-08-12 19:19:02 +01:00
parent a2e6a8e41f
commit c11030dde5
1 changed files with 90 additions and 43 deletions

View File

@ -2875,32 +2875,38 @@ __forceinline void GSState::HandleAutoFlush()
// So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
if (PRIM->TME && (frame_hit || zbuf_hit) && GSUtil::HasSharedBits(frame_z_bp, frame_z_psm, m_context->TEX0.TBP0, m_context->TEX0.PSM))
{
size_t n = 1;
int n = 1;
size_t buff[3];
const size_t head = m_vertex.head;
const size_t tail = m_vertex.tail;
switch (GSUtil::GetPrimClass(PRIM->PRIM))
switch (PRIM->PRIM)
{
case GS_POINT_CLASS:
case GS_INVALID_CLASS:
case GS_POINTLIST:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
buff[0] = tail - 1;
n = 2;
break;
case GS_TRIANGLE_CLASS:
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
buff[0] = tail - 2;
buff[1] = tail - 1;
n = 3;
break;
case GS_TRIANGLEFAN:
buff[0] = head;
buff[1] = tail - 1;
n = 3;
break;
case GS_INVALID:
default:
break;
}
const int current_tex_end = (int)(m_vertex.tail - (m_vertex.tail % n)) - 1;
// Make sure including the new vert we have the whole rect to be drawn
// Example being a sprite which is 2 verts
// Tail = 3 (aka we have one sprite and 1 vert already saved, plus the incoming one)
// current_tex_end will be 1 ((3 - (3 % 2 == 1) == 2) - 1), meaning 3-1 = 2, so we have enough data for the full rect.
if (((m_vertex.tail - current_tex_end) < n) && PRIM->PRIM != GS_POINTLIST)
return;
const int page_mask_x = ~(GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.x - 1);
const int page_mask_y = ~(GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.y - 1);
const GSVector4i page_mask = { page_mask_x, page_mask_y, page_mask_x, page_mask_y };
@ -2914,16 +2920,19 @@ __forceinline void GSState::HandleAutoFlush()
}
else
{
tex_coord.x = (int)((1 << m_context->TEX0.TW) * (m_v.ST.S / m_v.RGBAQ.Q));
tex_coord.y = (int)((1 << m_context->TEX0.TH) * (m_v.ST.T / m_v.RGBAQ.Q));
const float s = std::min((m_v.ST.S / m_v.RGBAQ.Q), 1.0f);
const float t = std::min((m_v.ST.T / m_v.RGBAQ.Q), 1.0f);
tex_coord.x = (int)((1 << m_context->TEX0.TW) * s);
tex_coord.y = (int)((1 << m_context->TEX0.TH) * t);
}
GSVector4i tex_rect = tex_coord.xyxy();
// Get the rest of the rect.
for (int i = m_vertex.tail - 1; i > current_tex_end; i--)
for (int i = 0; i < (n - 1); i++)
{
const GSVertex* v = &m_vertex.buff[i];
const GSVertex* v = &m_vertex.buff[buff[i]];
if (PRIM->FST)
{
@ -2932,8 +2941,11 @@ __forceinline void GSState::HandleAutoFlush()
}
else
{
tex_coord.x = (int)((1 << m_context->TEX0.TW) * (v->ST.S / v->RGBAQ.Q));
tex_coord.y = (int)((1 << m_context->TEX0.TH) * (v->ST.T / v->RGBAQ.Q));
const float s = std::min((v->ST.S / v->RGBAQ.Q), 1.0f);
const float t = std::min((v->ST.T / v->RGBAQ.Q), 1.0f);
tex_coord.x = (int)((1 << m_context->TEX0.TW) * s);
tex_coord.y = (int)((1 << m_context->TEX0.TH) * t);
}
tex_rect.x = std::min(tex_rect.x, tex_coord.x);
@ -2952,34 +2964,69 @@ __forceinline void GSState::HandleAutoFlush()
}
else
{
tex_coord.x = (int)((1 << m_context->TEX0.TW) * (v->ST.S / v->RGBAQ.Q));
tex_coord.y = (int)((1 << m_context->TEX0.TH) * (v->ST.T / v->RGBAQ.Q));
const float s = std::min((v->ST.S / v->RGBAQ.Q), 1.0f);
const float t = std::min((v->ST.T / v->RGBAQ.Q), 1.0f);
tex_coord.x = (int)((1 << m_context->TEX0.TW) * s);
tex_coord.y = (int)((1 << m_context->TEX0.TH) * t);
}
const GSVector4i pages = tex_rect & page_mask;
const GSVector4i last_tex_page = tex_coord.xyxy() & page_mask;
const GSVector4i tex_page = tex_rect.xyxy() & page_mask;
tex_coord = tex_coord & page_mask;
bool page_crossed = false;
if (!pages.xyzw().eq(tex_coord.xyxy()))
page_crossed = true;
if(page_crossed)
// Crossed page since last draw end
if(!tex_page.eq(last_tex_page))
{
// Make sure the format matches, otherwise the coordinates aren't gonna match, so the draws won't intersect.
if (GSUtil::HasCompatibleBits(m_context->TEX0.PSM, frame_z_psm) && (m_context->FRAME.FBW == m_context->TEX0.TBW))
{
// Update the vertex trace, scissor it (important for Jak 3!) and intersect with the current texture.
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail - m_vertex.head, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// If the draw was 1 line thick, make it larger as rects are exclusive of ends.
if (tex_rect.x == tex_rect.z)
tex_rect += GSVector4i(0, 0, 1, 0);
if (tex_rect.y == tex_rect.w)
tex_rect += GSVector4i(0, 0, 0, 1);
// Intersect goes on space inside the rect
GSVector4i area_out = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p));
// Scissor output
area_out = area_out.rintersect(GSVector4i(m_context->scissor.in));
// Intersect with texture
if (!area_out.rintersect(tex_rect).rempty())
Flush();
const GSVector2i offset = GSVector2i(m_context->XYOFFSET.OFX, m_context->XYOFFSET.OFY);
const GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i old_tex_rect = GSVector4i(0, 0, 0, 0);
int current_draw_end = m_index.tail;
while (current_draw_end >= n)
{
for (int i = current_draw_end - 1; i >= current_draw_end - n; i--)
{
const GSVertex* v = &m_vertex.buff[m_index.buff[i]];
tex_coord.x = (static_cast<int>(v->XYZ.X) - offset.x) >> 4;
tex_coord.y = (static_cast<int>(v->XYZ.Y) - offset.y) >> 4;
if (i == (current_draw_end - 1))
{
old_tex_rect = tex_coord.xyxy();
}
else
{
old_tex_rect.x = std::min(old_tex_rect.x, tex_coord.x);
old_tex_rect.z = std::max(old_tex_rect.z, tex_coord.x);
old_tex_rect.y = std::min(old_tex_rect.y, tex_coord.y);
old_tex_rect.w = std::max(old_tex_rect.w, tex_coord.y);
}
}
if (old_tex_rect.x == old_tex_rect.z)
old_tex_rect += GSVector4i(0, 0, 1, 0);
if (old_tex_rect.y == old_tex_rect.w)
old_tex_rect += GSVector4i(0, 0, 0, 1);
old_tex_rect = tex_rect.rintersect(old_tex_rect);
if (!old_tex_rect.rintersect(scissor).rempty())
{
Flush();
return;
}
current_draw_end -= n;
}
}
else // Storage of the TEX and FRAME/Z is different, so uhh, just fall back to flushing each page. It's slower, sorry.
{
@ -3034,7 +3081,7 @@ __forceinline void GSState::VertexKick(u32 skip)
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
if (auto_flush && m_index.tail >= n && !skip)
if (auto_flush && m_index.tail >= 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n)
{
HandleAutoFlush();
}