GS: Only autoflush on draws of same format

Move overlap check to State
This commit is contained in:
refractionpcsx2 2022-01-22 16:19:32 +00:00
parent c9216e5625
commit 369bda13f8
4 changed files with 119 additions and 116 deletions

View File

@ -2309,6 +2309,110 @@ void GSState::GrowVertexBuffer()
m_index.buff = index;
}
GSState::PRIM_OVERLAP GSState::PrimitiveOverlap()
{
// Either 1 triangle or 1 line or 3 POINTs
// It is bad for the POINTs but low probability that they overlap
if (m_vertex.next < 4)
return PRIM_OVERLAP_NO;
if (m_vt.m_primclass != GS_SPRITE_CLASS)
return PRIM_OVERLAP_UNKNOW; // maybe, maybe not
// Check intersection of sprite primitive only
const size_t count = m_vertex.next;
PRIM_OVERLAP overlap = PRIM_OVERLAP_NO;
const GSVertex* v = m_vertex.buff;
m_drawlist.clear();
size_t i = 0;
while (i < count)
{
// In order to speed up comparison a bounding-box is accumulated. It removes a
// loop so code is much faster (check game virtua fighter). Besides it allow to check
// properly the Y order.
// .x = min(v[i].XYZ.X, v[i+1].XYZ.X)
// .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y)
// .z = max(v[i].XYZ.X, v[i+1].XYZ.X)
// .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y)
GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i + 1].m[1])).upl16().xzyw();
all = all.xyxy().blend(all.zwzw(), all > all.zwxy());
size_t j = i + 2;
while (j < count)
{
GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw();
sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy());
// Be sure to get vertex in good order, otherwise .r* function doesn't
// work as expected.
ASSERT(sprite.x <= sprite.z);
ASSERT(sprite.y <= sprite.w);
ASSERT(all.x <= all.z);
ASSERT(all.y <= all.w);
if (all.rintersect(sprite).rempty())
{
all = all.runion_ordered(sprite);
}
else
{
overlap = PRIM_OVERLAP_YES;
break;
}
j += 2;
}
m_drawlist.push_back((j - i) >> 1); // Sprite count
i = j;
}
#if 0
// Old algo: less constraint but O(n^2) instead of O(n) as above
// You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left
// There is a high probability that the draw call will uses same ordering for all vertices.
// In order to keep a small performance impact only the first sprite will be checked
//
// Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact
if (v[1].XYZ.Y < v[0].XYZ.Y) {
// First vertex is Top-Left
for (size_t i = 0; i < count; i += 2) {
if (v[i + 1].XYZ.Y > v[i].XYZ.Y) {
return PRIM_OVERLAP_UNKNOW;
}
GSVector4i vi(v[i].XYZ.X, v[i + 1].XYZ.Y, v[i + 1].XYZ.X, v[i].XYZ.Y);
for (size_t j = i + 2; j < count; j += 2) {
GSVector4i vj(v[j].XYZ.X, v[j + 1].XYZ.Y, v[j + 1].XYZ.X, v[j].XYZ.Y);
GSVector4i inter = vi.rintersect(vj);
if (!inter.rempty()) {
return PRIM_OVERLAP_YES;
}
}
}
}
else {
// First vertex is Bottom-Left
for (size_t i = 0; i < count; i += 2) {
if (v[i + 1].XYZ.Y < v[i].XYZ.Y) {
return PRIM_OVERLAP_UNKNOW;
}
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i + 1].XYZ.X, v[i + 1].XYZ.Y);
for (size_t j = i + 2; j < count; j += 2) {
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j + 1].XYZ.X, v[j + 1].XYZ.Y);
GSVector4i inter = vi.rintersect(vj);
if (!inter.rempty()) {
return PRIM_OVERLAP_YES;
}
}
}
}
#endif
// fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count);
return overlap;
}
template <u32 prim, bool auto_flush, bool index_swap>
__forceinline void GSState::VertexKick(u32 skip)
{
@ -2549,8 +2653,10 @@ __forceinline void GSState::VertexKick(u32 skip)
{
m_mem.m_clut.Invalidate(m_context->FRAME.Block());
if (auto_flush && PRIM->TME && (m_context->FRAME.Block() == m_context->TEX0.TBP0))
if (auto_flush && PRIM->TME && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && (m_context->TEX0.PSM == m_context->FRAME.PSM))
{
FlushPrim();
}
}
}

View File

@ -231,6 +231,16 @@ public:
int s_savel;
std::string m_dump_root;
enum PRIM_OVERLAP
{
PRIM_OVERLAP_UNKNOW,
PRIM_OVERLAP_YES,
PRIM_OVERLAP_NO
};
PRIM_OVERLAP m_prim_overlap;
std::vector<size_t> m_drawlist;
public:
GSState();
virtual ~GSState();
@ -277,4 +287,6 @@ public:
void SetRegsMem(u8* basemem) { m_regs = reinterpret_cast<GSPrivRegSet*>(basemem); }
void SetFrameSkip(int skip);
PRIM_OVERLAP PrimitiveOverlap();
};

View File

@ -1058,109 +1058,6 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
m_conf.pal = tex->m_palette;
}
GSRendererNew::PRIM_OVERLAP GSRendererNew::PrimitiveOverlap()
{
// Either 1 triangle or 1 line or 3 POINTs
// It is bad for the POINTs but low probability that they overlap
if (m_vertex.next < 4)
return PRIM_OVERLAP_NO;
if (m_vt.m_primclass != GS_SPRITE_CLASS)
return PRIM_OVERLAP_UNKNOW; // maybe, maybe not
// Check intersection of sprite primitive only
const size_t count = m_vertex.next;
PRIM_OVERLAP overlap = PRIM_OVERLAP_NO;
const GSVertex* v = m_vertex.buff;
m_drawlist.clear();
size_t i = 0;
while (i < count)
{
// In order to speed up comparison a bounding-box is accumulated. It removes a
// loop so code is much faster (check game virtua fighter). Besides it allow to check
// properly the Y order.
// .x = min(v[i].XYZ.X, v[i+1].XYZ.X)
// .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y)
// .z = max(v[i].XYZ.X, v[i+1].XYZ.X)
// .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y)
GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i + 1].m[1])).upl16().xzyw();
all = all.xyxy().blend(all.zwzw(), all > all.zwxy());
size_t j = i + 2;
while (j < count)
{
GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw();
sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy());
// Be sure to get vertex in good order, otherwise .r* function doesn't
// work as expected.
ASSERT(sprite.x <= sprite.z);
ASSERT(sprite.y <= sprite.w);
ASSERT(all.x <= all.z);
ASSERT(all.y <= all.w);
if (all.rintersect(sprite).rempty())
{
all = all.runion_ordered(sprite);
}
else
{
overlap = PRIM_OVERLAP_YES;
break;
}
j += 2;
}
m_drawlist.push_back((j - i) >> 1); // Sprite count
i = j;
}
#if 0
// Old algo: less constraint but O(n^2) instead of O(n) as above
// You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left
// There is a high probability that the draw call will uses same ordering for all vertices.
// In order to keep a small performance impact only the first sprite will be checked
//
// Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact
if (v[1].XYZ.Y < v[0].XYZ.Y) {
// First vertex is Top-Left
for(size_t i = 0; i < count; i += 2) {
if (v[i+1].XYZ.Y > v[i].XYZ.Y) {
return PRIM_OVERLAP_UNKNOW;
}
GSVector4i vi(v[i].XYZ.X, v[i+1].XYZ.Y, v[i+1].XYZ.X, v[i].XYZ.Y);
for (size_t j = i+2; j < count; j += 2) {
GSVector4i vj(v[j].XYZ.X, v[j+1].XYZ.Y, v[j+1].XYZ.X, v[j].XYZ.Y);
GSVector4i inter = vi.rintersect(vj);
if (!inter.rempty()) {
return PRIM_OVERLAP_YES;
}
}
}
} else {
// First vertex is Bottom-Left
for(size_t i = 0; i < count; i += 2) {
if (v[i+1].XYZ.Y < v[i].XYZ.Y) {
return PRIM_OVERLAP_UNKNOW;
}
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y);
for (size_t j = i+2; j < count; j += 2) {
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y);
GSVector4i inter = vi.rintersect(vj);
if (!inter.rempty()) {
return PRIM_OVERLAP_YES;
}
}
}
}
#endif
// fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count);
return overlap;
}
void GSRendererNew::EmulateATST(float& AREF, GSHWDrawConfig::PSSelector& ps, bool pass_2)
{
static const u32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};

View File

@ -20,17 +20,7 @@
class GSRendererNew final : public GSRendererHW
{
enum PRIM_OVERLAP
{
PRIM_OVERLAP_UNKNOW,
PRIM_OVERLAP_YES,
PRIM_OVERLAP_NO
};
private:
PRIM_OVERLAP m_prim_overlap;
std::vector<size_t> m_drawlist;
GSHWDrawConfig m_conf;
private:
@ -49,7 +39,5 @@ public:
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) override;
PRIM_OVERLAP PrimitiveOverlap();
bool IsDummyTexture() const override;
};