mirror of https://github.com/PCSX2/pcsx2.git
GS: Only autoflush on draws of same format
Move overlap check to State
This commit is contained in:
parent
c9216e5625
commit
369bda13f8
|
@ -2309,6 +2309,110 @@ void GSState::GrowVertexBuffer()
|
|||
m_index.buff = index;
|
||||
}
|
||||
|
||||
GSState::PRIM_OVERLAP GSState::PrimitiveOverlap()
|
||||
{
|
||||
// Either 1 triangle or 1 line or 3 POINTs
|
||||
// It is bad for the POINTs but low probability that they overlap
|
||||
if (m_vertex.next < 4)
|
||||
return PRIM_OVERLAP_NO;
|
||||
|
||||
if (m_vt.m_primclass != GS_SPRITE_CLASS)
|
||||
return PRIM_OVERLAP_UNKNOW; // maybe, maybe not
|
||||
|
||||
// Check intersection of sprite primitive only
|
||||
const size_t count = m_vertex.next;
|
||||
PRIM_OVERLAP overlap = PRIM_OVERLAP_NO;
|
||||
const GSVertex* v = m_vertex.buff;
|
||||
|
||||
m_drawlist.clear();
|
||||
size_t i = 0;
|
||||
while (i < count)
|
||||
{
|
||||
// In order to speed up comparison a bounding-box is accumulated. It removes a
|
||||
// loop so code is much faster (check game virtua fighter). Besides it allow to check
|
||||
// properly the Y order.
|
||||
|
||||
// .x = min(v[i].XYZ.X, v[i+1].XYZ.X)
|
||||
// .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y)
|
||||
// .z = max(v[i].XYZ.X, v[i+1].XYZ.X)
|
||||
// .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y)
|
||||
GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i + 1].m[1])).upl16().xzyw();
|
||||
all = all.xyxy().blend(all.zwzw(), all > all.zwxy());
|
||||
|
||||
size_t j = i + 2;
|
||||
while (j < count)
|
||||
{
|
||||
GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw();
|
||||
sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy());
|
||||
|
||||
// Be sure to get vertex in good order, otherwise .r* function doesn't
|
||||
// work as expected.
|
||||
ASSERT(sprite.x <= sprite.z);
|
||||
ASSERT(sprite.y <= sprite.w);
|
||||
ASSERT(all.x <= all.z);
|
||||
ASSERT(all.y <= all.w);
|
||||
|
||||
if (all.rintersect(sprite).rempty())
|
||||
{
|
||||
all = all.runion_ordered(sprite);
|
||||
}
|
||||
else
|
||||
{
|
||||
overlap = PRIM_OVERLAP_YES;
|
||||
break;
|
||||
}
|
||||
j += 2;
|
||||
}
|
||||
m_drawlist.push_back((j - i) >> 1); // Sprite count
|
||||
i = j;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Old algo: less constraint but O(n^2) instead of O(n) as above
|
||||
|
||||
// You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left
|
||||
// There is a high probability that the draw call will uses same ordering for all vertices.
|
||||
// In order to keep a small performance impact only the first sprite will be checked
|
||||
//
|
||||
// Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact
|
||||
if (v[1].XYZ.Y < v[0].XYZ.Y) {
|
||||
// First vertex is Top-Left
|
||||
for (size_t i = 0; i < count; i += 2) {
|
||||
if (v[i + 1].XYZ.Y > v[i].XYZ.Y) {
|
||||
return PRIM_OVERLAP_UNKNOW;
|
||||
}
|
||||
GSVector4i vi(v[i].XYZ.X, v[i + 1].XYZ.Y, v[i + 1].XYZ.X, v[i].XYZ.Y);
|
||||
for (size_t j = i + 2; j < count; j += 2) {
|
||||
GSVector4i vj(v[j].XYZ.X, v[j + 1].XYZ.Y, v[j + 1].XYZ.X, v[j].XYZ.Y);
|
||||
GSVector4i inter = vi.rintersect(vj);
|
||||
if (!inter.rempty()) {
|
||||
return PRIM_OVERLAP_YES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// First vertex is Bottom-Left
|
||||
for (size_t i = 0; i < count; i += 2) {
|
||||
if (v[i + 1].XYZ.Y < v[i].XYZ.Y) {
|
||||
return PRIM_OVERLAP_UNKNOW;
|
||||
}
|
||||
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i + 1].XYZ.X, v[i + 1].XYZ.Y);
|
||||
for (size_t j = i + 2; j < count; j += 2) {
|
||||
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j + 1].XYZ.X, v[j + 1].XYZ.Y);
|
||||
GSVector4i inter = vi.rintersect(vj);
|
||||
if (!inter.rempty()) {
|
||||
return PRIM_OVERLAP_YES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count);
|
||||
return overlap;
|
||||
}
|
||||
|
||||
template <u32 prim, bool auto_flush, bool index_swap>
|
||||
__forceinline void GSState::VertexKick(u32 skip)
|
||||
{
|
||||
|
@ -2549,8 +2653,10 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||
{
|
||||
m_mem.m_clut.Invalidate(m_context->FRAME.Block());
|
||||
|
||||
if (auto_flush && PRIM->TME && (m_context->FRAME.Block() == m_context->TEX0.TBP0))
|
||||
if (auto_flush && PRIM->TME && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && (m_context->TEX0.PSM == m_context->FRAME.PSM))
|
||||
{
|
||||
FlushPrim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -231,6 +231,16 @@ public:
|
|||
int s_savel;
|
||||
std::string m_dump_root;
|
||||
|
||||
enum PRIM_OVERLAP
|
||||
{
|
||||
PRIM_OVERLAP_UNKNOW,
|
||||
PRIM_OVERLAP_YES,
|
||||
PRIM_OVERLAP_NO
|
||||
};
|
||||
|
||||
PRIM_OVERLAP m_prim_overlap;
|
||||
std::vector<size_t> m_drawlist;
|
||||
|
||||
public:
|
||||
GSState();
|
||||
virtual ~GSState();
|
||||
|
@ -277,4 +287,6 @@ public:
|
|||
void SetRegsMem(u8* basemem) { m_regs = reinterpret_cast<GSPrivRegSet*>(basemem); }
|
||||
|
||||
void SetFrameSkip(int skip);
|
||||
|
||||
PRIM_OVERLAP PrimitiveOverlap();
|
||||
};
|
||||
|
|
|
@ -1058,109 +1058,6 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
|||
m_conf.pal = tex->m_palette;
|
||||
}
|
||||
|
||||
GSRendererNew::PRIM_OVERLAP GSRendererNew::PrimitiveOverlap()
|
||||
{
|
||||
// Either 1 triangle or 1 line or 3 POINTs
|
||||
// It is bad for the POINTs but low probability that they overlap
|
||||
if (m_vertex.next < 4)
|
||||
return PRIM_OVERLAP_NO;
|
||||
|
||||
if (m_vt.m_primclass != GS_SPRITE_CLASS)
|
||||
return PRIM_OVERLAP_UNKNOW; // maybe, maybe not
|
||||
|
||||
// Check intersection of sprite primitive only
|
||||
const size_t count = m_vertex.next;
|
||||
PRIM_OVERLAP overlap = PRIM_OVERLAP_NO;
|
||||
const GSVertex* v = m_vertex.buff;
|
||||
|
||||
m_drawlist.clear();
|
||||
size_t i = 0;
|
||||
while (i < count)
|
||||
{
|
||||
// In order to speed up comparison a bounding-box is accumulated. It removes a
|
||||
// loop so code is much faster (check game virtua fighter). Besides it allow to check
|
||||
// properly the Y order.
|
||||
|
||||
// .x = min(v[i].XYZ.X, v[i+1].XYZ.X)
|
||||
// .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y)
|
||||
// .z = max(v[i].XYZ.X, v[i+1].XYZ.X)
|
||||
// .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y)
|
||||
GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i + 1].m[1])).upl16().xzyw();
|
||||
all = all.xyxy().blend(all.zwzw(), all > all.zwxy());
|
||||
|
||||
size_t j = i + 2;
|
||||
while (j < count)
|
||||
{
|
||||
GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw();
|
||||
sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy());
|
||||
|
||||
// Be sure to get vertex in good order, otherwise .r* function doesn't
|
||||
// work as expected.
|
||||
ASSERT(sprite.x <= sprite.z);
|
||||
ASSERT(sprite.y <= sprite.w);
|
||||
ASSERT(all.x <= all.z);
|
||||
ASSERT(all.y <= all.w);
|
||||
|
||||
if (all.rintersect(sprite).rempty())
|
||||
{
|
||||
all = all.runion_ordered(sprite);
|
||||
}
|
||||
else
|
||||
{
|
||||
overlap = PRIM_OVERLAP_YES;
|
||||
break;
|
||||
}
|
||||
j += 2;
|
||||
}
|
||||
m_drawlist.push_back((j - i) >> 1); // Sprite count
|
||||
i = j;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Old algo: less constraint but O(n^2) instead of O(n) as above
|
||||
|
||||
// You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left
|
||||
// There is a high probability that the draw call will uses same ordering for all vertices.
|
||||
// In order to keep a small performance impact only the first sprite will be checked
|
||||
//
|
||||
// Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact
|
||||
if (v[1].XYZ.Y < v[0].XYZ.Y) {
|
||||
// First vertex is Top-Left
|
||||
for(size_t i = 0; i < count; i += 2) {
|
||||
if (v[i+1].XYZ.Y > v[i].XYZ.Y) {
|
||||
return PRIM_OVERLAP_UNKNOW;
|
||||
}
|
||||
GSVector4i vi(v[i].XYZ.X, v[i+1].XYZ.Y, v[i+1].XYZ.X, v[i].XYZ.Y);
|
||||
for (size_t j = i+2; j < count; j += 2) {
|
||||
GSVector4i vj(v[j].XYZ.X, v[j+1].XYZ.Y, v[j+1].XYZ.X, v[j].XYZ.Y);
|
||||
GSVector4i inter = vi.rintersect(vj);
|
||||
if (!inter.rempty()) {
|
||||
return PRIM_OVERLAP_YES;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// First vertex is Bottom-Left
|
||||
for(size_t i = 0; i < count; i += 2) {
|
||||
if (v[i+1].XYZ.Y < v[i].XYZ.Y) {
|
||||
return PRIM_OVERLAP_UNKNOW;
|
||||
}
|
||||
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y);
|
||||
for (size_t j = i+2; j < count; j += 2) {
|
||||
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y);
|
||||
GSVector4i inter = vi.rintersect(vj);
|
||||
if (!inter.rempty()) {
|
||||
return PRIM_OVERLAP_YES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count);
|
||||
return overlap;
|
||||
}
|
||||
|
||||
void GSRendererNew::EmulateATST(float& AREF, GSHWDrawConfig::PSSelector& ps, bool pass_2)
|
||||
{
|
||||
static const u32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};
|
||||
|
|
|
@ -20,17 +20,7 @@
|
|||
|
||||
class GSRendererNew final : public GSRendererHW
|
||||
{
|
||||
enum PRIM_OVERLAP
|
||||
{
|
||||
PRIM_OVERLAP_UNKNOW,
|
||||
PRIM_OVERLAP_YES,
|
||||
PRIM_OVERLAP_NO
|
||||
};
|
||||
|
||||
private:
|
||||
PRIM_OVERLAP m_prim_overlap;
|
||||
std::vector<size_t> m_drawlist;
|
||||
|
||||
GSHWDrawConfig m_conf;
|
||||
|
||||
private:
|
||||
|
@ -49,7 +39,5 @@ public:
|
|||
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) override;
|
||||
|
||||
PRIM_OVERLAP PrimitiveOverlap();
|
||||
|
||||
bool IsDummyTexture() const override;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue