From bd8fcc8f81242c8916a41509f76b6626b4720f50 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 20 Nov 2020 23:07:17 -0600 Subject: [PATCH] GS: Remove inaccurate stq calculations from GSVertexTrace They were the same speed or slower than full div on IvyBridge+ and Bulldozer+ --- pcsx2/GS/Renderers/Common/GSVertexTrace.cpp | 15 +++++---------- pcsx2/GS/Renderers/Common/GSVertexTrace.h | 4 ++-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pcsx2/GS/Renderers/Common/GSVertexTrace.cpp b/pcsx2/GS/Renderers/Common/GSVertexTrace.cpp index 744c2d2fea..7d91e10c1f 100644 --- a/pcsx2/GS/Renderers/Common/GSVertexTrace.cpp +++ b/pcsx2/GS/Renderers/Common/GSVertexTrace.cpp @@ -27,8 +27,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state) memset(&m_alpha, 0, sizeof(m_alpha)); #define InitUpdate3(P, IIP, TME, FST, COLOR) \ - m_fmm[0][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax; \ - m_fmm[1][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax; \ + m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax; #define InitUpdate2(P, IIP, TME) \ InitUpdate3(P, IIP, TME, 0, 0) \ @@ -57,7 +56,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, uint32 fst = m_state->PRIM->FST; uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC); - (this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count); + (this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count); // Potential float overflow detected. Better uses the slower division instead // Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number @@ -66,7 +65,6 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, { fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z); m_accurate_stq = true; - (this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count); } m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); @@ -150,7 +148,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, } } -template +template void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) { const GSDrawingContext* context = m_state->m_context; @@ -209,7 +207,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4 stq0 = GSVector4::cast(GSVector4i(v0.m[0])); GSVector4 stq1 = GSVector4::cast(GSVector4i(v1.m[0])); - GSVector4 st, q; + GSVector4 q; // Sprites always have indices == vertices, so we don't have to look at the index table here if (primclass == GS_SPRITE_CLASS) q = stq1.wwww(); @@ -220,10 +218,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun // make sure to remove the z (rgba) field as it's often denormal. // Then, use GSVector4::noopt() to prevent clang from optimizing out your "useless" shuffle // e.g. stq = (stq.xyww() / stq.wwww()).noopt().xyww(stq); - if (accurate_stq) - st = stq0.xyxy(stq1) / q; - else - st = stq0.xyxy(stq1) * q.rcpnr(); + GSVector4 st = stq0.xyxy(stq1) / q; stq0 = st.xyww(primclass == GS_SPRITE_CLASS ? stq1 : stq0); stq1 = st.zwww(stq1); diff --git a/pcsx2/GS/Renderers/Common/GSVertexTrace.h b/pcsx2/GS/Renderers/Common/GSVertexTrace.h index ba86ceb9c9..41c7e11e53 100644 --- a/pcsx2/GS/Renderers/Common/GSVertexTrace.h +++ b/pcsx2/GS/Renderers/Common/GSVertexTrace.h @@ -47,9 +47,9 @@ protected: typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count); - FindMinMaxPtr m_fmm[2][2][2][2][2][4]; + FindMinMaxPtr m_fmm[2][2][2][2][4]; - template + template void FindMinMax(const void* vertex, const uint32* index, int count); public: