GS: Remove inaccurate stq calculations from GSVertexTrace

The inaccurate (reciprocal-based) stq calculations were the same speed as, or slower than, a full division on Ivy Bridge+ and Bulldozer+
This commit is contained in:
TellowKrinkle 2020-11-20 23:07:17 -06:00 committed by refractionpcsx2
parent 5d33165fa5
commit bd8fcc8f81
2 changed files with 7 additions and 12 deletions

View File

@ -27,8 +27,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state)
memset(&m_alpha, 0, sizeof(m_alpha));
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
m_fmm[0][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 0>; \
m_fmm[1][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 1>; \
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
#define InitUpdate2(P, IIP, TME) \
InitUpdate3(P, IIP, TME, 0, 0) \
@ -57,7 +56,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
uint32 fst = m_state->PRIM->FST;
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
// Potential float overflow detected. Better to use the slower division instead
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
@ -66,7 +65,6 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
{
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
m_accurate_stq = true;
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
@ -150,7 +148,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
}
}
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
const GSDrawingContext* context = m_state->m_context;
@ -209,7 +207,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4 stq0 = GSVector4::cast(GSVector4i(v0.m[0]));
GSVector4 stq1 = GSVector4::cast(GSVector4i(v1.m[0]));
GSVector4 st, q;
GSVector4 q;
// Sprites always have indices == vertices, so we don't have to look at the index table here
if (primclass == GS_SPRITE_CLASS)
q = stq1.wwww();
@ -220,10 +218,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
// make sure to remove the z (rgba) field as it's often denormal.
// Then, use GSVector4::noopt() to prevent clang from optimizing out your "useless" shuffle
// e.g. stq = (stq.xyww() / stq.wwww()).noopt().xyww(stq);
if (accurate_stq)
st = stq0.xyxy(stq1) / q;
else
st = stq0.xyxy(stq1) * q.rcpnr();
GSVector4 st = stq0.xyxy(stq1) / q;
stq0 = st.xyww(primclass == GS_SPRITE_CLASS ? stq1 : stq0);
stq1 = st.zwww(stq1);

View File

@ -47,9 +47,9 @@ protected:
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
FindMinMaxPtr m_fmm[2][2][2][2][2][4];
FindMinMaxPtr m_fmm[2][2][2][2][4];
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
void FindMinMax(const void* vertex, const uint32* index, int count);
public: