mirror of https://github.com/PCSX2/pcsx2.git
GS: Remove inaccurate stq calculations from GSVertexTrace
They were the same speed as, or slower than, a full division on Ivy Bridge+ and Bulldozer+.
This commit is contained in:
parent
5d33165fa5
commit
bd8fcc8f81
|
@ -27,8 +27,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state)
|
|||
memset(&m_alpha, 0, sizeof(m_alpha));
|
||||
|
||||
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
|
||||
m_fmm[0][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 0>; \
|
||||
m_fmm[1][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 1>; \
|
||||
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
|
||||
|
||||
#define InitUpdate2(P, IIP, TME) \
|
||||
InitUpdate3(P, IIP, TME, 0, 0) \
|
||||
|
@ -57,7 +56,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
|||
uint32 fst = m_state->PRIM->FST;
|
||||
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
||||
|
||||
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
|
||||
// Potential float overflow detected. Better to use the slower division instead
|
||||
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
|
||||
|
@ -66,7 +65,6 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
|||
{
|
||||
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
|
||||
m_accurate_stq = true;
|
||||
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
}
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
@ -150,7 +148,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
|||
}
|
||||
}
|
||||
|
||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
|
||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
|
||||
{
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
@ -209,7 +207,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
GSVector4 stq0 = GSVector4::cast(GSVector4i(v0.m[0]));
|
||||
GSVector4 stq1 = GSVector4::cast(GSVector4i(v1.m[0]));
|
||||
|
||||
GSVector4 st, q;
|
||||
GSVector4 q;
|
||||
// Sprites always have indices == vertices, so we don't have to look at the index table here
|
||||
if (primclass == GS_SPRITE_CLASS)
|
||||
q = stq1.wwww();
|
||||
|
@ -220,10 +218,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
// make sure to remove the z (rgba) field as it's often denormal.
|
||||
// Then, use GSVector4::noopt() to prevent clang from optimizing out your "useless" shuffle
|
||||
// e.g. stq = (stq.xyww() / stq.wwww()).noopt().xyww(stq);
|
||||
if (accurate_stq)
|
||||
st = stq0.xyxy(stq1) / q;
|
||||
else
|
||||
st = stq0.xyxy(stq1) * q.rcpnr();
|
||||
GSVector4 st = stq0.xyxy(stq1) / q;
|
||||
|
||||
stq0 = st.xyww(primclass == GS_SPRITE_CLASS ? stq1 : stq0);
|
||||
stq1 = st.zwww(stq1);
|
||||
|
|
|
@ -47,9 +47,9 @@ protected:
|
|||
|
||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][2][4];
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||
|
||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
|
||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void FindMinMax(const void* vertex, const uint32* index, int count);
|
||||
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue