gsdx: handle float overflow on Q in vertex trace

Replace the fast reciprocal with a slower division when we detect a too big Q value.

Improve #551, #1684
This commit is contained in:
Gregory Hainaut 2017-03-02 19:26:37 +01:00
parent f862f5be03
commit 611239db5c
2 changed files with 63 additions and 17 deletions

View File

@ -32,13 +32,14 @@ void GSVertexTrace::InitVectors()
}
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state), m_primclass(GS_INVALID_CLASS)
: m_state(state), m_accurate_stq(false), m_primclass(GS_INVALID_CLASS)
{
m_force_filter = static_cast<BiFiltering>(theApp.GetConfigI("filter"));
memset(&m_alpha, 0, sizeof(m_alpha));
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
m_fmm[0][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 0>; \
m_fmm[1][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 1>; \
#define InitUpdate2(P, IIP, TME) \
InitUpdate3(P, IIP, TME, 0, 0) \
@ -67,7 +68,16 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
uint32 fst = m_state->PRIM->FST;
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
// Potential float overflow detected. Better uses the slower division instead
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
// that feel big enough.
if (!fst && !m_accurate_stq && m_min.t.z > 1e30) {
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
m_accurate_stq = true;
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
@ -144,7 +154,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
}
}
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
const GSDrawingContext* context = m_state->m_context;
@ -204,7 +214,10 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4 q = stq.wwww();
stq = (stq.xyww() * q.rcpnr()).xyww(q);
if (accurate_stq)
stq = (stq.xyww() / q).xyww(q);
else
stq = (stq.xyww() * q.rcpnr()).xyww(q);
tmin = tmin.min(stq);
tmax = tmax.max(stq);
@ -267,10 +280,20 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4 stq0 = GSVector4::cast(c0);
GSVector4 stq1 = GSVector4::cast(c1);
GSVector4 q = stq0.wwww(stq1).rcpnr();
if(accurate_stq)
{
GSVector4 q = stq0.wwww(stq1);
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
stq0 = (stq0.xyww() / q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() / q.zzzz()).xyww(stq1);
}
else
{
GSVector4 q = stq0.wwww(stq1).rcpnr();
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
}
tmin = tmin.min(stq0.min(stq1));
tmax = tmax.max(stq0.max(stq1));
@ -342,11 +365,22 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4 stq1 = GSVector4::cast(c1);
GSVector4 stq2 = GSVector4::cast(c2);
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
if(accurate_stq)
{
GSVector4 q = stq0.wwww(stq1).xzww(stq2);
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
stq0 = (stq0.xyww() / q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() / q.yyyy()).xyww(stq1);
stq2 = (stq2.xyww() / q.zzzz()).xyww(stq2);
}
else
{
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
}
tmin = tmin.min(stq2).min(stq0.min(stq1));
tmax = tmax.max(stq2).max(stq0.max(stq1));
@ -423,10 +457,20 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4 stq0 = GSVector4::cast(c0);
GSVector4 stq1 = GSVector4::cast(c1);
GSVector4 q = stq1.wwww().rcpnr();
if(accurate_stq)
{
GSVector4 q = stq1.wwww();
stq0 = (stq0.xyww() * q).xyww(stq1);
stq1 = (stq1.xyww() * q).xyww(stq1);
stq0 = (stq0.xyww() / q).xyww(stq1);
stq1 = (stq1.xyww() / q).xyww(stq1);
}
else
{
GSVector4 q = stq1.wwww().rcpnr();
stq0 = (stq0.xyww() * q).xyww(stq1);
stq1 = (stq1.xyww() * q).xyww(stq1);
}
tmin = tmin.min(stq0.min(stq1));
tmax = tmax.max(stq0.max(stq1));

View File

@ -42,11 +42,13 @@ protected:
static GSVector4 s_minmax;
bool m_accurate_stq;
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
FindMinMaxPtr m_fmm[2][2][2][2][4];
FindMinMaxPtr m_fmm[2][2][2][2][2][4];
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
void FindMinMax(const void* vertex, const uint32* index, int count);
public: