mirror of https://github.com/PCSX2/pcsx2.git
gsdx: handle float overflow on Q in vertex trace
Replace the fast reciprocal with a slower division when we detect a too big Q value. Improve #551, #1684
This commit is contained in:
parent
f862f5be03
commit
611239db5c
|
@ -32,13 +32,14 @@ void GSVertexTrace::InitVectors()
|
|||
}
|
||||
|
||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state), m_primclass(GS_INVALID_CLASS)
|
||||
: m_state(state), m_accurate_stq(false), m_primclass(GS_INVALID_CLASS)
|
||||
{
|
||||
m_force_filter = static_cast<BiFiltering>(theApp.GetConfigI("filter"));
|
||||
memset(&m_alpha, 0, sizeof(m_alpha));
|
||||
|
||||
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
|
||||
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
|
||||
m_fmm[0][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 0>; \
|
||||
m_fmm[1][COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR, 1>; \
|
||||
|
||||
#define InitUpdate2(P, IIP, TME) \
|
||||
InitUpdate3(P, IIP, TME, 0, 0) \
|
||||
|
@ -67,7 +68,16 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
|||
uint32 fst = m_state->PRIM->FST;
|
||||
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
||||
|
||||
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
|
||||
// Potential float overflow detected. Better uses the slower division instead
|
||||
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
|
||||
// that feel big enough.
|
||||
if (!fst && !m_accurate_stq && m_min.t.z > 1e30) {
|
||||
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
|
||||
m_accurate_stq = true;
|
||||
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||
}
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
|
@ -144,7 +154,7 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
|||
}
|
||||
}
|
||||
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
|
||||
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
|
||||
{
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
@ -204,7 +214,10 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
|
||||
GSVector4 q = stq.wwww();
|
||||
|
||||
stq = (stq.xyww() * q.rcpnr()).xyww(q);
|
||||
if (accurate_stq)
|
||||
stq = (stq.xyww() / q).xyww(q);
|
||||
else
|
||||
stq = (stq.xyww() * q.rcpnr()).xyww(q);
|
||||
|
||||
tmin = tmin.min(stq);
|
||||
tmax = tmax.max(stq);
|
||||
|
@ -267,10 +280,20 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).rcpnr();
|
||||
if(accurate_stq)
|
||||
{
|
||||
GSVector4 q = stq0.wwww(stq1);
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
|
||||
stq0 = (stq0.xyww() / q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() / q.zzzz()).xyww(stq1);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4 q = stq0.wwww(stq1).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
|
||||
}
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
|
@ -342,11 +365,22 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
GSVector4 stq2 = GSVector4::cast(c2);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
|
||||
if(accurate_stq)
|
||||
{
|
||||
GSVector4 q = stq0.wwww(stq1).xzww(stq2);
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
|
||||
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
|
||||
stq0 = (stq0.xyww() / q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() / q.yyyy()).xyww(stq1);
|
||||
stq2 = (stq2.xyww() / q.zzzz()).xyww(stq2);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
|
||||
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
|
||||
}
|
||||
|
||||
tmin = tmin.min(stq2).min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq2).max(stq0.max(stq1));
|
||||
|
@ -423,10 +457,20 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq1.wwww().rcpnr();
|
||||
if(accurate_stq)
|
||||
{
|
||||
GSVector4 q = stq1.wwww();
|
||||
|
||||
stq0 = (stq0.xyww() * q).xyww(stq1);
|
||||
stq1 = (stq1.xyww() * q).xyww(stq1);
|
||||
stq0 = (stq0.xyww() / q).xyww(stq1);
|
||||
stq1 = (stq1.xyww() / q).xyww(stq1);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4 q = stq1.wwww().rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q).xyww(stq1);
|
||||
stq1 = (stq1.xyww() * q).xyww(stq1);
|
||||
}
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
|
|
|
@ -42,11 +42,13 @@ protected:
|
|||
|
||||
static GSVector4 s_minmax;
|
||||
|
||||
bool m_accurate_stq;
|
||||
|
||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][2][4];
|
||||
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
|
||||
void FindMinMax(const void* vertex, const uint32* index, int count);
|
||||
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue