gsdx: _isnan was not reliable, so it was rewritten as GSVector4::replace_nan, which uses cmpps and takes the comparison result as the mask to blend the original value with FLT_MAX. No jumps or function calls.

This commit is contained in:
gabest11 2015-08-03 14:35:47 +02:00
parent 85117ecbdd
commit e010004f1f
3 changed files with 28 additions and 7 deletions

View File

@ -27,8 +27,6 @@
#include <sys/stat.h> // mkdir
#endif
// NaN test by self-comparison: NaN is the only floating-point value that does
// not compare equal to itself.
// The argument is parenthesized so that compound expressions (for example a
// ternary) expand correctly. It is still evaluated twice, so do not pass an
// expression with side effects.
// NOTE(review): optimizers that assume finite math may fold this comparison to
// false - confirm against the build flags in use.
#define _isnan(f) ((f) != (f))
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
// NOTE(review): presumably the default aggressiveness of the CRC-based game hacks - confirm against its users.
static int s_crc_hack_level = 3;
@ -715,17 +713,23 @@ void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
{
// Handles a write to the RGBAQ register: reads r->RGBAQ, sanitizes the Q
// component and stores the result in m_v.RGBAQ.
// NOTE(review): this hunk is shown without +/- diff markers, so statements from
// before and after the change appear interleaved (e.g. m_v.RGBAQ is assigned
// twice) - confirm against the repository which lines actually survive.
GSVector4i rgbaq = (GSVector4i)r->RGBAQ;
rgbaq = rgbaq.upl32(rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy()); // see GIFPackedRegHandlerSTQ
m_v.RGBAQ = rgbaq;
// q is built from rgbaq with the bits of GSVector4::m_one blended in under the
// (rgbaq == 0) mask, then swizzled with yyyy() - presumably so that a zero Q
// becomes 1.0f and is broadcast to every lane; TODO confirm against blend8/yyyy.
GSVector4i q = rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy(); // see GIFPackedRegHandlerSTQ
// Silent Hill outputs a NaN in Q to emulate the flashlight. Unfortunately this
// breaks the GSVertexTrace code, which relies on min/max.
//if (std::isnan(m_v.RGBAQ.Q))
if(_isnan(m_v.RGBAQ.Q))
// NaN guard done with vector ops: q is reinterpreted as floats, passed through
// replace_nan with GSVector4::m_max as the replacement, and cast back to integer lanes.
q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max));
// Combine the original register value with the sanitized q via upl32 and store it.
m_v.RGBAQ = rgbaq.upl32(q);
/*
// Silent Hill outputs a NaN in Q to emulate the flashlight. Unfortunately this
// breaks the GSVertexTrace code, which relies on min/max.
if (std::isnan(m_v.RGBAQ.Q))
{
m_v.RGBAQ.Q = std::numeric_limits<float>::max();
}
*/
}
void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)

View File

@ -72,6 +72,8 @@ const GSVector4 GSVector4::m_two(2.0f);
// Shared GSVector4 constants, each constructed from a single scalar or a
// repeated 32-bit pattern.
const GSVector4 GSVector4::m_four(4.0f);
// 0x4b000000 is the IEEE-754 single-precision bit pattern of 2^23 (8388608.0f).
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
// 0x4f800000 is the IEEE-754 single-precision bit pattern of 2^32 (4294967296.0f).
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
// FLT_MAX is the largest finite float; GIFRegHandlerRGBAQ passes this constant
// to replace_nan as the substitute for a NaN Q.
const GSVector4 GSVector4::m_max(FLT_MAX);
// NOTE(review): FLT_MIN is the smallest positive normalized float, not the most
// negative value (-FLT_MAX) - confirm this is what users of m_min expect.
const GSVector4 GSVector4::m_min(FLT_MIN);
#if _M_SSE >= 0x500
@ -81,6 +83,8 @@ const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7
// 256-bit counterparts of the shared vector constants.
// 0x80000000 has only the top (sign) bit of each 32-bit lane set.
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
// 0x4b000000 is the IEEE-754 single-precision bit pattern of 2^23 (8388608.0f).
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
// 0x4f800000 is the IEEE-754 single-precision bit pattern of 2^32 (4294967296.0f).
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
// FLT_MAX is the largest finite float.
const GSVector8 GSVector8::m_max(FLT_MAX);
// NOTE(review): FLT_MIN is the smallest positive normalized float, not the most
// negative value (-FLT_MAX) - confirm this is what users of m_min expect.
const GSVector8 GSVector8::m_min(FLT_MIN);
#endif

View File

@ -2431,6 +2431,8 @@ public:
// Shared read-only constants; the m_x... members are named after the 32-bit
// pattern they are initialized with.
static const GSVector4 m_four;
static const GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000;
// Initialized from FLT_MAX and FLT_MIN respectively in the matching source file.
// NOTE(review): FLT_MIN is the smallest positive normalized float, not -FLT_MAX.
static const GSVector4 m_max;
static const GSVector4 m_min;
__forceinline GSVector4()
{
@ -2908,6 +2910,11 @@ public:
#endif
}
// Intended to return this vector with every NaN lane replaced by the matching
// lane of v. A lane compares equal to itself only when it is not NaN, so the
// self-comparison is used as the per-lane blend mask between v and this
// vector; no branch or library call is involved.
__forceinline GSVector4 replace_nan(const GSVector4& v) const
{
	const GSVector4& self = *this;

	return v.blend32(self, self == self);
}
template<int src, int dst> __forceinline GSVector4 insert32(const GSVector4& v) const
{
// TODO: use blendps when src == dst
@ -5134,6 +5141,8 @@ public:
// Shared read-only constants; the m_x... members are named after the 32-bit
// pattern they are initialized with.
static const GSVector8 m_x80000000;
static const GSVector8 m_x4b000000;
static const GSVector8 m_x4f800000;
// Initialized from FLT_MAX and FLT_MIN respectively in the matching source file.
// NOTE(review): FLT_MIN is the smallest positive normalized float, not -FLT_MAX.
static const GSVector8 m_max;
static const GSVector8 m_min;
__forceinline GSVector8()
{
@ -5519,6 +5528,10 @@ public:
return _mm256_testz_ps(m, m) != 0;
}
// Intended to return this vector with every NaN lane replaced by the matching
// lane of v. A lane compares equal to itself only when it is not NaN, so the
// self-comparison is used as the per-lane blend mask between v and this
// vector; no branch or library call is involved.
__forceinline GSVector8 replace_nan(const GSVector8& v) const
{
	const GSVector8& self = *this;

	return v.blend32(self, self == self);
}
template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const
{