gsdx: _isnan was not reliable, so it was rewritten as GSVector4::replace_nan, which uses cmpps and then uses the result as the mask to blend the original value with FLT_MAX. No jumps or function calls.

This commit is contained in:
gabest11 2015-08-03 14:35:47 +02:00
parent 85117ecbdd
commit e010004f1f
3 changed files with 28 additions and 7 deletions

View File

@ -27,8 +27,6 @@
#include <sys/stat.h> // mkdir #include <sys/stat.h> // mkdir
#endif #endif
#define _isnan(f) (f != f)
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering //#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
static int s_crc_hack_level = 3; static int s_crc_hack_level = 3;
@ -715,17 +713,23 @@ void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
{ {
GSVector4i rgbaq = (GSVector4i)r->RGBAQ; GSVector4i rgbaq = (GSVector4i)r->RGBAQ;
rgbaq = rgbaq.upl32(rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy()); // see GIFPackedRegHandlerSTQ GSVector4i q = rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy(); // see GIFPackedRegHandlerSTQ
m_v.RGBAQ = rgbaq;
// Silent Hill output a nan in Q to emulate the flash light. Unfortunately it // Silent Hill output a nan in Q to emulate the flash light. Unfortunately it
// breaks GSVertexTrace code that rely on min/max. // breaks GSVertexTrace code that rely on min/max.
//if (std::isnan(m_v.RGBAQ.Q))
if(_isnan(m_v.RGBAQ.Q)) q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max));
m_v.RGBAQ = rgbaq.upl32(q);
/*
// Silent Hill output a nan in Q to emulate the flash light. Unfortunately it
// breaks GSVertexTrace code that rely on min/max.
if (std::isnan(m_v.RGBAQ.Q))
{ {
m_v.RGBAQ.Q = std::numeric_limits<float>::max(); m_v.RGBAQ.Q = std::numeric_limits<float>::max();
} }
*/
} }
void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)

View File

@ -72,6 +72,8 @@ const GSVector4 GSVector4::m_two(2.0f);
const GSVector4 GSVector4::m_four(4.0f); const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000))); const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
// All elements set to FLT_MAX; passed to replace_nan() as the substitute for a NaN Q.
const GSVector4 GSVector4::m_max(FLT_MAX);
// NOTE(review): FLT_MIN is the smallest positive normalized float, not the
// most negative value (-FLT_MAX) - confirm this is what m_min is meant to hold.
const GSVector4 GSVector4::m_min(FLT_MIN);
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
@ -81,6 +83,8 @@ const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000))); const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000))); const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
// All elements set to FLT_MAX.
const GSVector8 GSVector8::m_max(FLT_MAX);
// NOTE(review): FLT_MIN is the smallest positive normalized float, not the
// most negative value (-FLT_MAX) - confirm this is what m_min is meant to hold.
const GSVector8 GSVector8::m_min(FLT_MIN);
#endif #endif

View File

@ -2431,6 +2431,8 @@ public:
static const GSVector4 m_four; static const GSVector4 m_four;
static const GSVector4 m_x4b000000; static const GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000; static const GSVector4 m_x4f800000;
static const GSVector4 m_max;
static const GSVector4 m_min;
__forceinline GSVector4() __forceinline GSVector4()
{ {
@ -2908,6 +2910,11 @@ public:
#endif #endif
} }
// Returns a copy of this vector in which NaN elements are replaced by the
// matching elements of v; every other element is kept unchanged.
// Branch-free: the vector is compared with itself and the comparison result
// is used directly as the blend mask (only NaN compares unequal to itself).
__forceinline GSVector4 replace_nan(const GSVector4& v) const
{
	// Mask is set for elements that equal themselves, i.e. the non-NaN ones.
	const GSVector4 not_nan = (*this == *this);

	// blend32 is expected to take *this where the mask is set and v elsewhere.
	return v.blend32(*this, not_nan);
}
template<int src, int dst> __forceinline GSVector4 insert32(const GSVector4& v) const template<int src, int dst> __forceinline GSVector4 insert32(const GSVector4& v) const
{ {
// TODO: use blendps when src == dst // TODO: use blendps when src == dst
@ -5134,6 +5141,8 @@ public:
static const GSVector8 m_x80000000; static const GSVector8 m_x80000000;
static const GSVector8 m_x4b000000; static const GSVector8 m_x4b000000;
static const GSVector8 m_x4f800000; static const GSVector8 m_x4f800000;
static const GSVector8 m_max;
static const GSVector8 m_min;
__forceinline GSVector8() __forceinline GSVector8()
{ {
@ -5519,6 +5528,10 @@ public:
return _mm256_testz_ps(m, m) != 0; return _mm256_testz_ps(m, m) != 0;
} }
// Returns a copy of this vector in which NaN elements are replaced by the
// matching elements of v; every other element is kept unchanged.
// Branch-free: the vector is compared with itself and the comparison result
// is used directly as the blend mask (only NaN compares unequal to itself).
__forceinline GSVector8 replace_nan(const GSVector8& v) const
{
	// Mask is set for elements that equal themselves, i.e. the non-NaN ones.
	const GSVector8 not_nan = (*this == *this);

	// blend32 is expected to take *this where the mask is set and v elsewhere.
	return v.blend32(*this, not_nan);
}
template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const
{ {