gsdx: _isnan was not reliable; rewrote it as GSVector4::replace_nan, which uses cmpps and then uses the comparison result as the mask to blend the original value with FLT_MAX. No jumps or function calls.

2015-08-03 14:35:47 +02:00 · 2015-08-03 14:35:47 +02:00 · e010004f1f
parent 85117ecbdd
commit e010004f1f
3 changed files with 28 additions and 7 deletions
--- a/plugins/GSdx/GSState.cpp
+++ b/plugins/GSdx/GSState.cpp
@ -27,8 +27,6 @@
 #include <sys/stat.h> // mkdir
 #endif

-#define _isnan(f) (f != f)
-
 //#define Offset_ST  // Fixes Persona3 mini map alignment which is off even in software rendering

 static int s_crc_hack_level = 3;
@ -715,17 +713,23 @@ void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
 {
 	GSVector4i rgbaq = (GSVector4i)r->RGBAQ;

-	rgbaq = rgbaq.upl32(rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy()); // see GIFPackedRegHandlerSTQ
-
-	m_v.RGBAQ = rgbaq;
+	GSVector4i q = rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy(); // see GIFPackedRegHandlerSTQ; Q is kept in its own vector so NaN can be replaced before it is merged back below

 	// Silent Hill output a nan in Q to emulate the flash light. Unfortunately it
 	// breaks GSVertexTrace code that rely on min/max.
-	//if (std::isnan(m_v.RGBAQ.Q))
-	if(_isnan(m_v.RGBAQ.Q))
+
+	q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max)); // reinterpret q as floats, replace NaN with GSVector4::m_max (FLT_MAX) via compare + blend instead of a branch, then reinterpret back
+
+	m_v.RGBAQ = rgbaq.upl32(q); // same upl32 merge the removed one-liner performed, now using the NaN-free q
+
+	/* Previous scalar implementation, kept for reference only; superseded by the replace_nan call above.
+	// Silent Hill outputs a NaN in Q to emulate the flashlight. Unfortunately it
+	// breaks GSVertexTrace code that relies on min/max.
+	if (std::isnan(m_v.RGBAQ.Q))
 	{
 		m_v.RGBAQ.Q = std::numeric_limits<float>::max();
 	}
+	*/
 }

 void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
--- a/plugins/GSdx/GSVector.cpp
+++ b/plugins/GSdx/GSVector.cpp
@ -72,6 +72,8 @@ const GSVector4 GSVector4::m_two(2.0f);
 const GSVector4 GSVector4::m_four(4.0f);
 const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
 const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
+const GSVector4 GSVector4::m_max(FLT_MAX); // largest finite float; used as the NaN replacement for Q in GSState::GIFRegHandlerRGBAQ
+const GSVector4 GSVector4::m_min(FLT_MIN); // NOTE(review): FLT_MIN is the smallest positive normalized float, not the most negative value (-FLT_MAX); confirm this is the intended "min"

 #if _M_SSE >= 0x500

@ -81,6 +83,8 @@ const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7
 const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
 const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
 const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
+const GSVector8 GSVector8::m_max(FLT_MAX); // largest finite float; AVX counterpart of GSVector4::m_max
+const GSVector8 GSVector8::m_min(FLT_MIN); // NOTE(review): FLT_MIN is the smallest positive normalized float, not the most negative value (-FLT_MAX); confirm this is the intended "min"

 #endif

--- a/plugins/GSdx/GSVector.h
+++ b/plugins/GSdx/GSVector.h
@ -2431,6 +2431,8 @@ public:
 	static const GSVector4 m_four;
 	static const GSVector4 m_x4b000000;
 	static const GSVector4 m_x4f800000;
+	static const GSVector4 m_max; // initialized from FLT_MAX in GSVector.cpp
+	static const GSVector4 m_min; // initialized from FLT_MIN in GSVector.cpp (smallest positive normalized float, not -FLT_MAX)

 	__forceinline GSVector4()
 	{
@ -2908,6 +2910,11 @@ public:
 		#endif
 	}

+	__forceinline GSVector4 replace_nan(const GSVector4& v) const // returns *this with NaN values replaced by the matching values of v; branch-free, per the commit description
+	{
+		return v.blend32(*this, *this == *this); // a NaN never compares equal to itself, so the self-compare mask is clear only where *this is NaN; assumes blend32(a, mask) takes a where the mask is set and v elsewhere - confirm against blend32
+	}
+
 	template<int src, int dst> __forceinline GSVector4 insert32(const GSVector4& v) const
 	{
 		// TODO: use blendps when src == dst
@ -5134,6 +5141,8 @@ public:
 	static const GSVector8 m_x80000000;
 	static const GSVector8 m_x4b000000;
 	static const GSVector8 m_x4f800000;
+	static const GSVector8 m_max; // initialized from FLT_MAX in GSVector.cpp
+	static const GSVector8 m_min; // initialized from FLT_MIN in GSVector.cpp (smallest positive normalized float, not -FLT_MAX)

 	__forceinline GSVector8() 
 	{
@ -5519,6 +5528,10 @@ public:
 		return _mm256_testz_ps(m, m) != 0;
 	}
 	
+	__forceinline GSVector8 replace_nan(const GSVector8& v) const // returns *this with NaN values replaced by the matching values of v; mirrors GSVector4::replace_nan
+	{
+		return v.blend32(*this, *this == *this); // a NaN never compares equal to itself, so the self-compare mask is clear only where *this is NaN; assumes blend32(a, mask) takes a where the mask is set and v elsewhere - confirm against blend32
+	}

 	template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const
 	{