Sigh... partially revert commit e8328ed to make Linux builds happy.

2022-04-06 10:34:26 -07:00 · 2022-04-06 10:34:26 -07:00 · 27a32d6262
parent 42c59d9257
commit 27a32d6262
2 changed files with 26 additions and 21 deletions
--- a/desmume/src/GPU.h
+++ b/desmume/src/GPU.h
@@ -27,6 +27,31 @@
 #include "types.h"
 #include "./utils/colorspacehandler/colorspacehandler.h"

+// For now, let's keep these SSE2 compatibility functions here to avoid build issues with Linux.
+// These should be moved to a more universal file like "types.h" so that they are available
+// everywhere, but Linux builds seem to be very finicky with their include structure. So let's
+// not rock the boat and make Linux builds happy.
+// - rogerman, 2022/04/06
+
+#if defined(ENABLE_SSSE3)
+	#include <tmmintrin.h>
+#elif defined(ENABLE_SSE2)
+	// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
+	// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
+	// then substitute the palignr instruction with an SSE2 equivalent.
+	#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
+#endif // ENABLE_SSSE3
+
+#if defined(ENABLE_SSE4_1)
+	#include <smmintrin.h>
+#elif defined(ENABLE_SSE2)
+	// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
+	// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
+	// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
+	// should work fine for both SSE4.1 and SSE2.
+	#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
+#endif // ENABLE_SSE4_1
+
 class GPUEngineBase;
 class NDSDisplay;
 class EMUFILE;
--- a/desmume/src/types.h
+++ b/desmume/src/types.h
@@ -288,27 +288,7 @@ typedef __m128i v128u16;
 typedef __m128i v128s16;
 typedef __m128i v128u32;
 typedef __m128i v128s32;
-
-#ifdef ENABLE_SSSE3
-	#include <tmmintrin.h>
-#else
-	// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
-	// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
-	// then substitute the palignr instruction with an SSE2 equivalent.
-	#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
-#endif // ENABLE_SSSE3
-
-#ifdef ENABLE_SSE4_1
-	#include <smmintrin.h>
-#else
-	// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
-	// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
-	// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
-	// should work fine for both SSE4.1 and SSE2.
-	#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
-#endif // ENABLE_SSE4_1
-
-#endif // ENABLE_SSE2
+#endif

 #if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)