Sigh... partially revert commit e8328ed to make Linux builds happy.

This commit is contained in:
rogerman 2022-04-06 10:34:26 -07:00
parent 42c59d9257
commit 27a32d6262
2 changed files with 26 additions and 21 deletions

View File

@ -27,6 +27,31 @@
#include "types.h"
#include "./utils/colorspacehandler/colorspacehandler.h"
// For now, let's keep these SSE2 compatibility functions here to avoid build issues with Linux.
// These should be moved to a more universal file like "types.h" so that they are available
// everywhere, but Linux builds seem to be very finicky with their include structure. So let's
// not rock the boat, and keep Linux builds happy.
// - rogerman, 2022/04/06
#if defined(ENABLE_SSSE3)
#include <tmmintrin.h>
#elif defined(ENABLE_SSE2)
// SSE2 stand-in for the SSSE3 palignr intrinsic, used only when SSSE3 is not enabled.
// The real instruction technically accepts any shift count in [0-255], but this substitute
// relies on the count staying within [0-15], which is the practical range anyway.
// The shift count appears twice in the expansion, so pass a plain constant and not an
// expression with side effects.
#define _mm_alignr_epi8(a, b, immShiftCount) \
	_mm_or_si128( _mm_srli_si128((b), (immShiftCount)), _mm_slli_si128((a), 16-(immShiftCount)) )
#endif // ENABLE_SSSE3
#if defined(ENABLE_SSE4_1)
#include <smmintrin.h>
#elif defined(ENABLE_SSE2)
// SSE2 stand-in for the SSE4.1 pblendvb intrinsic, used only when SSE4.1 is not enabled.
// Real pblendvb only needs the MSB of each 8-bit mask element to be set in order to pass
// the b byte through. This substitute combines the inputs with bitwise AND / ANDNOT / OR,
// so it needs every bit of a mask element to be set (or cleared) together. Build masks
// that way and the result is the same on both the SSE4.1 and the SSE2 paths.
// The mask appears twice in the expansion, so avoid mask expressions with side effects.
#define _mm_blendv_epi8(a, b, fullmask) \
	_mm_or_si128( _mm_andnot_si128((fullmask), (a)), _mm_and_si128((fullmask), (b)) )
#endif // ENABLE_SSE4_1
// Forward declarations only: these names are introduced here as incomplete class types,
// without their definitions.
class GPUEngineBase;
class NDSDisplay;
class EMUFILE;

View File

@ -288,27 +288,7 @@ typedef __m128i v128u16;
// Aliases for the 128-bit integer vector type. Every alias is the same underlying __m128i,
// so the compiler does not distinguish between them; the names only document intent
// (presumably signedness and lane width, e.g. s16 = signed 16-bit lanes -- confirm against usage).
typedef __m128i v128s16;
typedef __m128i v128u32;
typedef __m128i v128s32;
#ifndef ENABLE_SSSE3
// Without SSSE3, emulate the palignr intrinsic using SSE2 byte shifts and an OR.
// The real instruction technically accepts any shift count in [0-255], but this substitute
// relies on the count staying within [0-15], which is the practical range anyway.
// The shift count appears twice in the expansion, so pass a plain constant and not an
// expression with side effects.
#define _mm_alignr_epi8(a, b, immShiftCount) \
	_mm_or_si128( _mm_srli_si128((b), (immShiftCount)), _mm_slli_si128((a), 16-(immShiftCount)) )
#else
#include <tmmintrin.h>
#endif // ENABLE_SSSE3
#ifndef ENABLE_SSE4_1
// Without SSE4.1, emulate the pblendvb intrinsic using SSE2 bitwise AND / ANDNOT / OR.
// Real pblendvb only needs the MSB of each 8-bit mask element to be set in order to pass
// the b byte through. This substitute needs every bit of a mask element to be set (or
// cleared) together. Build masks that way and the result is the same on both the SSE4.1
// and the SSE2 paths.
// The mask appears twice in the expansion, so avoid mask expressions with side effects.
#define _mm_blendv_epi8(a, b, fullmask) \
	_mm_or_si128( _mm_andnot_si128((fullmask), (a)), _mm_and_si128((fullmask), (b)) )
#else
#include <smmintrin.h>
#endif // ENABLE_SSE4_1
#endif // ENABLE_SSE2
#endif
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)