Sigh... partially revert commit e8328ed
to make Linux builds happy.
This commit is contained in:
parent
42c59d9257
commit
27a32d6262
|
@ -27,6 +27,31 @@
|
|||
#include "types.h"
|
||||
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||
|
||||
// For now, let's keep these SSE2 compatibility macros here to avoid build issues with Linux.
|
||||
// These should be moved to a more universal file like "types.h" so that they are available
|
||||
// everywhere, but Linux builds seem to be very finicky with their include structure. So let's
|
||||
// not rock the boat and make Linux builds happy.
|
||||
// - rogerman, 2022/04/06
|
||||
|
||||
#if defined(ENABLE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined(ENABLE_SSE2)
|
||||
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
|
||||
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
|
||||
// then substitute the palignr instruction with an SSE2 equivalent.
|
||||
// SSE2 stand-in for _mm_alignr_epi8(): ORs together _mm_slli_si128((a), 16-(immShiftCount)) and
// _mm_srli_si128((b), (immShiftCount)). immShiftCount is expanded twice, so pass an expression
// without side effects. NOTE(review): the parameter name suggests a compile-time immediate is
// expected -- confirm against the intrinsic's documentation.
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
|
||||
#endif // ENABLE_SSSE3
|
||||
|
||||
#if defined(ENABLE_SSE4_1)
|
||||
#include <smmintrin.h>
|
||||
#elif defined(ENABLE_SSE2)
|
||||
// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
|
||||
// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
|
||||
// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
|
||||
// should work fine for both SSE4.1 and SSE2.
|
||||
// SSE2 stand-in for _mm_blendv_epi8(): ORs together _mm_and_si128((fullmask), (b)) and
// _mm_andnot_si128((fullmask), (a)). fullmask is expanded twice, so pass an expression
// without side effects.
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
|
||||
#endif // ENABLE_SSE4_1
|
||||
|
||||
class GPUEngineBase;
|
||||
class NDSDisplay;
|
||||
class EMUFILE;
|
||||
|
|
|
@ -288,27 +288,7 @@ typedef __m128i v128u16;
|
|||
typedef __m128i v128s16;
|
||||
typedef __m128i v128u32;
|
||||
typedef __m128i v128s32;
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#else
|
||||
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
|
||||
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
|
||||
// then substitute the palignr instruction with an SSE2 equivalent.
|
||||
// SSE2 stand-in for _mm_alignr_epi8(): ORs together _mm_slli_si128((a), 16-(immShiftCount)) and
// _mm_srli_si128((b), (immShiftCount)). immShiftCount is expanded twice, so pass an expression
// without side effects. NOTE(review): the parameter name suggests a compile-time immediate is
// expected -- confirm against the intrinsic's documentation.
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
|
||||
#endif // ENABLE_SSSE3
|
||||
|
||||
#ifdef ENABLE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#else
|
||||
// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
|
||||
// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
|
||||
// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
|
||||
// should work fine for both SSE4.1 and SSE2.
|
||||
// SSE2 stand-in for _mm_blendv_epi8(): ORs together _mm_and_si128((fullmask), (b)) and
// _mm_andnot_si128((fullmask), (a)). fullmask is expanded twice, so pass an expression
// without side effects.
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
|
||||
#endif // ENABLE_SSE4_1
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)
|
||||
|
||||
|
|
Loading…
Reference in New Issue