GPU: Clean up some old header stuff now that the SIMD code has been factored out.

This commit is contained in:
rogerman 2022-04-05 23:15:51 -07:00
parent c5c9e2d3a7
commit e8328eda33
4 changed files with 26 additions and 40 deletions

View File

@ -2,7 +2,7 @@
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 Theo Berkau
Copyright (C) 2007 shash
Copyright (C) 2009-2021 DeSmuME team
Copyright (C) 2009-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -27,41 +27,6 @@
#include "types.h"
#include "./utils/colorspacehandler/colorspacehandler.h"
#ifdef ENABLE_SSE2
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
#ifdef ENABLE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef ENABLE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef ENABLE_AVX2
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
#endif
#ifdef ENABLE_AVX512_1
#include "./utils/colorspacehandler/colorspacehandler_AVX512.h"
#endif
// SSE2 fallback for the SSSE3 palignr intrinsic, compiled only when ENABLE_SSE2 is defined and ENABLE_SSSE3
// is not.
//
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
// then substitute the palignr instruction with an SSE2 equivalent.
//
// The substitute ORs together (a) shifted left by 16-immShiftCount bytes and (b) shifted right by
// immShiftCount bytes. Every macro argument is parenthesized in the expansion so that any argument
// expression is substituted as a single operand.
#if defined(ENABLE_SSE2) && !defined(ENABLE_SSSE3)
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
#endif
// SSE2 fallback for the SSE4.1 pblendvb intrinsic, compiled only when ENABLE_SSE2 is defined and
// ENABLE_SSE4_1 is not.
//
// Caveat: real pblendvb looks only at the MSB of each 8-bit mask lane when deciding whether to pass the b
// byte through. This substitute is a plain bitwise select, (~fullmask & a) | (fullmask & b), so it needs
// every bit of a mask lane to agree. Callers should therefore set or clear all bits of each mask lane
// together; doing so gives the same result on both the SSE4.1 and the SSE2 paths.
#if defined(ENABLE_SSE2) && !defined(ENABLE_SSE4_1)
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_andnot_si128((fullmask), (a)), _mm_and_si128((fullmask), (b)))
#endif
// Forward declarations of classes that are defined elsewhere.
class GPUEngineBase;
class NDSDisplay;
class EMUFILE;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2021 DeSmuME team
Copyright (C) 2021-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -21,6 +21,7 @@
#else
#include "GPU_Operations_AVX2.h"
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
static const ColorOperation_AVX2 colorop_vec;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2021 DeSmuME team
Copyright (C) 2021-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -21,7 +21,7 @@
#else
#include "GPU_Operations_SSE2.h"
#include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
static const ColorOperation_SSE2 colorop_vec;

View File

@ -288,7 +288,27 @@ typedef __m128i v128u16;
typedef __m128i v128s16;
typedef __m128i v128u32;
typedef __m128i v128s32;
#endif
// With SSSE3 enabled, pull in the SSSE3 intrinsics header; otherwise provide an SSE2 substitute for
// _mm_alignr_epi8 (palignr).
#ifdef ENABLE_SSSE3
#include <tmmintrin.h>
#else
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
// then substitute the palignr instruction with an SSE2 equivalent.
//
// The substitute ORs together (a) shifted left by 16-immShiftCount bytes and (b) shifted right by
// immShiftCount bytes. Every macro argument is parenthesized in the expansion so that any argument
// expression is substituted as a single operand.
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128((a), 16-(immShiftCount)), _mm_srli_si128((b), (immShiftCount)))
#endif // ENABLE_SSSE3
// With SSE4.1 enabled, pull in the SSE4.1 intrinsics header; otherwise provide an SSE2 substitute for
// _mm_blendv_epi8 (pblendvb).
#ifdef ENABLE_SSE4_1
#include <smmintrin.h>
#else
// Caveat: real pblendvb looks only at the MSB of each 8-bit mask lane when deciding whether to pass the b
// byte through. This substitute is a plain bitwise select, (~fullmask & a) | (fullmask & b), so it needs
// every bit of a mask lane to agree. Callers should therefore set or clear all bits of each mask lane
// together; doing so gives the same result on both the SSE4.1 and the SSE2 paths.
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_andnot_si128((fullmask), (a)), _mm_and_si128((fullmask), (b)))
#endif // ENABLE_SSE4_1
#endif // ENABLE_SSE2
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)