GPU: Clean up some old header stuff now that the SIMD code has been factored out.
This commit is contained in:
parent
c5c9e2d3a7
commit
e8328eda33
|
@ -2,7 +2,7 @@
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2006-2007 Theo Berkau
|
Copyright (C) 2006-2007 Theo Berkau
|
||||||
Copyright (C) 2007 shash
|
Copyright (C) 2007 shash
|
||||||
Copyright (C) 2009-2021 DeSmuME team
|
Copyright (C) 2009-2022 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -27,41 +27,6 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "./utils/colorspacehandler/colorspacehandler.h"
|
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
|
||||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ENABLE_SSSE3
|
|
||||||
#include <tmmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ENABLE_SSE4_1
|
|
||||||
#include <smmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ENABLE_AVX2
|
|
||||||
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ENABLE_AVX512_1
|
|
||||||
#include "./utils/colorspacehandler/colorspacehandler_AVX512.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
|
|
||||||
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
|
|
||||||
// then substitute the palignr instruction with an SSE2 equivalent.
|
|
||||||
#if defined(ENABLE_SSE2) && !defined(ENABLE_SSSE3)
|
|
||||||
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128(a, 16-(immShiftCount)), _mm_srli_si128(b, (immShiftCount)))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
|
|
||||||
// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
|
|
||||||
// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
|
|
||||||
// should work fine for both SSE4.1 and SSE2.
|
|
||||||
#if defined(ENABLE_SSE2) && !defined(ENABLE_SSE4_1)
|
|
||||||
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class GPUEngineBase;
|
class GPUEngineBase;
|
||||||
class NDSDisplay;
|
class NDSDisplay;
|
||||||
class EMUFILE;
|
class EMUFILE;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2021 DeSmuME team
|
Copyright (C) 2021-2022 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -21,6 +21,7 @@
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include "GPU_Operations_AVX2.h"
|
#include "GPU_Operations_AVX2.h"
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
|
||||||
|
|
||||||
|
|
||||||
static const ColorOperation_AVX2 colorop_vec;
|
static const ColorOperation_AVX2 colorop_vec;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2021 DeSmuME team
|
Copyright (C) 2021-2022 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -21,7 +21,7 @@
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include "GPU_Operations_SSE2.h"
|
#include "GPU_Operations_SSE2.h"
|
||||||
#include <emmintrin.h>
|
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||||
|
|
||||||
|
|
||||||
static const ColorOperation_SSE2 colorop_vec;
|
static const ColorOperation_SSE2 colorop_vec;
|
||||||
|
|
|
@ -288,7 +288,27 @@ typedef __m128i v128u16;
|
||||||
typedef __m128i v128s16;
|
typedef __m128i v128s16;
|
||||||
typedef __m128i v128u32;
|
typedef __m128i v128u32;
|
||||||
typedef __m128i v128s32;
|
typedef __m128i v128s32;
|
||||||
#endif
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
#include <tmmintrin.h>
|
||||||
|
#else
|
||||||
|
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
|
||||||
|
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
|
||||||
|
// then substitute the palignr instruction with an SSE2 equivalent.
|
||||||
|
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128(a, 16-(immShiftCount)), _mm_srli_si128(b, (immShiftCount)))
|
||||||
|
#endif // ENABLE_SSSE3
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSE4_1
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#else
|
||||||
|
// Note: The SSE4.1 version of pblendvb only requires that the MSB of each 8-bit mask element is set in order to
|
||||||
|
// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of each 8-bit
|
||||||
|
// mask element are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
|
||||||
|
// should work fine for both SSE4.1 and SSE2.
|
||||||
|
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
|
||||||
|
#endif // ENABLE_SSE4_1
|
||||||
|
|
||||||
|
#endif // ENABLE_SSE2
|
||||||
|
|
||||||
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)
|
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue