Colorspace Handler: Add support for NEON-A64.

Signed-off-by: rogerman <rogerman@users.noreply.github.com>
This commit is contained in:
rogerman 2022-04-02 23:47:21 -07:00
parent 8a9fec431a
commit 9ccc791e32
5 changed files with 1154 additions and 2 deletions

View File

@ -38,6 +38,9 @@
#elif defined(ENABLE_SSE2)
#define USEVECTORSIZE_128
#define VECTORSIZE 16
#elif defined(ENABLE_NEON_A64)
#define USEVECTORSIZE_128
#define VECTORSIZE 16
#elif defined(ENABLE_ALTIVEC)
#define USEVECTORSIZE_128
#define VECTORSIZE 16

View File

@ -3636,6 +3636,8 @@
AB96EE861F990E4700B7AA67 /* lzio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lzio.c; sourceTree = "<group>"; };
AB96EE871F990E4700B7AA67 /* lzio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lzio.h; sourceTree = "<group>"; };
AB9971CE134EDA0800531BA7 /* cocoa_globals.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_globals.h; sourceTree = "<group>"; };
ABA48DF527F95C2E00D961FB /* colorspacehandler_NEON.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_NEON.h; sourceTree = "<group>"; };
ABA48DF627F95C2E00D961FB /* colorspacehandler_NEON.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_NEON.cpp; sourceTree = "<group>"; };
ABA6574914511EC90077E5E9 /* cocoa_cheat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_cheat.h; sourceTree = "<group>"; };
ABA6574A14511EC90077E5E9 /* cocoa_cheat.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = cocoa_cheat.mm; sourceTree = "<group>"; };
ABA731251BB5104200B26147 /* SIL Open Font License.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "SIL Open Font License.txt"; sourceTree = "<group>"; };
@ -4968,11 +4970,13 @@
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
ABCC19332287879000DFA471 /* colorspacehandler_AVX512.cpp */,
ABA48DF627F95C2E00D961FB /* colorspacehandler_NEON.cpp */,
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
ABCC19342287879000DFA471 /* colorspacehandler_AVX512.h */,
ABA48DF527F95C2E00D961FB /* colorspacehandler_NEON.h */,
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
);
path = colorspacehandler;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,6 +30,10 @@
#include "colorspacehandler_SSE2.cpp"
#endif
#if defined(ENABLE_NEON_A64)
#include "colorspacehandler_NEON.cpp"
#endif
#if defined(ENABLE_ALTIVEC)
#include "colorspacehandler_AltiVec.cpp"
#endif
@ -40,7 +44,7 @@
#elif defined(ENABLE_AVX2)
#define USEVECTORSIZE_256
#define VECTORSIZE 32
#elif defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
#elif defined(ENABLE_SSE2) || defined(ENABLE_NEON_A64) || defined(ENABLE_ALTIVEC)
#define USEVECTORSIZE_128
#define VECTORSIZE 16
#endif
@ -60,6 +64,8 @@
static const ColorspaceHandler_AVX2 csh;
#elif defined(ENABLE_SSE2)
static const ColorspaceHandler_SSE2 csh;
#elif defined(ENABLE_NEON_A64)
static const ColorspaceHandler_NEON csh;
#elif defined(ENABLE_ALTIVEC)
static const ColorspaceHandler_AltiVec csh;
#else

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,114 @@
/*
Copyright (C) 2016-2022 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_NEON_H
#define COLORSPACEHANDLER_NEON_H
#include "colorspacehandler.h"
#ifndef ENABLE_NEON_A64
#warning This header requires ARM64 NEON support.
#else
// Per-vector helper declarations. Only prototypes appear in this header; each one is a function
// template on the compile-time flag SWAP_RB.
// NOTE(review): judging by the names, SWAP_RB presumably selects red/blue channel swapping and the
// numeric parts of each name identify the source and destination pixel formats -- confirm against
// the definitions.
//
// One v128u16 input (plus a second v128u16 of alpha bits where declared); results are written
// through the two non-const v128u32 references dstLo and dstHi.
template<bool SWAP_RB> void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// One v128u32 input, one v128u32 returned by value.
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src);
// Two v128u32 inputs (srcLo, srcHi), one v128u16 returned by value.
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
// Per-vector helper declaration, templated on the compile-time flag SWAP_RB: takes one v128u32
// and returns one v128u32 by value. The identifier uses the same "ColorspaceConvert" prefix as
// the other *_NEON conversion helpers declared in this header.
// NOTE(review): the name suggests the result has its alpha forced opaque -- confirm against the
// definition, which is not part of this header.
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src);
// Same-width helpers: the return type matches the input vector type (v128u16 or v128u32).
// Each is a function template on the compile-time flag SWAP_RB; only prototypes appear here.
template<bool SWAP_RB> v128u16 ColorspaceCopy16_NEON(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_NEON(const v128u32 &src);
// These additionally take a float intensity.
// NOTE(review): how intensity is applied (range, rounding, alpha handling) is not visible in this
// header -- confirm against the definitions.
template<bool SWAP_RB> v128u16 ColorspaceApplyIntensity16_NEON(const v128u16 &src, float intensity);
template<bool SWAP_RB> v128u32 ColorspaceApplyIntensity32_NEON(const v128u32 &src, float intensity);
// ColorspaceHandler subclass for the NEON-A64 code path. This declaration is only visible when
// ENABLE_NEON_A64 is defined (see the surrounding #ifndef/#else). Member definitions are not in
// this header.
//
// Every buffer routine is a const member that takes a pixel count and returns size_t. Most come in
// four variants: plain, _SwapRB, _IsUnaligned and _SwapRB_IsUnaligned.
// NOTE(review): the size_t result is presumably the number of pixels the vector path handled, and
// _IsUnaligned presumably permits buffers without 16-byte alignment -- confirm both against the
// implementation and the base class.
class ColorspaceHandler_NEON : public ColorspaceHandler
{
public:
// Empty inline default constructor.
ColorspaceHandler_NEON() {};
// u16 source -> u32 destination; templated on BESwapFlags. The __restrict qualifiers declare that
// src and dst do not alias.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// u32 source -> u32 destination. No __restrict here, so src and dst are not declared non-aliasing.
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// u32 source -> u16 destination, non-aliasing (__restrict).
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// u32 source -> u32 destination.
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// Destination is a u8 pointer, non-aliasing (__restrict).
// NOTE(review): the "888" suffix suggests a packed 3-bytes-per-pixel output -- confirm.
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
// Same-format copies; only _SwapRB variants are declared in this class.
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer32_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// Single-buffer routines: there is no src parameter, only a non-const dst plus a float intensity.
// NOTE(review): presumably these modify dst in place -- confirm against the definitions.
size_t ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
};
#endif // ENABLE_NEON_A64
#endif // COLORSPACEHANDLER_NEON_H