Cocoa Port: Eliminate the old colorspace conversion functions from utilities.c in favor of the new colorspace handler functions.

This commit is contained in:
rogerman 2017-01-10 23:45:06 -08:00
parent 858d14b5fe
commit 669487ff05
13 changed files with 290 additions and 226 deletions

View File

@ -18,6 +18,7 @@
#include "OGLDisplayOutput.h"
#include "cocoa_globals.h"
#include "utilities.h"
#include "../../utils/colorspacehandler/colorspacehandler.h"
#include "../../filter/videofilter.h"
#include <sstream>
@ -7384,11 +7385,11 @@ void OGLDisplayLayer::LoadFrameOGL(bool isMainSizeNative, bool isTouchSizeNative
{
if (this->_videoColorFormat == GL_UNSIGNED_SHORT_1_5_5_5_REV)
{
RGB555ToBGRA8888Buffer((const uint16_t *)this->_videoSrcNativeBuffer[0], this->_vf[0]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555To8888Opaque<true, false>((const uint16_t *)this->_videoSrcNativeBuffer[0], this->_vf[0]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
else
{
RGB888ToBGRA8888Buffer((const uint32_t *)this->_videoSrcNativeBuffer[0], this->_vf[0]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((const uint32_t *)this->_videoSrcNativeBuffer[0], this->_vf[0]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
}
}
@ -7426,11 +7427,11 @@ void OGLDisplayLayer::LoadFrameOGL(bool isMainSizeNative, bool isTouchSizeNative
{
if (this->_videoColorFormat == GL_UNSIGNED_SHORT_1_5_5_5_REV)
{
RGB555ToBGRA8888Buffer((const uint16_t *)this->_videoSrcNativeBuffer[1], this->_vf[1]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555To8888Opaque<true, false>((const uint16_t *)this->_videoSrcNativeBuffer[1], this->_vf[1]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
else
{
RGB888ToBGRA8888Buffer((const uint32_t *)this->_videoSrcNativeBuffer[1], this->_vf[1]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((const uint32_t *)this->_videoSrcNativeBuffer[1], this->_vf[1]->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
}
}

View File

@ -718,11 +718,11 @@
if (dispInfo.pixelBytes == 2)
{
ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)displayBuffer, bitmapData, (w * h));
}
else if (dispInfo.pixelBytes == 4)
{
RGBA8888ForceOpaqueBuffer((u32 *)displayBuffer, bitmapData, (w * h));
memcpy(bitmapData, displayBuffer, w * h * sizeof(uint32_t));
}
pthread_rwlock_unlock(self.rwlockProducer);

View File

@ -18,7 +18,7 @@
#import "cocoa_videofilter.h"
#import <Cocoa/Cocoa.h>
#include "utilities.h"
#include "../../utils/colorspacehandler/colorspacehandler.h"
@implementation CocoaVideoFilter
@ -139,7 +139,7 @@
}
uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData];
RGBA8888ForceOpaqueBuffer((const uint32_t *)[self runFilter], bitmapData, (w * h));
ColorspaceConvertBuffer888XTo8888Opaque<false, true>((const uint32_t *)[self runFilter], bitmapData, w * h);
#ifdef MSB_FIRST
uint32_t *bitmapDataEnd = bitmapData + (w * h);

View File

@ -126,212 +126,6 @@ bool IsOSXVersionSupported(const unsigned int major, const unsigned int minor, c
return result;
}
/********************************************************************************************
	RGB555ToRGBA8888() - INLINE

	Expands a single 15-bit RGB555 pixel into an opaque 32-bit RGBA8888 pixel.

	Takes:
		color16 - The pixel in 15-bit RGB555 format.

	Returns:
		A 32-bit unsigned integer containing the RGBA8888 formatted color.

	Details:
		Every 5-bit field is widened through the bits5to8 lookup table and the top
		byte of the result is always 0xFF. The input and output pixels are expected
		to have little-endian byte order.
 ********************************************************************************************/
inline uint32_t RGB555ToRGBA8888(const uint16_t color16)
{
	// Isolate the three 5-bit fields first, then widen and pack them.
	const unsigned int field0 = (color16 >>  0) & 0x001F;
	const unsigned int field1 = (color16 >>  5) & 0x001F;
	const unsigned int field2 = (color16 >> 10) & 0x001F;
	
	return (bits5to8[field0] <<  0) |
	       (bits5to8[field1] <<  8) |
	       (bits5to8[field2] << 16) |
	       0xFF000000;
}
/********************************************************************************************
	RGB555ToBGRA8888() - INLINE

	Expands a single 15-bit RGB555 pixel into an opaque 32-bit BGRA8888 pixel.

	Takes:
		color16 - The pixel in 15-bit RGB555 format.

	Returns:
		A 32-bit unsigned integer containing the BGRA8888 formatted color.

	Details:
		Same widening as RGB555ToRGBA8888(), except that the lowest and highest
		5-bit fields trade places in the output. The top byte of the result is
		always 0xFF. The input and output pixels are expected to have little-endian
		byte order.
 ********************************************************************************************/
inline uint32_t RGB555ToBGRA8888(const uint16_t color16)
{
	// Isolate the three 5-bit fields first, then widen and pack them in swapped order.
	const unsigned int field0 = (color16 >>  0) & 0x001F;
	const unsigned int field1 = (color16 >>  5) & 0x001F;
	const unsigned int field2 = (color16 >> 10) & 0x001F;
	
	return (bits5to8[field2] <<  0) |
	       (bits5to8[field1] <<  8) |
	       (bits5to8[field0] << 16) |
	       0xFF000000;
}
/********************************************************************************************
RGB888ToBGRA8888() - INLINE
Converts a color from 24-bit RGB888 format into 32-bit BGRA8888 format.
Takes:
color32 - The pixel in 24-bit RGB888 format.
Returns:
A 32-bit unsigned integer containing the BGRA8888 formatted color.
Details:
The input and output pixels are expected to have little-endian byte order.
********************************************************************************************/
inline uint32_t RGB888ToBGRA8888(const uint32_t color32)
{
return ((color32 & 0x000000FF) << 16) |
((color32 & 0x0000FF00) ) |
((color32 & 0x00FF0000) >> 16) |
0xFF000000;
}
/********************************************************************************************
RGBA8888ForceOpaque() - INLINE
Forces the alpha channel of a 32-bit RGBA8888 color to a value of 0xFF.
Takes:
color32 - The pixel in 32-bit RGBA8888 format.
Returns:
A 32-bit unsigned integer containing the RGBA8888 formatted color.
Details:
The input and output pixels are expected to have little-endian byte order.
********************************************************************************************/
inline uint32_t RGBA8888ForceOpaque(const uint32_t color32)
{
return color32 | 0xFF000000;
}
/********************************************************************************************
	RGB555ToRGBA8888Buffer()

	Converts a buffer of 15-bit RGB555 pixels into a buffer of 32-bit RGBA8888
	pixels, one pixel at a time.

	Takes:
		srcBuffer - Pointer to the source 15-bit RGB555 pixel buffer.

		destBuffer - Pointer to the destination 32-bit RGBA8888 pixel buffer.

		pixelCount - The number of pixels to copy.

	Returns:
		Nothing.

	Details:
		The source and destination pixels are expected to have little-endian byte order.
		Also, it is the caller's responsibility to ensure that the source and destination
		buffers are large enough to accommodate the requested number of pixels.
 ********************************************************************************************/
void RGB555ToRGBA8888Buffer(const uint16_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount)
{
	size_t pixelIndex;
	
	for (pixelIndex = 0; pixelIndex < pixelCount; pixelIndex++)
	{
		destBuffer[pixelIndex] = RGB555ToRGBA8888(srcBuffer[pixelIndex]);
	}
}
/********************************************************************************************
	RGB555ToBGRA8888Buffer()

	Converts a buffer of 15-bit RGB555 pixels into a buffer of 32-bit BGRA8888
	pixels, one pixel at a time.

	Takes:
		srcBuffer - Pointer to the source 15-bit RGB555 pixel buffer.

		destBuffer - Pointer to the destination 32-bit BGRA8888 pixel buffer.

		pixelCount - The number of pixels to copy.

	Returns:
		Nothing.

	Details:
		The source and destination pixels are expected to have little-endian byte order.
		Also, it is the caller's responsibility to ensure that the source and destination
		buffers are large enough to accommodate the requested number of pixels.
 ********************************************************************************************/
void RGB555ToBGRA8888Buffer(const uint16_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount)
{
	size_t pixelIndex;
	
	for (pixelIndex = 0; pixelIndex < pixelCount; pixelIndex++)
	{
		destBuffer[pixelIndex] = RGB555ToBGRA8888(srcBuffer[pixelIndex]);
	}
}
/********************************************************************************************
	RGB888ToBGRA8888Buffer()

	Converts a buffer of 24-bit RGB888 pixels (one per 32-bit word) into a buffer
	of 32-bit BGRA8888 pixels, one pixel at a time.

	Takes:
		srcBuffer - Pointer to the source 24-bit RGB888 pixel buffer.

		destBuffer - Pointer to the destination 32-bit BGRA8888 pixel buffer.

		pixelCount - The number of pixels to copy.

	Returns:
		Nothing.

	Details:
		The source and destination pixels are expected to have little-endian byte order.
		Also, it is the caller's responsibility to ensure that the source and destination
		buffers are large enough to accommodate the requested number of pixels.
 ********************************************************************************************/
void RGB888ToBGRA8888Buffer(const uint32_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount)
{
	size_t pixelIndex;
	
	for (pixelIndex = 0; pixelIndex < pixelCount; pixelIndex++)
	{
		destBuffer[pixelIndex] = RGB888ToBGRA8888(srcBuffer[pixelIndex]);
	}
}
/********************************************************************************************
	RGBA8888ForceOpaqueBuffer()

	Copies a 32-bit RGBA8888 pixel buffer into another 32-bit RGBA8888 pixel buffer,
	setting the alpha value of every copied pixel to 0xFF.

	Takes:
		srcBuffer - Pointer to the source 32-bit RGBA8888 pixel buffer.

		destBuffer - Pointer to the destination 32-bit RGBA8888 pixel buffer.

		pixelCount - The number of pixels to copy.

	Returns:
		Nothing.

	Details:
		The source and destination pixels are expected to have little-endian byte order.
		Also, it is the caller's responsibility to ensure that the source and destination
		buffers are large enough to accommodate the requested number of pixels.
 ********************************************************************************************/
void RGBA8888ForceOpaqueBuffer(const uint32_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount)
{
	size_t pixelIndex;
	
	for (pixelIndex = 0; pixelIndex < pixelCount; pixelIndex++)
	{
		destBuffer[pixelIndex] = RGBA8888ForceOpaque(srcBuffer[pixelIndex]);
	}
}
/********************************************************************************************
GetNearestPositivePOT()

View File

@ -27,17 +27,7 @@ extern "C"
{
#endif
bool IsOSXVersionSupported(const unsigned int major, const unsigned int minor, const unsigned int revision);
uint32_t RGB555ToRGBA8888(const uint16_t color16);
uint32_t RGB555ToBGRA8888(const uint16_t color16);
uint32_t RGB888ToBGRA8888(const uint32_t color32);
uint32_t RGBA8888ForceOpaque(const uint32_t color32);
void RGB555ToRGBA8888Buffer(const uint16_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount);
void RGB555ToBGRA8888Buffer(const uint16_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount);
void RGB888ToBGRA8888Buffer(const uint32_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount);
void RGBA8888ForceOpaqueBuffer(const uint32_t *__restrict__ srcBuffer, uint32_t *__restrict__ destBuffer, size_t pixelCount);
bool IsOSXVersionSupported(const unsigned int major, const unsigned int minor, const unsigned int revision);
uint32_t GetNearestPositivePOT(uint32_t value);
#ifdef __cplusplus

View File

@ -426,6 +426,54 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric
}
}
// Converts pixCount 32-bit 888X pixels from src into opaque 8888 pixels in dst.
//
// Template parameters:
//   SWAP_RB      - Selects the handler variant / scalar conversion that swaps the
//                  R and B channels.
//   IS_UNALIGNED - Selects the handler variant intended for buffers that are not
//                  vector-aligned.
//
// When USEMANUALVECTORIZATION is defined, the bulk of the buffer is handed to the
// global colorspace handler (csh), which returns the number of pixels it converted.
// Whatever is left over is then converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// This conversion is 32-bit in, 32-bit out, so one vector register carries
	// (vector bits / 32) pixels: 8 for a 256-bit vector and 4 for a 128-bit vector.
	// Round pixCount down to a multiple of that so the vector loops never run past
	// the end of the buffers, while leaving as few pixels as possible for the
	// scalar tail loop.
#if defined(USEVECTORSIZE_512)
	// NOTE(review): no 512-bit implementation is visible from here, so the chunk
	// size is kept at its conservative value of 32 -- confirm the real step size
	// before lowering it to 16.
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 4);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer888XTo8888Opaque_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer888XTo8888Opaque(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: handles the whole buffer when manual vectorization is off,
	// otherwise only the pixels the handler did not cover.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert888XTo8888Opaque<SWAP_RB>(src[i]);
	}
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -612,7 +660,7 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict s
{
size_t i = 0;
for (;i < pixCount; i++)
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To5551<true>(src[i]);
}
@ -630,6 +678,40 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
}
// Scalar reference implementation: converts pixCount 888X pixels from src into
// opaque 8888 pixels in dst without swapping R and B.
// Returns the number of pixels converted.
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		dst[pixIndex] = ColorspaceConvert888XTo8888Opaque<false>(src[pixIndex]);
		pixIndex++;
	}
	
	return pixIndex;
}
// Scalar reference implementation: converts pixCount 888X pixels from src into
// opaque 8888 pixels in dst, swapping the R and B channels of every pixel.
// Returns the number of pixels converted.
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		dst[pixIndex] = ColorspaceConvert888XTo8888Opaque<true>(src[pixIndex]);
		pixIndex++;
	}
	
	return pixIndex;
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo8888Opaque(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCount);
}
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
@ -659,3 +741,8 @@ template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restric
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, false>(const u32 *src, u32 *dst, size_t pixCount);

View File

@ -214,12 +214,34 @@ FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
return ColorspaceConvert6665To5551<SWAP_RB>(srcColorComponent);
}
// Builds an opaque 8888 color from an 888X color given as a FragmentColor.
// The R, G and B components are copied from srcColor (with R and B exchanged
// when SWAP_RB is true) and the alpha component is forced to 0xFF.
// Returns the packed 32-bit value of the resulting color.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(FragmentColor srcColor)
{
	FragmentColor outColor;
	
	if (SWAP_RB)
	{
		outColor.r = srcColor.b;
		outColor.b = srcColor.r;
	}
	else
	{
		outColor.r = srcColor.r;
		outColor.b = srcColor.b;
	}
	
	outColor.g = srcColor.g;
	outColor.a = 0xFF;
	
	return outColor.color;
}
// Convenience overload taking the 888X color as a packed 32-bit value.
// The value is loaded into a FragmentColor and passed to the FragmentColor
// overload, whose packed result is returned unchanged.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(u32 srcColor)
{
	FragmentColor unpackedColor;
	unpackedColor.color = srcColor;
	
	const u32 opaqueColor = ColorspaceConvert888XTo8888Opaque<SWAP_RB>(unpackedColor);
	return opaqueColor;
}
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount);
class ColorspaceHandler
{
@ -255,6 +277,11 @@ public:
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
};
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)

View File

@ -212,6 +212,17 @@ FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
// Converts eight 888X pixels held in one 256-bit vector into opaque 8888 pixels by
// OR-ing 0xFF000000 into every 32-bit element. When SWAP_RB is true, bytes 0 and 2
// of every 32-bit element are exchanged first using a byte shuffle.
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src)
{
	const v256u32 opaqueAlpha = _mm256_set1_epi32(0xFF000000);
	
	if (!SWAP_RB)
	{
		return _mm256_or_si256(src, opaqueAlpha);
	}
	
	// Per 32-bit element: byte 0 <- byte 2, byte 1 kept, byte 2 <- byte 0, byte 3 kept.
	const v256u32 swapRBMask = _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2);
	
	return _mm256_or_si256( _mm256_shuffle_epi8(src, swapRBMask), opaqueAlpha );
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
@ -344,6 +355,26 @@ size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
@ -464,6 +495,26 @@ size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
}
// AVX2 888X -> opaque 8888 buffer conversion: no R/B swap, aligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, false> and returns its result.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
}
// AVX2 888X -> opaque 8888 buffer conversion: R/B swap, aligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, false> and returns its result.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
}
// AVX2 888X -> opaque 8888 buffer conversion: no R/B swap, unaligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, true> and returns its result.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
}
// AVX2 888X -> opaque 8888 buffer conversion: R/B swap, unaligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, true> and returns its result.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
@ -488,4 +539,7 @@ template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, c
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<false>(const v256u32 &src);
#endif // ENABLE_AVX2

View File

@ -32,6 +32,7 @@ template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &s
template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src);
class ColorspaceHandler_AVX2 : public ColorspaceHandler
{
@ -67,6 +68,11 @@ public:
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
};
#endif // ENABLE_AVX2

View File

@ -172,6 +172,17 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, co
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
// Converts four 888X pixels held in one 128-bit vector into opaque 8888 pixels by
// OR-ing 0xFF000000 into every 32-bit element. When SWAP_RB is true, the bytes of
// every element are first rearranged with vec_perm.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src)
{
	// vec_splat_u32() only accepts a 5-bit signed literal (-16..15), so it cannot
	// produce 0xFF000000. Build the alpha mask from a vector literal instead.
	const v128u32 opaqueAlpha = ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000});
	
	if (SWAP_RB)
	{
		// NOTE(review): this permute exchanges bytes 0 and 2 of each element as they
		// sit in memory. Confirm against the big-endian FragmentColor layout that
		// those are really the R and B bytes (and not the byte selected by the
		// 0xFF000000 alpha mask).
		return vec_or( vec_perm(src, src, ((v128u8){2,1,0,3,  6,5,4,7,  10,9,8,11,  14,13,12,15})), opaqueAlpha );
	}
	
	return vec_or(src, opaqueAlpha);
}
template <bool SWAP_RB>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
@ -258,6 +269,19 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert888XTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
@ -318,6 +342,16 @@ size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__re
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
}
// AltiVec 888X -> opaque 8888 buffer conversion without an R/B swap.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<false> and returns its result.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<false>(src, dst, pixCount);
}
// AltiVec 888X -> opaque 8888 buffer conversion with an R/B swap.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<true> and returns its result.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
@ -342,4 +376,7 @@ template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<false>(const v128u32 &src);
#endif // ENABLE_ALTIVEC

View File

@ -32,6 +32,7 @@ template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src);
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
@ -57,6 +58,9 @@ public:
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
};
#endif // ENABLE_ALTIVEC

View File

@ -250,6 +250,21 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
// Converts four 888X pixels held in one 128-bit vector into opaque 8888 pixels by
// OR-ing 0xFF000000 into every 32-bit element. When SWAP_RB is true, bytes 0 and 2
// of every element are exchanged first: with a single byte shuffle when
// ENABLE_SSSE3 is defined, otherwise with masks and shifts.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src)
{
	const v128u32 opaqueAlpha = _mm_set1_epi32(0xFF000000);
	
	if (!SWAP_RB)
	{
		return _mm_or_si128(src, opaqueAlpha);
	}
	
#ifdef ENABLE_SSSE3
	// Per 32-bit element: byte 0 <- byte 2, byte 1 kept, byte 2 <- byte 0, byte 3 kept.
	const v128u32 swapRBMask = _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2);
	
	return _mm_or_si128( _mm_shuffle_epi8(src, swapRBMask), opaqueAlpha );
#else
	// Plain SSE2 has no byte shuffle, so move each byte into place individually.
	const v128u32 byte2ToByte0 = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16);
	const v128u32 byte1Kept    = _mm_and_si128(src, _mm_set1_epi32(0x0000FF00));
	const v128u32 byte0ToByte2 = _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16);
	
	return _mm_or_si128( _mm_or_si128(byte2ToByte0, _mm_or_si128(byte1Kept, byte0ToByte2)), opaqueAlpha );
#endif
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
@ -382,6 +397,26 @@ size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
@ -502,6 +537,26 @@ size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
}
// SSE2 888X -> opaque 8888 buffer conversion: no R/B swap, aligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false> and returns its result.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
// SSE2 888X -> opaque 8888 buffer conversion: R/B swap, aligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false> and returns its result.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
// SSE2 888X -> opaque 8888 buffer conversion: no R/B swap, unaligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true> and returns its result.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
// SSE2 888X -> opaque 8888 buffer conversion: R/B swap, unaligned loads and stores.
// Forwards to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true> and returns its result.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
@ -526,4 +581,7 @@ template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, c
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<false>(const v128u32 &src);
#endif // ENABLE_SSE2

View File

@ -32,6 +32,7 @@ template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &s
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src);
class ColorspaceHandler_SSE2 : public ColorspaceHandler
{
@ -67,6 +68,11 @@ public:
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
};
#endif // ENABLE_SSE2