Colorspace Handler: Add new functions for converting 16-bit colors to 18-bit and to 32-bit color; these new functions respect the 16-bit color's alpha bit.

- ColorspaceConvert5551To8888()
- ColorspaceConvert5551To6665()
- ColorspaceConvertBuffer5551To8888()
- ColorspaceConvertBuffer5551To6665()
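- Also rename the existing 16-bit color conversion functions (e.g. ColorspaceConvertBuffer555To8888Opaque() becomes ColorspaceConvertBuffer555xTo8888Opaque(), and ColorspaceConvert555XTo888X_SSE2() becomes ColorspaceConvert555xTo888x_SSE2()) to help further distinguish the functions from one another.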
This commit is contained in:
rogerman 2024-07-15 16:24:02 -07:00
parent 4b0805e139
commit 586aea5310
29 changed files with 1680 additions and 781 deletions

View File

@ -1,7 +1,7 @@
/*
Copyright 2006 yopyop
Copyright 2007 shash
Copyright 2007-2023 DeSmuME team
Copyright 2007-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -489,11 +489,11 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
}
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
_DISP_FIFOrecv_LineAdvance();

View File

@ -2,7 +2,7 @@
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 Theo Berkau
Copyright (C) 2007 shash
Copyright (C) 2008-2023 DeSmuME team
Copyright (C) 2008-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1032,12 +1032,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
{
if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) )
{
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
else
{
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom);
}
break;
@ -1047,12 +1047,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
{
if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) )
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
else
{
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom);
}
break;
@ -3729,7 +3729,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
this->_RenderLine_DispCapture_Copy<OUTPUTFORMAT, 1, CAPTURELENGTH, true, false>(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt);
@ -3755,7 +3755,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
else
{
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32);
srcAPtr = this->_captureWorkingA32;
}
@ -3772,7 +3772,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{
if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0))
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32);
@ -3919,7 +3919,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
{
if (willReadNativeVRAM)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
}
@ -4435,7 +4435,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo)
{
const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetCustom;
u32 *dst = (u32 *)customBuffer + lineInfo.blockOffsetCustom;
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapSrcDst>(src, dst, lineInfo.pixelCount);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapSrcDst>(src, dst, lineInfo.pixelCount);
break;
}
@ -5780,11 +5780,11 @@ void GPUSubsystem::_ConvertAndUpscaleForLoadstate(const NDSDisplayID displayID,
switch (this->_display[displayID]->GetColorFormat())
{
case NDSColorFormat_BGR666_Rev:
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break;
case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break;
default:
@ -6212,11 +6212,11 @@ void NDSDisplay::ResolveLinesDisplayedNative()
{
if (this->_customColorFormat == NDSColorFormat_BGR888_Rev)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
else
{
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
}
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, working, dst);
@ -6256,7 +6256,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo)
{
case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break;
default:
@ -6298,7 +6298,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo)
case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break;
}
}

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2021-2023 DeSmuME team
Copyright (C) 2021-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1128,13 +1128,13 @@ FORCEINLINE void PixelOperation_AVX2::_copy16(GPUEngineCompositorInfo &compInfo,
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555To6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555To8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
}
_mm256_store_si256( (v256u32 *)compInfo.target.lineColor32 + 0, src32[0] );
@ -1205,13 +1205,13 @@ FORCEINLINE void PixelOperation_AVX2::_copyMask16(GPUEngineCompositorInfo &compI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555To6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555To8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
}
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1304,13 +1304,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUp16(GPUEngineCompositorInfo &c
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo666x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo666x_AVX2<false>(src1, dst[2], dst[3]);
}
else
{
ColorspaceConvert555XTo888X_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo888x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo888x_AVX2<false>(src1, dst[2], dst[3]);
}
const v256u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm256_set1_epi32(0x1F000000) : _mm256_set1_epi32(0xFF000000);
@ -1377,13 +1377,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUpMask16(GPUEngineCompositorInf
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo666x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo666x_AVX2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555XTo888X_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo888x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo888x_AVX2<false>(src1, src32[2], src32[3]);
}
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1471,13 +1471,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDown16(GPUEngineCompositorInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo666x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo666x_AVX2<false>(src1, dst[2], dst[3]);
}
else
{
ColorspaceConvert555XTo888X_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo888x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo888x_AVX2<false>(src1, dst[2], dst[3]);
}
const v256u32 alphaBits = _mm256_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F000000 : 0xFF000000);
@ -1544,13 +1544,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDownMask16(GPUEngineCompositorI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo666x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo666x_AVX2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555XTo888X_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo888x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo888x_AVX2<false>(src1, src32[2], src32[3]);
}
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1674,13 +1674,13 @@ FORCEINLINE void PixelOperation_AVX2::_unknownEffectMask16(GPUEngineCompositorIn
}
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
ColorspaceConvert555xTo666x_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555xTo666x_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
}
else
{
ColorspaceConvert555XTo888X_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
ColorspaceConvert555xTo888x_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555xTo888x_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
}
switch (compInfo.renderState.colorEffect)

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2021-2023 DeSmuME team
Copyright (C) 2021-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -922,13 +922,13 @@ FORCEINLINE void PixelOperation_SSE2::_copy16(GPUEngineCompositorInfo &compInfo,
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555To6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555To8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
}
_mm_store_si128( (v128u32 *)compInfo.target.lineColor32 + 0, src32[0] );
@ -999,13 +999,13 @@ FORCEINLINE void PixelOperation_SSE2::_copyMask16(GPUEngineCompositorInfo &compI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555To6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555To8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
}
const v128u32 dst32[4] = {
@ -1104,13 +1104,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUp16(GPUEngineCompositorInfo &c
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo666x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo666x_SSE2<false>(src1, dst[2], dst[3]);
}
else
{
ColorspaceConvert555XTo888X_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo888x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo888x_SSE2<false>(src1, dst[2], dst[3]);
}
const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000);
@ -1182,13 +1182,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUpMask16(GPUEngineCompositorInf
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo666x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo666x_SSE2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555XTo888X_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo888x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo888x_SSE2<false>(src1, src32[2], src32[3]);
}
const v128u32 dst32[4] = {
@ -1275,13 +1275,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDown16(GPUEngineCompositorInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo666x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo666x_SSE2<false>(src1, dst[2], dst[3]);
}
else
{
ColorspaceConvert555XTo888X_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, dst[2], dst[3]);
ColorspaceConvert555xTo888x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555xTo888x_SSE2<false>(src1, dst[2], dst[3]);
}
const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000);
@ -1353,13 +1353,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDownMask16(GPUEngineCompositorI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo666x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo666x_SSE2<false>(src1, src32[2], src32[3]);
}
else
{
ColorspaceConvert555XTo888X_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, src32[2], src32[3]);
ColorspaceConvert555xTo888x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555xTo888x_SSE2<false>(src1, src32[2], src32[3]);
}
const v128u32 dst32[4] = {
@ -1494,13 +1494,13 @@ FORCEINLINE void PixelOperation_SSE2::_unknownEffectMask16(GPUEngineCompositorIn
}
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvert555XTo666X_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
ColorspaceConvert555xTo666x_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555xTo666x_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
}
else
{
ColorspaceConvert555XTo888X_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
ColorspaceConvert555xTo888x_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555xTo888x_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
}
switch (compInfo.renderState.colorEffect)

View File

@ -484,7 +484,7 @@ void ClientAVCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam &
const u16 *__restrict src = (const u16 *__restrict)param.src;
u8 *__restrict dst = param.dst;
ColorspaceConvertBuffer555XTo888<false, false>(src, dst, param.frameWidth * lineCount);
ColorspaceConvertBuffer555xTo888<false, false>(src, dst, param.frameWidth * lineCount);
}
//converts 32bpp to 24bpp and flips
@ -494,7 +494,7 @@ void ClientAVCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam &
const u32 *__restrict src = (const u32 *__restrict)param.src;
u8 *__restrict dst = param.dst;
ColorspaceConvertBuffer888XTo888<false, false>(src, dst, param.frameWidth * lineCount);
ColorspaceConvertBuffer888xTo888<false, false>(src, dst, param.frameWidth * lineCount);
}
void ClientAVCaptureObject::CaptureVideoFrame(const void *srcVideoFrame, const size_t inFrameWidth, const size_t inFrameHeight, const NDSColorFormat colorFormat)

6
desmume/src/frontend/cocoa/OGLDisplayOutput.cpp Executable file → Normal file
View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2014-2023 DeSmuME team
Copyright (C) 2014-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -4719,7 +4719,7 @@ void OGLClientSharedData::FetchNativeDisplayToSrcClone(const NDSDisplayInfo *dis
return;
}
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false;
if (needsLock)
@ -4744,7 +4744,7 @@ void OGLClientSharedData::FetchCustomDisplayToSrcClone(const NDSDisplayInfo *dis
return;
}
ColorspaceConvertBuffer888XTo8888Opaque<false, false>((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer888xTo8888Opaque<false, false>((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false;
if (needsLock)

View File

@ -1,6 +1,6 @@
/*
Copyright (C) 2011 Roger Manuel
Copyright (C) 2011-2022 DeSmuME team
Copyright (C) 2011-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -674,7 +674,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
// The first entry always represents the alpha, so just set it to 0.
const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette;
CACHE_ALIGN uint32_t clut32[16];
ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapNone>(clut4, clut32, 16);
ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapNone>(clut4, clut32, 16);
clut32[0] = 0x00000000;
// Load the image from the icon pixel data.

View File

@ -1,6 +1,6 @@
/*
Copyright (C) 2011 Roger Manuel
Copyright (C) 2013 DeSmuME team
Copyright (C) 2013-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -139,7 +139,7 @@
}
uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData];
ColorspaceConvertBuffer888XTo8888Opaque<false, true>((const uint32_t *)[self runFilter], bitmapData, w * h);
ColorspaceConvertBuffer888xTo8888Opaque<false, true>((const uint32_t *)[self runFilter], bitmapData, w * h);
#ifdef MSB_FIRST
for (size_t i = 0; i < w * h; i++)

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2017-2023 DeSmuME team
Copyright (C) 2017-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -2556,7 +2556,7 @@ void MacMetalFetchObject::_FetchNativeDisplayByID(const NDSDisplayID displayID,
GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]);
pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
}
@ -2570,7 +2570,7 @@ void MacMetalFetchObject::_FetchCustomDisplayByID(const NDSDisplayID displayID,
GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]);
pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
ColorspaceConvertBuffer888XTo8888Opaque<false, false>((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer888xTo8888Opaque<false, false>((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
}

View File

@ -1380,7 +1380,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo
static inline void gpu_screen_to_rgb(u32* dst)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16,
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16,
dst, real_framebuffer_width * real_framebuffer_height * 2);
}
@ -1591,7 +1591,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo
}
static void RedrawScreen() {
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapDst>(
ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapDst>(
GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16,
(uint32_t *)video->GetSrcBufferPtr(), real_framebuffer_width * real_framebuffer_height * 2);
#ifdef HAVE_LIBAGG

View File

@ -1666,7 +1666,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo
static inline void gpu_screen_to_rgb(u32* dst)
{
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
}
static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) {
@ -1791,7 +1791,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo
}
static void RedrawScreen() {
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
#ifdef HAVE_LIBAGG
aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024);
osd->update();

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2006-2018 DeSmuME team
Copyright (C) 2006-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -736,7 +736,7 @@ void NDSCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam &param
for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++)
{
ColorspaceConvertBuffer555XTo888<true, false>(src, dst, param.frameWidth);
ColorspaceConvertBuffer555xTo888<true, false>(src, dst, param.frameWidth);
src += param.frameWidth;
dst -= param.frameWidth * 3;
}
@ -750,7 +750,7 @@ void NDSCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam &param
for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++)
{
ColorspaceConvertBuffer888XTo888<true, false>(src, dst, param.frameWidth);
ColorspaceConvertBuffer888xTo888<true, false>(src, dst, param.frameWidth);
src += param.frameWidth;
dst -= param.frameWidth * 3;
}

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2018 DeSmuME team
Copyright (C) 2018-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -683,9 +683,9 @@ void DoDisplay()
//we have to do a copy here because we're about to draw the OSD onto it. bummer.
if (gpu_bpp == 15)
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapNone>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2);
ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapNone>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2);
else
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4);
ColorspaceConvertBuffer888xTo8888Opaque<true, false>((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4);
//some games use the backlight for fading effects
const size_t pixCount = video.prefilterWidth * video.prefilterHeight / 2;

View File

@ -3,7 +3,7 @@
licensed under the terms supplied at the end of this file (for the terms are very long!)
Differences from that baseline version are:
Copyright (C) 2009-2019 DeSmuME team
Copyright (C) 2009-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -174,7 +174,7 @@ static void DoScreenshot(const char* fname)
else
{
u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
ColorspaceConvertBuffer888xTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
NDS_WritePNG_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight*2, swapbuf, fname);
free_aligned(swapbuf);
}
@ -189,7 +189,7 @@ static void DoScreenshot(const char* fname)
else
{
u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
ColorspaceConvertBuffer888xTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
NDS_WriteBMP_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight *2, swapbuf, fname);
free_aligned(swapbuf);
}

View File

@ -3441,7 +3441,7 @@ void ScreenshotToClipboard(bool extraInfo)
else
{
u32* swapbuf = (u32*)malloc_alignedPage(width*height * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height);
ColorspaceConvertBuffer888xTo8888Opaque<true, false>((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height);
SetDIBitsToDevice(hMemDC, 0, 0, width, height, 0, 0, 0, height, swapbuf, (BITMAPINFO*)&bmi, DIB_RGB_COLORS);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2009-2023 DeSmuME team
Copyright (C) 2009-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -2032,7 +2032,7 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D_State &renderState
}
// Convert the toon table colors
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32);
ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32);
if (this->_enableEdgeMark)
{

View File

@ -1,7 +1,7 @@
/*
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash
Copyright (C) 2008-2023 DeSmuME team
Copyright (C) 2008-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -867,13 +867,13 @@ void __NDSTextureUnpackI2_AVX2(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -923,13 +923,13 @@ void __NDSTextureUnpackI2_SSSE3(const size_t texelCount, const u8 *__restrict sr
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -977,13 +977,13 @@ void __NDSTextureUnpackI2_NEON(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
}
else
{
ColorspaceConvert555To8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1028,13 +1028,13 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
}
else
{
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1146,13 +1146,13 @@ void __NDSTextureUnpackI4_AVX2(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1208,13 +1208,13 @@ void __NDSTextureUnpackI4_SSSE3(const size_t texelCount, const u8 *__restrict sr
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1261,13 +1261,13 @@ void __NDSTextureUnpackI4_NEON(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
}
else
{
ColorspaceConvert555To8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1312,13 +1312,13 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
}
else
{
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
}
// Set converted colors to 0 if the palette index is 0.
@ -1434,13 +1434,13 @@ void __NDSTextureUnpackA3I5_NEON(const size_t texelCount, const u8 *__restrict s
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555aTo6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555aTo6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
}
else
{
ColorspaceConvert555To8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555aTo8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555aTo8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
}
vst1q_u32_x4(dstBuffer + i, convertedColor);
@ -1486,13 +1486,13 @@ void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
}
else
{
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
}
vec_st(convertedColor[0], 0, dstBuffer);
@ -1566,8 +1566,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s
const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha);
const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha);
ColorspaceConvert555To6665_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
ColorspaceConvert555aTo6665_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555aTo6665_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
}
else
{
@ -1577,8 +1577,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s
const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha);
const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha);
ColorspaceConvert555To8888_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
ColorspaceConvert555aTo8888_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555aTo8888_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
}
_mm256_store_si256((v256u32 *)dstBuffer + 0, convertedColor[0]);
@ -1615,8 +1615,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict
const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
ColorspaceConvert555To6665_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
ColorspaceConvert555aTo6665_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555aTo6665_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
}
else
{
@ -1624,8 +1624,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict
const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
ColorspaceConvert555To8888_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
ColorspaceConvert555aTo8888_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555aTo8888_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
}
_mm_store_si128((v128u32 *)(dstBuffer + i) + 0, convertedColor[0]);
@ -1661,8 +1661,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s
const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) );
const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) );
ColorspaceConvert555To6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555aTo6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555aTo6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
}
else
{
@ -1670,8 +1670,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s
const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) );
const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) );
ColorspaceConvert555To8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
ColorspaceConvert555aTo8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555aTo8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
}
vst1q_u32_x4(dstBuffer + i, convertedColor);
@ -1707,8 +1707,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
}
else
{
@ -1716,8 +1716,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
}
vec_st(convertedColor[0], 0, dstBuffer);
@ -1900,11 +1900,11 @@ void __NDSTextureUnpackDirect16Bit_AVX2(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
}
else
{
ColorspaceConvert555To8888Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
}
v256u16 alpha = _mm256_cmpeq_epi16(_mm256_srli_epi16(c, 15), _mm256_set1_epi16(1));
@ -1930,11 +1930,11 @@ void __NDSTextureUnpackDirect16Bit_SSE2(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo6665Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
}
else
{
ColorspaceConvert555To8888Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
ColorspaceConvert555xTo8888Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
}
const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1));
@ -1959,11 +1959,11 @@ void __NDSTextureUnpackDirect16Bit_NEON(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo6665Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
}
else
{
ColorspaceConvert555To8888Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555xTo8888Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
}
const v128u16 alpha = vceqq_u16(vshrq_n_u16(c,15), vdupq_n_u16(1));
@ -1987,11 +1987,11 @@ void __NDSTextureUnpackDirect16Bit_AltiVec(const size_t texelCount, const u16 *_
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
}
else
{
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
}
const v128u16 alpha = vec_and(c, ((v128u16){0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080}));

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2023 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -187,7 +187,7 @@ void ColorspaceHandlerInit()
}
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
@ -198,22 +198,22 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To8888Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo8888Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To8888Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo8888Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
@ -243,7 +243,7 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re
}
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
@ -254,22 +254,22 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To6665Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo6665Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To6665Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo6665Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
@ -298,6 +298,119 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re
}
}
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16)));
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer5551To8888_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer5551To8888_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer5551To8888_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer5551To8888<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
switch (BE_BYTESWAP)
{
case BESwapNone:
dst[i] = ColorspaceConvert5551To8888<SWAP_RB>(src[i]);
break;
case BESwapIn:
dst[i] = ColorspaceConvert5551To8888<SWAP_RB>(LE_TO_LOCAL_16(src[i]));
break;
case BESwapOut:
dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<SWAP_RB>(src[i]) );
break;
case BESwapInOut:
dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<SWAP_RB>(LE_TO_LOCAL_16(src[i])) );
break;
}
}
}
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16)));
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer5551To6665_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer5551To6665_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer5551To6665_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer5551To6665<BE_BYTESWAP>(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
switch (BE_BYTESWAP)
{
case BESwapNone:
dst[i] = ColorspaceConvert5551To6665<SWAP_RB>(src[i]);
break;
case BESwapIn:
dst[i] = ColorspaceConvert5551To6665<SWAP_RB>(LE_TO_LOCAL_16(src[i]));
break;
case BESwapOut:
dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<SWAP_RB>(src[i]) );
break;
case BESwapInOut:
dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<SWAP_RB>(LE_TO_LOCAL_16(src[i])) );
break;
}
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
@ -455,7 +568,7 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount)
void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount)
{
size_t i = 0;
@ -466,22 +579,22 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer888XTo8888Opaque_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo8888Opaque_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer888XTo8888Opaque(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo8888Opaque(src, dst, pixCountVector);
}
}
@ -494,7 +607,7 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount)
void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount)
{
size_t i = 0;
@ -505,22 +618,22 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555XTo888_SwapRB(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo888_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555XTo888_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo888_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555XTo888(src, dst, pixCountVector);
i = csh.ConvertBuffer555xTo888(src, dst, pixCountVector);
}
}
@ -533,7 +646,7 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount)
void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount)
{
size_t i = 0;
@ -544,22 +657,22 @@ void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer888XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer888XTo888_SwapRB(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo888_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer888XTo888_IsUnaligned(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo888_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer888XTo888(src, dst, pixCountVector);
i = csh.ConvertBuffer888xTo888(src, dst, pixCountVector);
}
}
@ -811,7 +924,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -841,7 +954,7 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -871,19 +984,19 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restr
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque<BE_BYTESWAP>(src, dst, pixCount);
return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque<BE_BYTESWAP>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -913,7 +1026,7 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -943,15 +1056,159 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restr
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque<BE_BYTESWAP>(src, dst, pixCount);
return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque<BE_BYTESWAP>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
}
// Converts pixCount 16-bit 5551 pixels from src into 32-bit 8888 pixels in dst,
// one pixel at a time, without swapping the red and blue channels. Each pixel is
// converted by ColorspaceConvert5551To8888<false>().
//
// BE_BYTESWAP selects where LE_TO_LOCAL_* byte-order handling is applied:
//   BESwapNone   - neither the source nor the result is passed through LE_TO_LOCAL
//   BESwapSrc    - LE_TO_LOCAL_16 on the source pixel before conversion
//   BESwapDst    - LE_TO_LOCAL_32 on the converted result
//   BESwapSrcDst - both of the above
//
// Returns the number of pixels processed (equal to pixCount).
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		// BE_BYTESWAP is a template constant, so only one branch is ever live.
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<false>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<false>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<false>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<false>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Converts pixCount 16-bit 5551 pixels from src into 32-bit 8888 pixels in dst,
// one pixel at a time, using the red/blue-swapped conversion
// ColorspaceConvert5551To8888<true>().
//
// BE_BYTESWAP selects where LE_TO_LOCAL_* byte-order handling is applied:
//   BESwapNone   - neither the source nor the result is passed through LE_TO_LOCAL
//   BESwapSrc    - LE_TO_LOCAL_16 on the source pixel before conversion
//   BESwapDst    - LE_TO_LOCAL_32 on the converted result
//   BESwapSrcDst - both of the above
//
// Returns the number of pixels processed (equal to pixCount).
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		// BE_BYTESWAP is a template constant, so only one branch is ever live.
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<true>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<true>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<true>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<true>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// "IsUnaligned" entry point for the 5551 -> 8888 conversion. In this class it
// forwards straight to ColorspaceHandler::ConvertBuffer5551To8888(); the call
// is explicitly qualified with the class name.
// Returns the number of pixels processed.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = this->ColorspaceHandler::ConvertBuffer5551To8888<BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// "IsUnaligned" entry point for the red/blue-swapped 5551 -> 8888 conversion.
// In this class it forwards straight to
// ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(); the call is explicitly
// qualified with the class name.
// Returns the number of pixels processed.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = this->ColorspaceHandler::ConvertBuffer5551To8888_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// Converts pixCount 16-bit 5551 pixels from src into 32-bit-wide 6665 pixels in
// dst, one pixel at a time, without swapping the red and blue channels. Each
// pixel is converted by ColorspaceConvert5551To6665<false>().
//
// BE_BYTESWAP selects where LE_TO_LOCAL_* byte-order handling is applied:
//   BESwapNone   - neither the source nor the result is passed through LE_TO_LOCAL
//   BESwapSrc    - LE_TO_LOCAL_16 on the source pixel before conversion
//   BESwapDst    - LE_TO_LOCAL_32 on the converted result
//   BESwapSrcDst - both of the above
//
// Returns the number of pixels processed (equal to pixCount).
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		// BE_BYTESWAP is a template constant, so only one branch is ever live.
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<false>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<false>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<false>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<false>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Converts pixCount 16-bit 5551 pixels from src into 32-bit-wide 6665 pixels in
// dst, one pixel at a time, using the red/blue-swapped conversion
// ColorspaceConvert5551To6665<true>().
//
// BE_BYTESWAP selects where LE_TO_LOCAL_* byte-order handling is applied:
//   BESwapNone   - neither the source nor the result is passed through LE_TO_LOCAL
//   BESwapSrc    - LE_TO_LOCAL_16 on the source pixel before conversion
//   BESwapDst    - LE_TO_LOCAL_32 on the converted result
//   BESwapSrcDst - both of the above
//
// Returns the number of pixels processed (equal to pixCount).
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		// BE_BYTESWAP is a template constant, so only one branch is ever live.
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<true>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<true>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<true>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<true>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// "IsUnaligned" entry point for the 5551 -> 6665 conversion. In this class it
// forwards straight to ColorspaceHandler::ConvertBuffer5551To6665(); the call
// is explicitly qualified with the class name.
// Returns the number of pixels processed.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = this->ColorspaceHandler::ConvertBuffer5551To6665<BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// "IsUnaligned" entry point for the red/blue-swapped 5551 -> 6665 conversion.
// In this class it forwards straight to
// ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(); the call is explicitly
// qualified with the class name.
// Returns the number of pixels processed.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = this->ColorspaceHandler::ConvertBuffer5551To6665_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -1090,7 +1347,7 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
@ -1102,7 +1359,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst
return i;
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
@ -1114,17 +1371,17 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u
return i;
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo8888Opaque(src, dst, pixCount);
return this->ConvertBuffer888xTo8888Opaque(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCount);
return this->ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -1136,7 +1393,7 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *
return i;
}
size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -1148,17 +1405,17 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict sr
return i;
}
size_t ColorspaceHandler::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return this->ConvertBuffer555XTo888(src, dst, pixCount);
return this->ConvertBuffer555xTo888(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return this->ConvertBuffer555XTo888_SwapRB(src, dst, pixCount);
return this->ConvertBuffer555xTo888_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -1170,7 +1427,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *
return i;
}
size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -1182,14 +1439,14 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict sr
return i;
}
size_t ColorspaceHandler::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo888(src, dst, pixCount);
return this->ConvertBuffer888xTo888(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return this->ConvertBuffer888XTo888_SwapRB(src, dst, pixCount);
return this->ConvertBuffer888xTo888_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1396,39 +1653,73 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst,
return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity);
}
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
@ -1450,20 +1741,20 @@ template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restri
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo8888Opaque<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo8888Opaque<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo8888Opaque<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo8888Opaque<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<true, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<true, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<false, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<false, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo888<true, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo888<true, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo888<false, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555xTo888<false, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<true, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<true, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<false, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<false, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo888<true, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo888<true, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo888<false, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888xTo888<false, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceCopyBuffer16<true, true>(const u16 *src, u16 *dst, size_t pixCount);
template void ColorspaceCopyBuffer16<true, false>(const u16 *src, u16 *dst, size_t pixCount);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2023 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -126,6 +126,26 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
}
// Converts a single 16-bit 5551 color to a 32-bit 8888 color, honoring the
// source's alpha bit (bit 15).
//
// The low 15 bits are expanded through the opaque 555 -> 8888 conversion
// (red/blue-swapped variant when SWAP_RB is true); the alpha component is then
// overwritten with 0xFF if bit 15 of src is set, or 0x00 if it is clear.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert5551To8888(const u16 src)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.value = COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	else
	{
		result.value = COLOR555TO8888_OPAQUE(src & 0x7FFF);
	}
	
	// Replace the opaque alpha from the conversion with the 1-bit source alpha.
	if (src & 0x8000)
	{
		result.a = 0xFF;
	}
	else
	{
		result.a = 0x00;
	}
	
	return result.value;
}
// Converts a single 16-bit 5551 color to a 6665 color, honoring the source's
// alpha bit (bit 15).
//
// The low 15 bits are expanded through the opaque 555 -> 6665 conversion
// (red/blue-swapped variant when SWAP_RB is true); the alpha component is then
// overwritten with 0x1F if bit 15 of src is set, or 0x00 if it is clear.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert5551To6665(const u16 src)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.value = COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	else
	{
		result.value = COLOR555TO6665_OPAQUE(src & 0x7FFF);
	}
	
	// Replace the opaque alpha from the conversion with the 1-bit source alpha.
	if (src & 0x8000)
	{
		result.a = 0x1F;
	}
	else
	{
		result.a = 0x00;
	}
	
	return result.value;
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor)
{
@ -331,16 +351,18 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity)
return ColorspaceApplyIntensity32<SWAP_RB>(srcColorComponent);
}
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount);
@ -353,15 +375,25 @@ class ColorspaceHandler
public:
ColorspaceHandler() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -383,20 +415,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@
#include <immintrin.h>
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -64,7 +64,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -101,7 +101,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -141,7 +141,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -178,17 +178,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u
}
// Opaque 555x -> 8888 conversion. This function does not derive alpha from srcColor:
// it builds a constant alpha-bits vector (0xFF00 in every 16-bit lane) and passes it,
// together with srcColor and the two output references, to the alpha-taking converter.
// NOTE(review): both the pre-rename and the renamed signature/call lines appear below
// (looks like diff residue) -- confirm which pair is meant to remain.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0xFF00);
ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Opaque 555x -> 6665 conversion. This function does not derive alpha from srcColor:
// it builds a constant alpha-bits vector (0x1F00 in every 16-bit lane) and passes it,
// together with srcColor and the two output references, to the alpha-taking converter.
// NOTE(review): both the pre-rename and the renamed signature/call lines appear below
// (looks like diff residue) -- confirm which pair is meant to remain.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0x1F00);
ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts 16-bit 5551 colors to 32-bit 8888 colors, taking alpha from each source
// color's top bit (bit 15) instead of forcing it opaque.
//   srcColor      - source colors, one per 16-bit lane.
//   dstLo, dstHi  - output references, passed straight through to
//                   ColorspaceConvert555aTo8888_AVX2, which does the actual conversion.
// Lanes whose alpha bit is set get alpha bits 0xFF00 (the same constant the Opaque
// variant passes for every lane); lanes whose alpha bit is clear get 0.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_cmpgt_epi16 is a SIGNED 16-bit compare, so a lane with bit 15 set is negative.
	// (0 > srcColor) is therefore all-ones exactly where the alpha bit is set.
	// The earlier form compared srcColor > 0xFFFF, i.e. srcColor > -1, which selected the
	// lanes whose alpha bit was CLEAR and so produced inverted alpha.
	const v256u16 alphaBitMask = _mm256_cmpgt_epi16(_mm256_setzero_si256(), srcColor);
	const v256u16 srcAlphaBits16 = _mm256_and_si256(alphaBitMask, _mm256_set1_epi16(0xFF00));
	ColorspaceConvert555aTo8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts 16-bit 5551 colors to 32-bit 6665 colors, taking alpha from each source
// color's top bit (bit 15) instead of forcing it opaque.
//   srcColor      - source colors, one per 16-bit lane.
//   dstLo, dstHi  - output references, passed straight through to
//                   ColorspaceConvert555aTo6665_AVX2, which does the actual conversion.
// Lanes whose alpha bit is set get alpha bits 0x1F00 (the same constant the Opaque
// variant passes for every lane); lanes whose alpha bit is clear get 0.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_cmpgt_epi16 is a SIGNED 16-bit compare, so a lane with bit 15 set is negative.
	// (0 > srcColor) is therefore all-ones exactly where the alpha bit is set.
	// The earlier form compared srcColor > 0xFFFF, i.e. srcColor > -1, which selected the
	// lanes whose alpha bit was CLEAR and so produced inverted alpha.
	const v256u16 alphaBitMask = _mm256_cmpgt_epi16(_mm256_setzero_si256(), srcColor);
	const v256u16 srcAlphaBits16 = _mm256_and_si256(alphaBitMask, _mm256_set1_epi16(0x1F00));
	ColorspaceConvert555aTo6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
@ -320,7 +334,7 @@ FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const
}
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src)
FORCEINLINE v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src)
{
if (SWAP_RB)
{
@ -407,7 +421,7 @@ FORCEINLINE v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float in
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
@ -415,7 +429,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -433,7 +447,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
@ -441,7 +455,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u3
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16)))
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16)))
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -539,7 +605,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
@ -547,11 +613,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
@ -559,7 +625,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
size_t ColorspaceConvertBuffer555xTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
v256u16 src_v256u16[2];
@ -636,7 +702,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__re
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
size_t ColorspaceConvertBuffer888xTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
v256u32 src_v256u32[4];
@ -905,51 +971,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256,
}
// Forwards src/dst/pixCount to the AVX2 555x -> 8888 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=false and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 8888 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=false and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 8888 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=true and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 8888 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=true and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 6665 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=false and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<false, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 6665 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=false and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<true, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 6665 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=true and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<false, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 6665 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=true and returns its result. BE_BYTESWAP is not referenced here.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<true, true>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion, SWAP_RB=false / IS_UNALIGNED=false variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To8888_AVX2<false, false>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 8888 buffer conversion, SWAP_RB=true / IS_UNALIGNED=false variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To8888_AVX2<true, false>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 8888 buffer conversion, SWAP_RB=false / IS_UNALIGNED=true variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To8888_AVX2<false, true>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 8888 buffer conversion, SWAP_RB=true / IS_UNALIGNED=true variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To8888_AVX2<true, true>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 6665 buffer conversion, SWAP_RB=false / IS_UNALIGNED=false variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To6665_AVX2<false, false>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 6665 buffer conversion, SWAP_RB=true / IS_UNALIGNED=false variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To6665_AVX2<true, false>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 6665 buffer conversion, SWAP_RB=false / IS_UNALIGNED=true variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To6665_AVX2<false, true>(src, dst, pixCount);
	return convertedPixCount;
}
// 5551 -> 6665 buffer conversion, SWAP_RB=true / IS_UNALIGNED=true variant.
// Hands src, dst and pixCount to the AVX2 buffer converter and returns whatever it returns.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedPixCount = ColorspaceConvertBuffer5551To6665_AVX2<true, true>(src, dst, pixCount);
	return convertedPixCount;
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -1032,64 +1146,64 @@ size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 8888 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 8888 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 8888 opaque buffer converter with
// SWAP_RB=false, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 8888 opaque buffer converter with
// SWAP_RB=true, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 888 buffer converter (u16 source, u8 destination)
// with SWAP_RB=false, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX2<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX2<false, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 888 buffer converter (u16 source, u8 destination)
// with SWAP_RB=true, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX2<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX2<true, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 888 buffer converter (u16 source, u8 destination)
// with SWAP_RB=false, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX2<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX2<false, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 555x -> 888 buffer converter (u16 source, u8 destination)
// with SWAP_RB=true, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX2<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX2<true, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 888 buffer converter (u32 source, u8 destination)
// with SWAP_RB=false, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX2<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX2<false, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 888 buffer converter (u32 source, u8 destination)
// with SWAP_RB=true, IS_UNALIGNED=false and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX2<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX2<true, false>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 888 buffer converter (u32 source, u8 destination)
// with SWAP_RB=false, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX2<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX2<false, true>(src, dst, pixCount);
}
// Forwards src/dst/pixCount to the AVX2 888x -> 888 buffer converter (u32 source, u8 destination)
// with SWAP_RB=true, IS_UNALIGNED=true and returns its result.
// NOTE(review): old-name and renamed lines both appear below (diff residue?) -- confirm.
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX2<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1152,23 +1266,23 @@ size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_AVX2<true, true>(dst, pixCount, intensity);
}
// Explicit instantiations of the per-vector 555 converters for both SWAP_RB values.
// NOTE(review): each converter appears under both its old and renamed spelling
// (looks like diff residue) -- confirm which set is meant to remain.
// NOTE(review): no instantiations of ColorspaceConvert5551To8888_AVX2 or
// ColorspaceConvert5551To6665_AVX2 are visible in this run, although both templates are
// defined earlier in this file -- confirm whether other translation units need them.
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555aTo8888_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555aTo8888_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo888x_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo888x_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555aTo6665_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555aTo6665_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo666x_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo666x_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
@ -1182,8 +1296,8 @@ template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, c
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<false>(const v256u32 &src);
template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2<false>(const v256u32 &src);
template v256u16 ColorspaceCopy16_AVX2<true>(const v256u16 &src);
template v256u16 ColorspaceCopy16_AVX2<false>(const v256u16 &src);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires AVX2 support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceCopy16_AVX2(const v256u16 &src);
template<bool SWAP_RB> v256u32 ColorspaceCopy32_AVX2(const v256u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler
public:
ColorspaceHandler_AVX2() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@
#include <immintrin.h>
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -44,7 +44,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, cons
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -62,7 +62,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v51
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -81,7 +81,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, cons
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -99,17 +99,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v51
}
// Converts the 16-bit colors in srcColor into 32-bit results written to dstLo and dstHi.
// The alpha input handed to the converter is a fixed constant; nothing is read from
// srcColor to decide it.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
// Every 16-bit lane carries the same alpha bits (0xFF00).
const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0xFF00);
ColorspaceConvert555To8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts the 16-bit colors in srcColor into 32-bit results written to dstLo and dstHi.
// The alpha input handed to the converter is a fixed constant; nothing is read from
// srcColor to decide it.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
// Every 16-bit lane carries the same alpha bits (0x1F00).
const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0x1F00);
ColorspaceConvert555To6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts packed 16-bit 5551 colors to 8888, deriving each pixel's alpha input from
// bit 15 of the source color.
//
//   srcColor     - 16-bit source colors; bit 15 of each lane is the alpha bit.
//   dstLo, dstHi - receive the converted 32-bit colors.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
	// An arithmetic right shift by 15 replicates bit 15 across the whole 16-bit lane:
	// 0xFFFF when the alpha bit is set, 0x0000 when it is clear. A vector compare is
	// not usable here: AVX-512 integer compares return a mask register instead of a
	// vector, and a signed "srcColor > -1" test selects the lanes whose bit 15 is clear.
	const v512u16 alphaBitMask = _mm512_srai_epi16(srcColor, 15);
	
	// Keep only the alpha bits the 555a converter expects (0xFF00 when the bit is set, else 0).
	const v512u16 srcAlphaBits16 = _mm512_and_si512(alphaBitMask, _mm512_set1_epi16(0xFF00));
	ColorspaceConvert555aTo8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts packed 16-bit 5551 colors to 6665, deriving each pixel's alpha input from
// bit 15 of the source color.
//
//   srcColor     - 16-bit source colors; bit 15 of each lane is the alpha bit.
//   dstLo, dstHi - receive the converted 32-bit colors.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
	// An arithmetic right shift by 15 replicates bit 15 across the whole 16-bit lane:
	// 0xFFFF when the alpha bit is set, 0x0000 when it is clear. A vector compare is
	// not usable here: AVX-512 integer compares return a mask register instead of a
	// vector, and a signed "srcColor > -1" test selects the lanes whose bit 15 is clear.
	const v512u16 alphaBitMask = _mm512_srai_epi16(srcColor, 15);
	
	// Keep only the alpha bits the 555a converter expects (0x1F00 when the bit is set, else 0).
	const v512u16 srcAlphaBits16 = _mm512_and_si512(alphaBitMask, _mm512_set1_epi16(0x1F00));
	ColorspaceConvert555aTo6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
@ -239,7 +253,7 @@ FORCEINLINE v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, con
}
template <bool SWAP_RB>
FORCEINLINE v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src)
FORCEINLINE v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src)
{
if (SWAP_RB)
{
@ -326,7 +340,7 @@ FORCEINLINE v512u32 ColorspaceApplyIntensity32_AVX512(const v512u32 &src, float
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512)
static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512)
{
size_t i = 0;
@ -334,7 +348,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo8888Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -352,7 +366,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512)
size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512)
{
size_t i = 0;
@ -360,7 +374,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src,
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo6665Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512)
{
size_t i = 0;
for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16)))
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512)
{
size_t i = 0;
for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16)))
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -458,7 +524,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX512(const u32 *__restrict src, u16 *
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512)
size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512)
{
size_t i = 0;
@ -466,11 +532,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst,
{
if (IS_UNALIGNED)
{
_mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512<SWAP_RB>(_mm512_loadu_si512((v512u32 *)(src+i))) );
_mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512<SWAP_RB>(_mm512_loadu_si512((v512u32 *)(src+i))) );
}
else
{
_mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512<SWAP_RB>(_mm512_load_si512((v512u32 *)(src+i))) );
_mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512<SWAP_RB>(_mm512_load_si512((v512u32 *)(src+i))) );
}
}
@ -478,7 +544,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst,
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
size_t ColorspaceConvertBuffer555xTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
{
size_t i = 0;
v512u16 src_v512u16[2];
@ -572,7 +638,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
size_t ColorspaceConvertBuffer888xTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
{
size_t i = 0;
v512u32 src_v512u32[4];
@ -858,51 +924,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX512(u32 *dst, size_t pixCountVec512
}
// Forwards to the AVX-512 555x -> 8888 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = false. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX512<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<false, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 8888 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = false. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX512<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<true, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 8888 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = true. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX512<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<false, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 8888 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = true. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX512<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<true, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 6665 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = false. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX512<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<false, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 6665 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = false. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX512<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<true, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 6665 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = true. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX512<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<false, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 6665 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = true. BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX512<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<true, true>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion using aligned loads/stores and no R/B swap.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To8888_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To8888_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion using aligned loads/stores, with the R/B swap enabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To8888_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To8888_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion using unaligned loads/stores and no R/B swap.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To8888_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To8888_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion using unaligned loads/stores, with the R/B swap enabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To8888_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To8888_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 6665 buffer conversion with SWAP_RB and IS_UNALIGNED both disabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To6665_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To6665_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 6665 buffer conversion with SWAP_RB enabled and IS_UNALIGNED disabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To6665_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To6665_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 6665 buffer conversion with SWAP_RB disabled and IS_UNALIGNED enabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To6665_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To6665_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
// 5551 -> 6665 buffer conversion with SWAP_RB and IS_UNALIGNED both enabled.
// BE_BYTESWAP is accepted but not consulted by this implementation.
// Returns whatever ColorspaceConvertBuffer5551To6665_AVX512() returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To6665_AVX512<swapRB, isUnaligned>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX512::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -985,64 +1099,64 @@ size_t ColorspaceHandler_AVX512::ConvertBuffer6665To5551_SwapRB_IsUnaligned(cons
return ColorspaceConvertBuffer6665To5551_AVX512<true, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 8888 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<false, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 8888 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<true, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 8888 opaque buffer converter with SWAP_RB = false and
// IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<false, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 8888 opaque buffer converter with SWAP_RB = true and
// IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<true, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 888 buffer converter (16-bit source, byte destination)
// with SWAP_RB = false and IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX512<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX512<false, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 888 buffer converter (16-bit source, byte destination)
// with SWAP_RB = true and IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX512<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX512<true, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 888 buffer converter (16-bit source, byte destination)
// with SWAP_RB = false and IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX512<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX512<false, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 555x -> 888 buffer converter (16-bit source, byte destination)
// with SWAP_RB = true and IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AVX512<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AVX512<true, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 888 buffer converter (32-bit source, byte destination)
// with SWAP_RB = false and IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX512<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX512<false, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 888 buffer converter (32-bit source, byte destination)
// with SWAP_RB = true and IS_UNALIGNED = false.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX512<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX512<true, false>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 888 buffer converter (32-bit source, byte destination)
// with SWAP_RB = false and IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX512<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX512<false, true>(src, dst, pixCount);
}
// Forwards to the AVX-512 888x -> 888 buffer converter (32-bit source, byte destination)
// with SWAP_RB = true and IS_UNALIGNED = true.
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AVX512<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AVX512<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX512::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1105,23 +1219,29 @@ size_t ColorspaceHandler_AVX512::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32
return ColorspaceApplyIntensityToBuffer32_AVX512<true, true>(dst, pixCount, intensity);
}
template void ColorspaceConvert555To8888_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555aTo8888_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555aTo8888_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo888x_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo888x_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555aTo6665_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555aTo6665_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo666x_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo666x_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To8888_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To8888_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To6665_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To6665_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template v512u32 ColorspaceConvert8888To6665_AVX512<true>(const v512u32 &src);
template v512u32 ColorspaceConvert8888To6665_AVX512<false>(const v512u32 &src);
@ -1135,8 +1255,8 @@ template v512u16 ColorspaceConvert8888To5551_AVX512<false>(const v512u32 &srcLo,
template v512u16 ColorspaceConvert6665To5551_AVX512<true>(const v512u32 &srcLo, const v512u32 &srcHi);
template v512u16 ColorspaceConvert6665To5551_AVX512<false>(const v512u32 &srcLo, const v512u32 &srcHi);
template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512<true>(const v512u32 &src);
template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512<false>(const v512u32 &src);
template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512<true>(const v512u32 &src);
template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512<false>(const v512u32 &src);
template v512u16 ColorspaceCopy16_AVX512<true>(const v512u16 &src);
template v512u16 ColorspaceCopy16_AVX512<false>(const v512u16 &src);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires AVX-512 Tier-1 support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u32 ColorspaceConvert6665To8888_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi);
template<bool SWAP_RB> v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi);
template<bool SWAP_RB> v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u16 ColorspaceCopy16_AVX512(const v512u16 &src);
template<bool SWAP_RB> v512u32 ColorspaceCopy32_AVX512(const v512u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler
public:
ColorspaceHandler_AVX512() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2022 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,7 +24,7 @@
#include <string.h>
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -65,14 +65,14 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con
}
// 555x -> 888x conversion: builds an all-zero alpha-bits vector and forwards
// srcColor, that vector, dstLo and dstHi to the 555(a) -> 8888 converter.
// NOTE(review): the two signature lines and the two call lines appear to be the
// before/after of a rename; only the name differs between each pair.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Every 16-bit lane of the alpha input is zero.
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
ColorspaceConvert555To8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -113,24 +113,38 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con
}
// 555x -> 666x conversion: builds an all-zero alpha-bits vector and forwards
// srcColor, that vector, dstLo and dstHi to the 555(a) -> 6665 converter.
// NOTE(review): the two signature lines and the two call lines appear to be the
// before/after of a rename; only the name differs between each pair.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Every 16-bit lane of the alpha input is zero.
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
ColorspaceConvert555To6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// 555x -> 8888 "opaque" conversion: supplies 0xFFFF in every alpha lane and
// forwards srcColor, that vector, dstLo and dstHi to the 555(a) -> 8888 converter.
// NOTE(review): the two signature lines and the two call lines appear to be the
// before/after of a rename; only the name differs between each pair.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// All bits set in each of the eight 16-bit alpha lanes.
const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};
ColorspaceConvert555To8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// 555x -> 6665 "opaque" conversion: supplies 0x1F1F in every alpha lane and
// forwards srcColor, that vector, dstLo and dstHi to the 555(a) -> 6665 converter.
// NOTE(review): the two signature lines and the two call lines appear to be the
// before/after of a rename; only the name differs between each pair.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// 0x1F in both bytes of each 16-bit lane (presumably the 5-bit alpha maximum -- confirm).
const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F};
ColorspaceConvert555To6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// 5551 -> 8888 conversion that derives each pixel's alpha from bit 15 of the
// 16-bit source color, then forwards to ColorspaceConvert555aTo8888_AltiVec.
//
// srcColor     - eight 16-bit source colors.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo8888_AltiVec.
//
// The alpha-bits vector passed on is 0xFFFF in every lane whose bit 15 is set
// and 0x0000 in every other lane.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To8888_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// Viewed as signed 16-bit, a lane is negative exactly when bit 15 is set, so
	// "lane < 0" produces the all-ones / all-zeros mask directly. (The previous
	// form compared "lane > -1", which selects the lanes whose bit 15 is CLEAR,
	// and used a 16-element initializer for an 8-lane vector.)
	// NOTE(review): the test is applied to srcColor exactly as passed in. If a
	// BE_BYTESWAP mode means the lanes are still byte-swapped at this point,
	// bit 15 is not the alpha bit -- confirm against the 555a converter.
	const v128s16 zero = {0, 0, 0, 0, 0, 0, 0, 0};
	const v128u16 srcAlphaBits16 = (v128u16)vec_cmplt( (v128s16)srcColor, zero );
	
	ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// 5551 -> 6665 conversion that derives each pixel's alpha from bit 15 of the
// 16-bit source color, then forwards to ColorspaceConvert555aTo6665_AltiVec.
//
// srcColor     - eight 16-bit source colors.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo6665_AltiVec.
//
// The alpha-bits vector passed on is 0x1F1F in every lane whose bit 15 is set
// and 0x0000 in every other lane.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To6665_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// Viewed as signed 16-bit, a lane is negative exactly when bit 15 is set, so
	// "lane < 0" produces an all-ones / all-zeros mask. (The previous form
	// compared "lane > -1", which selects the lanes whose bit 15 is CLEAR, and
	// used 16-element initializers for 8-lane vectors.)
	// NOTE(review): the test is applied to srcColor exactly as passed in. If a
	// BE_BYTESWAP mode means the lanes are still byte-swapped at this point,
	// bit 15 is not the alpha bit -- confirm against the 555a converter.
	const v128s16 zero = {0, 0, 0, 0, 0, 0, 0, 0};
	const v128u16 alphaBitMask = (v128u16)vec_cmplt( (v128s16)srcColor, zero );
	
	// 0x1F1F per lane is the value the 555x -> 6665 "opaque" variant passes for
	// every pixel; here it is kept only where the alpha bit is set.
	const v128u16 opaqueAlphaBits = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F};
	const v128u16 srcAlphaBits16 = vec_and(alphaBitMask, opaqueAlphaBits);
	
	ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
@ -230,7 +244,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, co
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src)
FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src)
{
if (SWAP_RB)
{
@ -263,7 +277,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src)
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
static size_t ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
@ -271,7 +285,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
ColorspaceConvert555xTo8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
@ -280,7 +294,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
@ -288,7 +302,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src,
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
ColorspaceConvert555xTo6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
// Converts a buffer of 16-bit 5551 colors to 32-bit 8888 colors, one 128-bit
// vector of source pixels per iteration.
//
// src            - source pixels, read with vec_ld.
// dst            - destination pixels, written with vec_st.
// pixCountVec128 - loop bound, in pixels.
//
// Returns the loop counter at exit, i.e. the number of pixels stepped over.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer5551To8888_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
	const size_t pixPerVec = sizeof(v128u16) / sizeof(u16);
	size_t pixDone = 0;
	
	while (pixDone < pixCountVec128)
	{
		const u16 *srcPix = src + pixDone;
		u32 *dstPix = dst + pixDone;
		v128u32 convertedLo;
		v128u32 convertedHi;
		
		ColorspaceConvert5551To8888_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, srcPix), convertedLo, convertedHi );
		
		// The "Hi" half is stored at byte offset 0 and the "Lo" half at byte offset 16.
		vec_st(convertedHi,  0, dstPix);
		vec_st(convertedLo, 16, dstPix);
		
		pixDone += pixPerVec;
	}
	
	return pixDone;
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer5551To6665_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
@ -349,20 +397,20 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16
}
// Converts a buffer of 32-bit 888x pixels to 8888 "opaque" pixels, four pixels
// (one vector load/store) per iteration. Returns the loop counter at exit.
// NOTE(review): the two signature lines and the two vec_st lines appear to be the
// before/after of a rename; only the callee/function name differs between each pair.
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
// Step of 4: each iteration loads, converts and stores one vector at src+i / dst+i.
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert888XTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
vec_st( ColorspaceConvert888xTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer555xTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128)
{
size_t i = 0;
v128u16 src_v128u16[2];
@ -405,7 +453,7 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer888xTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128)
{
size_t i = 0;
v128u32 src_v128u32[4];
@ -477,27 +525,51 @@ size_t ColorspaceCopyBuffer32_AltiVec(const u32 *src, u32 *dst, size_t pixCountV
}
// Handler entry point for 555x -> 8888 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = false and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
}
// Handler entry point for 555x -> 8888 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = true and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
}
// Handler entry point for 555x -> 6665 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = false and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
}
// Handler entry point for 555x -> 6665 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = true and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
}
// Handler entry point for 5551 -> 8888 buffer conversion without red/blue
// swapping. Delegates to the AltiVec buffer routine and returns its result.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// First template argument is SWAP_RB.
	const size_t pixConverted = ColorspaceConvertBuffer5551To8888_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
	return pixConverted;
}
// Handler entry point for 5551 -> 8888 buffer conversion with red/blue
// swapping. Delegates to the AltiVec buffer routine and returns its result.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// First template argument is SWAP_RB.
	const size_t pixConverted = ColorspaceConvertBuffer5551To8888_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
	return pixConverted;
}
// Handler entry point for 5551 -> 6665 buffer conversion without red/blue
// swapping. Delegates to the AltiVec buffer routine and returns its result.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// First template argument is SWAP_RB.
	const size_t pixConverted = ColorspaceConvertBuffer5551To6665_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
	return pixConverted;
}
// Handler entry point for 5551 -> 6665 buffer conversion with red/blue
// swapping. Delegates to the AltiVec buffer routine and returns its result.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// First template argument is SWAP_RB.
	const size_t pixConverted = ColorspaceConvertBuffer5551To6665_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
	return pixConverted;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -540,34 +612,34 @@ size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__re
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
}
// Handler entry point for 888x -> 8888 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = false and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec<false>(src, dst, pixCount);
}
// Handler entry point for 888x -> 8888 opaque buffer conversion; forwards to the
// AltiVec buffer routine with SWAP_RB = true and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec<true>(src, dst, pixCount);
}
// Handler entry point for 555x (16-bit) -> 888 (byte buffer) conversion; forwards
// to the AltiVec buffer routine with SWAP_RB = false and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AltiVec<false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AltiVec<false>(src, dst, pixCount);
}
// Handler entry point for 555x (16-bit) -> 888 (byte buffer) conversion; forwards
// to the AltiVec buffer routine with SWAP_RB = true and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_AltiVec<true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_AltiVec<true>(src, dst, pixCount);
}
// Handler entry point for 888x (32-bit) -> 888 (byte buffer) conversion; forwards
// to the AltiVec buffer routine with SWAP_RB = false and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AltiVec<false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AltiVec<false>(src, dst, pixCount);
}
// Handler entry point for 888x (32-bit) -> 888 (byte buffer) conversion; forwards
// to the AltiVec buffer routine with SWAP_RB = true and returns its result.
// NOTE(review): paired lines appear to be the before/after of a rename.
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_AltiVec<true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -580,59 +652,59 @@ size_t ColorspaceHandler_AltiVec::CopyBuffer32_SwapRB(const u32 *src, u32 *dst,
return ColorspaceCopyBuffer32_AltiVec<true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
@ -646,8 +718,8 @@ template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<false>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_AltiVec<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_AltiVec<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,17 +24,17 @@
#warning This header requires PowerPC AltiVec support.
#else
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src);
@ -46,11 +46,17 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler
public:
ColorspaceHandler_AltiVec() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -64,14 +70,14 @@ public:
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2022 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -28,7 +28,7 @@
// Exchanges byte 0 and byte 2 of every 32-bit element of src via a byte table lookup
// (index pattern 2,1,0,3 repeated per element); bytes 1 and 3 stay in place.
#define COLOR32_SWAPRB_NEON(src) vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(src), ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})) )
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -60,7 +60,7 @@ FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -90,7 +90,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -122,7 +122,7 @@ FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -152,17 +152,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u
}
// Converts a vector of 555x colors to 8888 using a constant alpha argument: every
// 16-bit lane of the alpha-bits vector is set to 0xFF00 before delegating to the
// 555a-to-8888 converter, which writes dstLo and dstHi.
// NOTE(review): each 555To... line sits directly above its 555x/555a renamed
// counterpart -- presumably the pre-rename side of a diff; confirm only the renamed
// line of each pair is meant to remain.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = vdupq_n_u16(0xFF00);
ColorspaceConvert555To8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a vector of 555x colors to 6665 using a constant alpha argument: every
// 16-bit lane of the alpha-bits vector is set to 0x1F00 before delegating to the
// 555a-to-6665 converter, which writes dstLo and dstHi.
// NOTE(review): each 555To... line sits directly above its 555x/555a renamed
// counterpart -- presumably the pre-rename side of a diff; confirm only the renamed
// line of each pair is meant to remain.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = vdupq_n_u16(0x1F00);
ColorspaceConvert555To6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts eight 16-bit 5551 colors to 8888, taking each pixel's alpha from bit 15
// of the source color: a set bit yields the 0xFF00 alpha argument (same value the
// opaque converter passes), a clear bit yields 0.
//
// srcColor     - eight 16-bit source colors; bit 15 is the alpha flag.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo8888_NEON().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// vtstq_u16() sets a lane to 0xFFFF when (srcColor & 0x8000) is non-zero, i.e. when
	// the alpha bit is SET. The previous signed "greater than -1" compare selected the
	// lanes whose alpha bit was clear, and it mixed signed/unsigned vector types through
	// reinterpret casts written in the wrong direction.
	const v128u16 alphaBitMask16 = vtstq_u16( srcColor, vdupq_n_u16(0x8000) );
	const v128u16 srcAlphaBits16 = vandq_u16( alphaBitMask16, vdupq_n_u16(0xFF00) );
	ColorspaceConvert555aTo8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts eight 16-bit 5551 colors to 6665, taking each pixel's alpha from bit 15
// of the source color: a set bit yields the 0x1F00 alpha argument (same value the
// opaque converter passes), a clear bit yields 0.
//
// srcColor     - eight 16-bit source colors; bit 15 is the alpha flag.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo6665_NEON().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// vtstq_u16() sets a lane to 0xFFFF when (srcColor & 0x8000) is non-zero, i.e. when
	// the alpha bit is SET. The previous signed "greater than -1" compare selected the
	// lanes whose alpha bit was clear, and it mixed signed/unsigned vector types through
	// reinterpret casts written in the wrong direction.
	const v128u16 alphaBitMask16 = vtstq_u16( srcColor, vdupq_n_u16(0x8000) );
	const v128u16 srcAlphaBits16 = vandq_u16( alphaBitMask16, vdupq_n_u16(0x1F00) );
	ColorspaceConvert555aTo6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
@ -290,7 +304,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src)
FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src)
{
if (SWAP_RB)
{
@ -377,7 +391,7 @@ FORCEINLINE v128u32 ColorspaceApplyIntensity32_NEON(const v128u32 &src, float in
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
static size_t ColorspaceConvertBuffer555xTo8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
v128u16 srcVec;
@ -386,7 +400,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
srcVec = vld1q_u16(src+i);
ColorspaceConvert555To8888Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
ColorspaceConvert555xTo8888Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec);
}
@ -394,7 +408,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer555xTo6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
v128u16 srcVec;
@ -403,7 +417,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u3
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
srcVec = vld1q_u16(src+i);
ColorspaceConvert555To6665Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
ColorspaceConvert555xTo6665Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec);
}
return i;
}
// Converts a buffer of 16-bit 5551 pixels to 32-bit 8888 pixels, one 128-bit source
// vector (eight pixels) per iteration, and returns the pixel index reached when the
// loop stops. IS_UNALIGNED is not referenced by this routine.
// NOTE(review): the loop always processes whole vectors, so pixCountVec128 is
// presumably pre-rounded to a multiple of eight by the caller -- confirm.
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
	const size_t pixPerVec = sizeof(v128u16) / sizeof(u16);
	size_t pixIndex = 0;

	while (pixIndex < pixCountVec128)
	{
		uint32x4x2_t converted;
		v128u16 packedColors = vld1q_u16(src + pixIndex);

		// Low four pixels land in val[0], high four in val[1]; both are stored together.
		ColorspaceConvert5551To8888_NEON<SWAP_RB>(packedColors, converted.val[0], converted.val[1]);
		vst1q_u32_x2(dst + pixIndex, converted);

		pixIndex += pixPerVec;
	}

	return pixIndex;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
v128u16 srcVec;
uint32x4x2_t dstVec;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
srcVec = vld1q_u16(src+i);
ColorspaceConvert5551To6665_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec);
}
@ -467,7 +515,7 @@ size_t ColorspaceConvertBuffer6665To5551_NEON(const u32 *__restrict src, u16 *__
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer888xTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
uint8x16x4_t srcVec_x4;
@ -491,7 +539,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, si
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer555xTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
uint16x8x2_t srcVec;
@ -529,7 +577,7 @@ size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__re
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
size_t ColorspaceConvertBuffer888xTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
uint8x16x4_t srcVec_x4;
@ -723,51 +771,99 @@ size_t ColorspaceApplyIntensityToBuffer32_NEON(u32 *dst, size_t pixCountVec128,
}
// Handler entry point: forwards to the NEON 555x-to-8888-opaque buffer routine with
// SWAP_RB=false and IS_UNALIGNED=false. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_NEON<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_NEON<false, false>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-8888-opaque buffer routine with
// SWAP_RB=true and IS_UNALIGNED=false. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_NEON<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_NEON<true, false>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-8888-opaque buffer routine with
// SWAP_RB=false and IS_UNALIGNED=true. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_NEON<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_NEON<false, true>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-8888-opaque buffer routine with
// SWAP_RB=true and IS_UNALIGNED=true. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_NEON<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo8888Opaque_NEON<true, true>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-6665-opaque buffer routine with
// SWAP_RB=false and IS_UNALIGNED=false. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_NEON<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_NEON<false, false>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-6665-opaque buffer routine with
// SWAP_RB=true and IS_UNALIGNED=false. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_NEON<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_NEON<true, false>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-6665-opaque buffer routine with
// SWAP_RB=false and IS_UNALIGNED=true. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_NEON<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_NEON<false, true>(src, dst, pixCount);
}
// Handler entry point: forwards to the NEON 555x-to-6665-opaque buffer routine with
// SWAP_RB=true and IS_UNALIGNED=true. BE_BYTESWAP is not referenced in the body.
// NOTE(review): the 555To... lines look like the pre-rename side of a diff; confirm
// only the 555xTo... line of each pair is meant to remain.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_NEON<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo6665Opaque_NEON<true, true>(src, dst, pixCount);
}
// Handler entry point for 5551 -> 8888 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=false, IS_UNALIGNED=false and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To8888_NEON<false, false>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 8888 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=true, IS_UNALIGNED=false and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To8888_NEON<true, false>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 8888 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=false, IS_UNALIGNED=true and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To8888_NEON<false, true>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 8888 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=true, IS_UNALIGNED=true and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To8888_NEON<true, true>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 6665 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=false, IS_UNALIGNED=false and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To6665_NEON<false, false>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 6665 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=true, IS_UNALIGNED=false and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To6665_NEON<true, false>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 6665 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=false, IS_UNALIGNED=true and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To6665_NEON<false, true>(src, dst, pixCount);
	return processedPixCount;
}
// Handler entry point for 5551 -> 6665 buffer conversion. Delegates to the NEON buffer
// routine with SWAP_RB=true, IS_UNALIGNED=true and hands back its return value.
// BE_BYTESWAP is not referenced in the body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedPixCount = ColorspaceConvertBuffer5551To6665_NEON<true, true>(src, dst, pixCount);
	return processedPixCount;
}
size_t ColorspaceHandler_NEON::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -850,64 +946,64 @@ size_t ColorspaceHandler_NEON::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_NEON<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_NEON<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_NEON<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_NEON<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo8888Opaque_NEON<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_NEON<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_NEON<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_NEON<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_NEON<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_NEON<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_NEON<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_NEON<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer555xTo888_NEON<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_NEON<false, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_NEON<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_NEON<true, false>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_NEON<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_NEON<false, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_NEON<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_NEON<true, true>(src, dst, pixCount);
return ColorspaceConvertBuffer888xTo888_NEON<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_NEON::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -970,23 +1066,29 @@ size_t ColorspaceHandler_NEON::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_NEON<true, true>(dst, pixCount, intensity);
}
template void ColorspaceConvert555To8888_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To8888_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To8888_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To6665_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To6665_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_NEON<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_NEON<false>(const v128u32 &src);
@ -1000,8 +1102,8 @@ template v128u16 ColorspaceConvert8888To5551_NEON<false>(const v128u32 &srcLo, c
template v128u16 ColorspaceConvert6665To5551_NEON<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_NEON<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_NEON<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_NEON<false>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_NEON<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_NEON<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_NEON<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_NEON<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2022 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires ARM64 NEON support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
// 888X -> 8888 (opaque) vector converter declaration: takes one v128u32 and returns one v128u32.
// The identifier must match the explicit instantiations ColorspaceConvert888XTo8888Opaque_NEON<true/false>.
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_NEON(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_NEON(const v128u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_NEON : public ColorspaceHandler
public:
ColorspaceHandler_NEON() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -33,7 +33,7 @@
#endif
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -66,7 +66,7 @@ FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -97,7 +97,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -131,7 +131,7 @@ FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -162,17 +162,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = _mm_set1_epi16(0xFF00);
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
FORCEINLINE void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = _mm_set1_epi16(0x1F00);
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
ColorspaceConvert555aTo6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts eight 16-bit 5551 colors to eight 32-bit 8888 colors, deriving the destination
// alpha from each source color's alpha bit (bit 15).
//
// srcColor - eight packed 16-bit source colors.
// dstLo    - receives the first four converted colors.
// dstHi    - receives the last four converted colors.
//
// The alpha mask uses the same layout that ColorspaceConvert555xTo8888Opaque_SSE2 passes to
// ColorspaceConvert555aTo8888_SSE2: 0xFF00 in a lane means fully opaque, 0x0000 means transparent.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// _mm_cmpgt_epi16() is a SIGNED compare, and a lane whose bit 15 is set is negative when
	// viewed as a signed 16-bit value. Testing (0 > srcColor) therefore yields 0xFFFF exactly
	// for the lanes whose alpha bit is set. The previous form, (srcColor > 0xFFFF) == (srcColor > -1),
	// was true only when the alpha bit was CLEAR, which inverted the resulting alpha.
	const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(_mm_setzero_si128(), srcColor), _mm_set1_epi16(0xFF00) );
	ColorspaceConvert555aTo8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts eight 16-bit 5551 colors to eight 32-bit 6665 colors, deriving the destination
// alpha from each source color's alpha bit (bit 15).
//
// srcColor - eight packed 16-bit source colors.
// dstLo    - receives the first four converted colors.
// dstHi    - receives the last four converted colors.
//
// The alpha mask uses the same layout that ColorspaceConvert555xTo6665Opaque_SSE2 passes to
// ColorspaceConvert555aTo6665_SSE2: 0x1F00 in a lane means fully opaque, 0x0000 means transparent.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// _mm_cmpgt_epi16() is a SIGNED compare, and a lane whose bit 15 is set is negative when
	// viewed as a signed 16-bit value. Testing (0 > srcColor) therefore yields 0xFFFF exactly
	// for the lanes whose alpha bit is set. The previous form, (srcColor > 0xFFFF) == (srcColor > -1),
	// was true only when the alpha bit was CLEAR, which inverted the resulting alpha.
	const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(_mm_setzero_si128(), srcColor), _mm_set1_epi16(0x1F00) );
	ColorspaceConvert555aTo6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
@ -315,7 +329,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src)
FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src)
{
if (SWAP_RB)
{
@ -422,7 +436,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -448,7 +462,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u3
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
ColorspaceConvert555xTo6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
@ -554,11 +620,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_SSE2(const u32 *src, u32 *dst, si
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
@ -937,53 +1003,101 @@ size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128,
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
}
// ConvertBuffer5551To8888: member wrapper over ColorspaceConvertBuffer5551To8888_SSE2 with
// SWAP_RB = false and IS_UNALIGNED = false. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	const size_t result = ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// ConvertBuffer5551To8888_SwapRB: member wrapper over ColorspaceConvertBuffer5551To8888_SSE2 with
// SWAP_RB = true and IS_UNALIGNED = false. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	const size_t result = ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// ConvertBuffer5551To8888_IsUnaligned: member wrapper over ColorspaceConvertBuffer5551To8888_SSE2 with
// SWAP_RB = false and IS_UNALIGNED = true. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = true;
	
	const size_t result = ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// ConvertBuffer5551To8888_SwapRB_IsUnaligned: member wrapper over ColorspaceConvertBuffer5551To8888_SSE2
// with SWAP_RB = true and IS_UNALIGNED = true. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = true;
	
	const size_t result = ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// ConvertBuffer5551To6665: member wrapper over ColorspaceConvertBuffer5551To6665_SSE2 with
// SWAP_RB = false and IS_UNALIGNED = false. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	const size_t result = ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// ConvertBuffer5551To6665_SwapRB: member wrapper over ColorspaceConvertBuffer5551To6665_SSE2 with
// SWAP_RB = true and IS_UNALIGNED = false. Returns the underlying routine's result unchanged.
// BE_BYTESWAP is not referenced by this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	const size_t result = ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
	return result;
}
// 16-bit (5551) to 18-bit (6665) buffer conversion, _IsUnaligned variant.
// Hands src, dst and pixCount to ColorspaceConvertBuffer5551To6665_SSE2<false, true>
// and returns whatever that routine returns. Judging by the sibling wrappers, the first
// template flag tracks the _SwapRB suffix and the second the _IsUnaligned suffix.
// The BE_BYTESWAP template parameter is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t result = ColorspaceConvertBuffer5551To6665_SSE2<false, true>(src, dst, pixCount);
	return result;
}
// 16-bit (5551) to 18-bit (6665) buffer conversion, _SwapRB_IsUnaligned variant.
// Hands src, dst and pixCount to ColorspaceConvertBuffer5551To6665_SSE2<true, true>
// and returns whatever that routine returns.
// The BE_BYTESWAP template parameter is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t result = ColorspaceConvertBuffer5551To6665_SSE2<true, true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
@ -1064,64 +1178,64 @@ size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
}
// Forwards src, dst and pixCount to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false>
// and returns its result. Source and destination are both u32 buffers.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
// Forwards src, dst and pixCount to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false>
// and returns its result. Source and destination are both u32 buffers.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
// Forwards src, dst and pixCount to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true>
// and returns its result. Source and destination are both u32 buffers.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
// Forwards src, dst and pixCount to ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true>
// and returns its result. Source and destination are both u32 buffers.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
#ifdef ENABLE_SSSE3
// Follows the `#ifdef ENABLE_SSSE3` guard. Forwards src (u16), dst (u8) and pixCount to
// ColorspaceConvertBuffer555XTo888_SSSE3<false, false> and returns its result.
// NOTE(review): two signature lines (555X... and 555x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_SSSE3<false, false>(src, dst, pixCount);
}
// Forwards src (u16), dst (u8) and pixCount to
// ColorspaceConvertBuffer555XTo888_SSSE3<true, false> and returns its result.
// NOTE(review): two signature lines (555X... and 555x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_SSSE3<true, false>(src, dst, pixCount);
}
// Forwards src (u16), dst (u8) and pixCount to
// ColorspaceConvertBuffer555XTo888_SSSE3<false, true> and returns its result.
// NOTE(review): two signature lines (555X... and 555x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_SSSE3<false, true>(src, dst, pixCount);
}
// Forwards src (u16), dst (u8) and pixCount to
// ColorspaceConvertBuffer555XTo888_SSSE3<true, true> and returns its result.
// NOTE(review): two signature lines (555X... and 555x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555XTo888_SSSE3<true, true>(src, dst, pixCount);
}
// Forwards src (u32), dst (u8) and pixCount to
// ColorspaceConvertBuffer888XTo888_SSSE3<false, false> and returns its result.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_SSSE3<false, false>(src, dst, pixCount);
}
// Forwards src (u32), dst (u8) and pixCount to
// ColorspaceConvertBuffer888XTo888_SSSE3<true, false> and returns its result.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_SSSE3<true, false>(src, dst, pixCount);
}
// Forwards src (u32), dst (u8) and pixCount to
// ColorspaceConvertBuffer888XTo888_SSSE3<false, true> and returns its result.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_SSSE3<false, true>(src, dst, pixCount);
}
// Forwards src (u32), dst (u8) and pixCount to
// ColorspaceConvertBuffer888XTo888_SSSE3<true, true> and returns its result.
// NOTE(review): two signature lines (888X... and 888x...) precede one body -- looks like
// rename residue from a diff view; confirm which spelling is current.
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer888XTo888_SSSE3<true, true>(src, dst, pixCount);
}
@ -1188,23 +1302,23 @@ size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_SSE2<true, true>(dst, pixCount, intensity);
}
// Explicit instantiations of the per-vector SSE2 conversion templates, one for each value
// (true and false) of their single boolean template argument.
// NOTE(review): each pair appears under both an older spelling (555To... / 555XTo...) and a
// renamed spelling (555aTo... / 555xTo...). This interleaving looks like diff residue --
// confirm which set is meant to remain.
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo8888_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo888x_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555aTo6665_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo666x_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555xTo6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
@ -1218,8 +1332,8 @@ template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, c
// Further explicit instantiations (true and false) of value-returning SSE2 templates:
// 6665->5551 packs two v128u32 inputs into one v128u16; the Opaque and Copy16 forms take
// and return a single vector.
// NOTE(review): the 888XTo8888Opaque and 888xTo8888Opaque spellings both appear -- looks like
// rename residue from a diff view; confirm which one is current.
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<false>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_SSE2<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_SSE2<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2021 DeSmuME team
Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires SSE2 support.
#else
// Declarations of the per-vector SSE2 conversion templates, each parameterised on SWAP_RB.
// The 16-bit -> 32-bit-lane forms take one v128u16 and write two v128u32 outputs
// (dstLo, dstHi). The 555/555a forms take the alpha bits as a separate srcAlphaBits vector,
// whereas the 5551 forms take only srcColor.
// NOTE(review): older spellings (555To..., 555XTo..., 888XTo...) and renamed spellings
// (555aTo..., 555xTo..., 888xTo...) are both listed -- looks like diff residue; confirm
// which set is meant to remain.
template<bool SWAP_RB> void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_SSE2(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_SSE2(const v128u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler
public:
// Default constructor; the body is empty.
ColorspaceHandler_SSE2() {};
// u16 -> u32 "8888 Opaque" buffer converters: base, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned variants, each templated on BE_BYTESWAP.
// NOTE(review): both the 555To... and 555xTo... spellings are declared -- looks like rename
// residue from a diff view; confirm which set is current.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// u16 -> u32 "6665 Opaque" buffer converters: base, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned variants, each templated on BE_BYTESWAP.
// NOTE(review): both the 555To... and 555xTo... spellings are declared -- looks like rename
// residue from a diff view; confirm which set is current.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 16-bit (5551) -> 32-bit (8888) buffer converters: base, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned variants. Their definitions forward to
// ColorspaceConvertBuffer5551To8888_SSE2 with the matching pair of boolean template flags.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 16-bit (5551) -> 18-bit (6665) buffer converters: base, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned variants. Their definitions forward to
// ColorspaceConvertBuffer5551To6665_SSE2 with the matching pair of boolean template flags.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// u32 -> u32 8888-to-6665 buffer converters (base and _SwapRB). Unlike the u16-source
// declarations, these are not templated and their pointers carry no __restrict qualifier.
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,21 +89,21 @@ public:
// u32 -> u16 6665-to-5551 buffer converters, _IsUnaligned and _SwapRB_IsUnaligned variants.
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// u32 -> u32 "8888 Opaque" buffer converters: base, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned variants.
// NOTE(review): both the 888X... and 888x... spellings are declared -- looks like rename
// residue from a diff view; confirm which set is current.
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
#ifdef ENABLE_SSSE3
// u16 -> u8 "888" buffer converters, declared only when ENABLE_SSSE3 is defined.
// NOTE(review): both the 555X... and 555x... spellings are declared -- looks like rename
// residue from a diff view; confirm which set is current.
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
// u32 -> u8 "888" buffer converters, declared only when ENABLE_SSSE3 is defined.
// NOTE(review): both the 888X... and 888x... spellings are declared -- looks like rename
// residue from a diff view; confirm which set is current.
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
#endif
// u16 -> u16 buffer copy, _SwapRB variant; the definition is not visible in this section.
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;