Colorspace Handler: Add new functions for 16-bit to 18-bit and 32-bit color conversion that respect the 16-bit color's alpha bit.

- ColorspaceConvert5551To8888()
- ColorspaceConvert5551To6665()
- ColorspaceConvertBuffer5551To8888()
- ColorspaceConvertBuffer5551To6665()
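- Also rename the existing 16-bit color conversion functions (e.g. ColorspaceConvertBuffer555To8888Opaque() -> ColorspaceConvertBuffer555xTo8888Opaque(), ColorspaceConvert555XTo888X_*() -> ColorspaceConvert555xTo888x_*(), ColorspaceConvertBuffer888XTo8888Opaque() -> ColorspaceConvertBuffer888xTo8888Opaque()) so that the functions are easier to distinguish from one another.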
This commit is contained in:
rogerman 2024-07-15 16:24:02 -07:00
parent 4b0805e139
commit 586aea5310
29 changed files with 1680 additions and 781 deletions

View File

@ -1,7 +1,7 @@
/* /*
Copyright 2006 yopyop Copyright 2006 yopyop
Copyright 2007 shash Copyright 2007 shash
Copyright 2007-2023 DeSmuME team Copyright 2007-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -489,11 +489,11 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
} }
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
_DISP_FIFOrecv_LineAdvance(); _DISP_FIFOrecv_LineAdvance();

View File

@ -2,7 +2,7 @@
Copyright (C) 2006 yopyop Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 Theo Berkau Copyright (C) 2006-2007 Theo Berkau
Copyright (C) 2007 shash Copyright (C) 2007 shash
Copyright (C) 2008-2023 DeSmuME team Copyright (C) 2008-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -1032,12 +1032,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
{ {
if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) )
{ {
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
else else
{ {
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom);
} }
break; break;
@ -1047,12 +1047,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
{ {
if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) )
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
else else
{ {
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom);
} }
break; break;
@ -3729,7 +3729,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{ {
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
this->_RenderLine_DispCapture_Copy<OUTPUTFORMAT, 1, CAPTURELENGTH, true, false>(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt); this->_RenderLine_DispCapture_Copy<OUTPUTFORMAT, 1, CAPTURELENGTH, true, false>(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt);
@ -3755,7 +3755,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
else else
{ {
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32);
srcAPtr = this->_captureWorkingA32; srcAPtr = this->_captureWorkingA32;
} }
@ -3772,7 +3772,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{ {
if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0)) if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0))
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32);
@ -3919,7 +3919,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
{ {
if (willReadNativeVRAM) if (willReadNativeVRAM)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
} }
@ -4435,7 +4435,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo)
{ {
const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetCustom; const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetCustom;
u32 *dst = (u32 *)customBuffer + lineInfo.blockOffsetCustom; u32 *dst = (u32 *)customBuffer + lineInfo.blockOffsetCustom;
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapSrcDst>(src, dst, lineInfo.pixelCount); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapSrcDst>(src, dst, lineInfo.pixelCount);
break; break;
} }
@ -5780,11 +5780,11 @@ void GPUSubsystem::_ConvertAndUpscaleForLoadstate(const NDSDisplayID displayID,
switch (this->_display[displayID]->GetColorFormat()) switch (this->_display[displayID]->GetColorFormat())
{ {
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break; break;
default: default:
@ -6212,11 +6212,11 @@ void NDSDisplay::ResolveLinesDisplayedNative()
{ {
if (this->_customColorFormat == NDSColorFormat_BGR888_Rev) if (this->_customColorFormat == NDSColorFormat_BGR888_Rev)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
else else
{ {
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, working, dst); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, working, dst);
@ -6256,7 +6256,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo)
{ {
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break; break;
default: default:
@ -6298,7 +6298,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo)
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
break; break;
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021-2023 DeSmuME team Copyright (C) 2021-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -1128,13 +1128,13 @@ FORCEINLINE void PixelOperation_AVX2::_copy16(GPUEngineCompositorInfo &compInfo,
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555To6665Opaque_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
} }
_mm256_store_si256( (v256u32 *)compInfo.target.lineColor32 + 0, src32[0] ); _mm256_store_si256( (v256u32 *)compInfo.target.lineColor32 + 0, src32[0] );
@ -1205,13 +1205,13 @@ FORCEINLINE void PixelOperation_AVX2::_copyMask16(GPUEngineCompositorInfo &compI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555To6665Opaque_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(src1, src32[2], src32[3]);
} }
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1304,13 +1304,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUp16(GPUEngineCompositorInfo &c
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_AVX2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo666x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo666x_AVX2<false>(src1, dst[2], dst[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_AVX2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo888x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo888x_AVX2<false>(src1, dst[2], dst[3]);
} }
const v256u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm256_set1_epi32(0x1F000000) : _mm256_set1_epi32(0xFF000000); const v256u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm256_set1_epi32(0x1F000000) : _mm256_set1_epi32(0xFF000000);
@ -1377,13 +1377,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUpMask16(GPUEngineCompositorInf
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo666x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo666x_AVX2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo888x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo888x_AVX2<false>(src1, src32[2], src32[3]);
} }
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1471,13 +1471,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDown16(GPUEngineCompositorInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_AVX2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo666x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo666x_AVX2<false>(src1, dst[2], dst[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_AVX2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo888x_AVX2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo888x_AVX2<false>(src1, dst[2], dst[3]);
} }
const v256u32 alphaBits = _mm256_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F000000 : 0xFF000000); const v256u32 alphaBits = _mm256_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F000000 : 0xFF000000);
@ -1544,13 +1544,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDownMask16(GPUEngineCompositorI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo666x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo666x_AVX2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_AVX2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo888x_AVX2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo888x_AVX2<false>(src1, src32[2], src32[3]);
} }
passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8);
@ -1674,13 +1674,13 @@ FORCEINLINE void PixelOperation_AVX2::_unknownEffectMask16(GPUEngineCompositorIn
} }
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]); ColorspaceConvert555xTo666x_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo666X_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]); ColorspaceConvert555xTo666x_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]); ColorspaceConvert555xTo888x_AVX2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo888X_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]); ColorspaceConvert555xTo888x_AVX2<false>(src1, tmpSrc[2], tmpSrc[3]);
} }
switch (compInfo.renderState.colorEffect) switch (compInfo.renderState.colorEffect)

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021-2023 DeSmuME team Copyright (C) 2021-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -922,13 +922,13 @@ FORCEINLINE void PixelOperation_SSE2::_copy16(GPUEngineCompositorInfo &compInfo,
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555To6665Opaque_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
} }
_mm_store_si128( (v128u32 *)compInfo.target.lineColor32 + 0, src32[0] ); _mm_store_si128( (v128u32 *)compInfo.target.lineColor32 + 0, src32[0] );
@ -999,13 +999,13 @@ FORCEINLINE void PixelOperation_SSE2::_copyMask16(GPUEngineCompositorInfo &compI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555To6665Opaque_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(src1, src32[2], src32[3]);
} }
const v128u32 dst32[4] = { const v128u32 dst32[4] = {
@ -1104,13 +1104,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUp16(GPUEngineCompositorInfo &c
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_SSE2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo666x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo666x_SSE2<false>(src1, dst[2], dst[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_SSE2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo888x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo888x_SSE2<false>(src1, dst[2], dst[3]);
} }
const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000);
@ -1182,13 +1182,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUpMask16(GPUEngineCompositorInf
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo666x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo666x_SSE2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo888x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo888x_SSE2<false>(src1, src32[2], src32[3]);
} }
const v128u32 dst32[4] = { const v128u32 dst32[4] = {
@ -1275,13 +1275,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDown16(GPUEngineCompositorInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_SSE2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo666x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo666x_SSE2<false>(src1, dst[2], dst[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_SSE2<false>(src0, dst[0], dst[1]); ColorspaceConvert555xTo888x_SSE2<false>(src0, dst[0], dst[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, dst[2], dst[3]); ColorspaceConvert555xTo888x_SSE2<false>(src1, dst[2], dst[3]);
} }
const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000);
@ -1353,13 +1353,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDownMask16(GPUEngineCompositorI
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo666x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo666x_SSE2<false>(src1, src32[2], src32[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_SSE2<false>(src0, src32[0], src32[1]); ColorspaceConvert555xTo888x_SSE2<false>(src0, src32[0], src32[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, src32[2], src32[3]); ColorspaceConvert555xTo888x_SSE2<false>(src1, src32[2], src32[3]);
} }
const v128u32 dst32[4] = { const v128u32 dst32[4] = {
@ -1494,13 +1494,13 @@ FORCEINLINE void PixelOperation_SSE2::_unknownEffectMask16(GPUEngineCompositorIn
} }
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvert555XTo666X_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]); ColorspaceConvert555xTo666x_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo666X_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]); ColorspaceConvert555xTo666x_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
} }
else else
{ {
ColorspaceConvert555XTo888X_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]); ColorspaceConvert555xTo888x_SSE2<false>(src0, tmpSrc[0], tmpSrc[1]);
ColorspaceConvert555XTo888X_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]); ColorspaceConvert555xTo888x_SSE2<false>(src1, tmpSrc[2], tmpSrc[3]);
} }
switch (compInfo.renderState.colorEffect) switch (compInfo.renderState.colorEffect)

View File

@ -484,7 +484,7 @@ void ClientAVCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam &
const u16 *__restrict src = (const u16 *__restrict)param.src; const u16 *__restrict src = (const u16 *__restrict)param.src;
u8 *__restrict dst = param.dst; u8 *__restrict dst = param.dst;
ColorspaceConvertBuffer555XTo888<false, false>(src, dst, param.frameWidth * lineCount); ColorspaceConvertBuffer555xTo888<false, false>(src, dst, param.frameWidth * lineCount);
} }
//converts 32bpp to 24bpp and flips //converts 32bpp to 24bpp and flips
@ -494,7 +494,7 @@ void ClientAVCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam &
const u32 *__restrict src = (const u32 *__restrict)param.src; const u32 *__restrict src = (const u32 *__restrict)param.src;
u8 *__restrict dst = param.dst; u8 *__restrict dst = param.dst;
ColorspaceConvertBuffer888XTo888<false, false>(src, dst, param.frameWidth * lineCount); ColorspaceConvertBuffer888xTo888<false, false>(src, dst, param.frameWidth * lineCount);
} }
void ClientAVCaptureObject::CaptureVideoFrame(const void *srcVideoFrame, const size_t inFrameWidth, const size_t inFrameHeight, const NDSColorFormat colorFormat) void ClientAVCaptureObject::CaptureVideoFrame(const void *srcVideoFrame, const size_t inFrameWidth, const size_t inFrameHeight, const NDSColorFormat colorFormat)

6
desmume/src/frontend/cocoa/OGLDisplayOutput.cpp Executable file → Normal file
View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2014-2023 DeSmuME team Copyright (C) 2014-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -4719,7 +4719,7 @@ void OGLClientSharedData::FetchNativeDisplayToSrcClone(const NDSDisplayInfo *dis
return; return;
} }
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false;
if (needsLock) if (needsLock)
@ -4744,7 +4744,7 @@ void OGLClientSharedData::FetchCustomDisplayToSrcClone(const NDSDisplayInfo *dis
return; return;
} }
ColorspaceConvertBuffer888XTo8888Opaque<false, false>((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer888xTo8888Opaque<false, false>((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false;
if (needsLock) if (needsLock)

View File

@ -1,6 +1,6 @@
/* /*
Copyright (C) 2011 Roger Manuel Copyright (C) 2011 Roger Manuel
Copyright (C) 2011-2022 DeSmuME team Copyright (C) 2011-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -674,7 +674,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
// The first entry always represents the alpha, so just set it to 0. // The first entry always represents the alpha, so just set it to 0.
const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette; const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette;
CACHE_ALIGN uint32_t clut32[16]; CACHE_ALIGN uint32_t clut32[16];
ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapNone>(clut4, clut32, 16); ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapNone>(clut4, clut32, 16);
clut32[0] = 0x00000000; clut32[0] = 0x00000000;
// Load the image from the icon pixel data. // Load the image from the icon pixel data.

View File

@ -1,6 +1,6 @@
/* /*
Copyright (C) 2011 Roger Manuel Copyright (C) 2011 Roger Manuel
Copyright (C) 2013 DeSmuME team Copyright (C) 2013-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -139,7 +139,7 @@
} }
uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData]; uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData];
ColorspaceConvertBuffer888XTo8888Opaque<false, true>((const uint32_t *)[self runFilter], bitmapData, w * h); ColorspaceConvertBuffer888xTo8888Opaque<false, true>((const uint32_t *)[self runFilter], bitmapData, w * h);
#ifdef MSB_FIRST #ifdef MSB_FIRST
for (size_t i = 0; i < w * h; i++) for (size_t i = 0; i < w * h; i++)

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2017-2023 DeSmuME team Copyright (C) 2017-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -2556,7 +2556,7 @@ void MacMetalFetchObject::_FetchNativeDisplayByID(const NDSDisplayID displayID,
GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]);
pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
} }
@ -2570,7 +2570,7 @@ void MacMetalFetchObject::_FetchCustomDisplayByID(const NDSDisplayID displayID,
GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]);
pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
ColorspaceConvertBuffer888XTo8888Opaque<false, false>((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer888xTo8888Opaque<false, false>((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
} }

View File

@ -1380,7 +1380,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo
static inline void gpu_screen_to_rgb(u32* dst) static inline void gpu_screen_to_rgb(u32* dst)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16,
dst, real_framebuffer_width * real_framebuffer_height * 2); dst, real_framebuffer_width * real_framebuffer_height * 2);
} }
@ -1591,7 +1591,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo
} }
static void RedrawScreen() { static void RedrawScreen() {
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapDst>( ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapDst>(
GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16,
(uint32_t *)video->GetSrcBufferPtr(), real_framebuffer_width * real_framebuffer_height * 2); (uint32_t *)video->GetSrcBufferPtr(), real_framebuffer_width * real_framebuffer_height * 2);
#ifdef HAVE_LIBAGG #ifdef HAVE_LIBAGG

View File

@ -1666,7 +1666,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo
static inline void gpu_screen_to_rgb(u32* dst) static inline void gpu_screen_to_rgb(u32* dst)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
} }
static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) { static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) {
@ -1791,7 +1791,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo
} }
static void RedrawScreen() { static void RedrawScreen() {
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapDst>(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
#ifdef HAVE_LIBAGG #ifdef HAVE_LIBAGG
aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024); aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024);
osd->update(); osd->update();

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2006-2018 DeSmuME team Copyright (C) 2006-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -736,7 +736,7 @@ void NDSCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam &param
for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++)
{ {
ColorspaceConvertBuffer555XTo888<true, false>(src, dst, param.frameWidth); ColorspaceConvertBuffer555xTo888<true, false>(src, dst, param.frameWidth);
src += param.frameWidth; src += param.frameWidth;
dst -= param.frameWidth * 3; dst -= param.frameWidth * 3;
} }
@ -750,7 +750,7 @@ void NDSCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam &param
for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++)
{ {
ColorspaceConvertBuffer888XTo888<true, false>(src, dst, param.frameWidth); ColorspaceConvertBuffer888xTo888<true, false>(src, dst, param.frameWidth);
src += param.frameWidth; src += param.frameWidth;
dst -= param.frameWidth * 3; dst -= param.frameWidth * 3;
} }

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2018 DeSmuME team Copyright (C) 2018-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -683,9 +683,9 @@ void DoDisplay()
//we have to do a copy here because we're about to draw the OSD onto it. bummer. //we have to do a copy here because we're about to draw the OSD onto it. bummer.
if (gpu_bpp == 15) if (gpu_bpp == 15)
ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapNone>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapNone>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2);
else else
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); ColorspaceConvertBuffer888xTo8888Opaque<true, false>((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4);
//some games use the backlight for fading effects //some games use the backlight for fading effects
const size_t pixCount = video.prefilterWidth * video.prefilterHeight / 2; const size_t pixCount = video.prefilterWidth * video.prefilterHeight / 2;

View File

@ -3,7 +3,7 @@
licensed under the terms supplied at the end of this file (for the terms are very long!) licensed under the terms supplied at the end of this file (for the terms are very long!)
Differences from that baseline version are: Differences from that baseline version are:
Copyright (C) 2009-2019 DeSmuME team Copyright (C) 2009-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -174,7 +174,7 @@ static void DoScreenshot(const char* fname)
else else
{ {
u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); ColorspaceConvertBuffer888xTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
NDS_WritePNG_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight*2, swapbuf, fname); NDS_WritePNG_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight*2, swapbuf, fname);
free_aligned(swapbuf); free_aligned(swapbuf);
} }
@ -189,7 +189,7 @@ static void DoScreenshot(const char* fname)
else else
{ {
u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); ColorspaceConvertBuffer888xTo8888Opaque<true, true>((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2);
NDS_WriteBMP_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight *2, swapbuf, fname); NDS_WriteBMP_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight *2, swapbuf, fname);
free_aligned(swapbuf); free_aligned(swapbuf);
} }

View File

@ -3441,7 +3441,7 @@ void ScreenshotToClipboard(bool extraInfo)
else else
{ {
u32* swapbuf = (u32*)malloc_alignedPage(width*height * 4); u32* swapbuf = (u32*)malloc_alignedPage(width*height * 4);
ColorspaceConvertBuffer888XTo8888Opaque<true, false>((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height); ColorspaceConvertBuffer888xTo8888Opaque<true, false>((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height);
SetDIBitsToDevice(hMemDC, 0, 0, width, height, 0, 0, 0, height, swapbuf, (BITMAPINFO*)&bmi, DIB_RGB_COLORS); SetDIBitsToDevice(hMemDC, 0, 0, width, height, 0, 0, 0, height, swapbuf, (BITMAPINFO*)&bmi, DIB_RGB_COLORS);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2009-2023 DeSmuME team Copyright (C) 2009-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -2032,7 +2032,7 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D_State &renderState
} }
// Convert the toon table colors // Convert the toon table colors
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32); ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapDst>(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32);
if (this->_enableEdgeMark) if (this->_enableEdgeMark)
{ {

View File

@ -1,7 +1,7 @@
/* /*
Copyright (C) 2006 yopyop Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash Copyright (C) 2006-2007 shash
Copyright (C) 2008-2023 DeSmuME team Copyright (C) 2008-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -867,13 +867,13 @@ void __NDSTextureUnpackI2_AVX2(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -923,13 +923,13 @@ void __NDSTextureUnpackI2_SSSE3(const size_t texelCount, const u8 *__restrict sr
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -977,13 +977,13 @@ void __NDSTextureUnpackI2_NEON(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1028,13 +1028,13 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]); ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]); ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1146,13 +1146,13 @@ void __NDSTextureUnpackI4_AVX2(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1208,13 +1208,13 @@ void __NDSTextureUnpackI4_SSSE3(const size_t texelCount, const u8 *__restrict sr
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1261,13 +1261,13 @@ void __NDSTextureUnpackI4_NEON(const size_t texelCount, const u8 *__restrict src
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555xTo6665Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor0, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555xTo8888Opaque_NEON<false>(palColor1, convertedColor.val[2], convertedColor.val[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1312,13 +1312,13 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]); ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor0, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]); ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(palColor1, convertedColor[3], convertedColor[2]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -1434,13 +1434,13 @@ void __NDSTextureUnpackA3I5_NEON(const size_t texelCount, const u8 *__restrict s
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555aTo6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555aTo6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
} }
else else
{ {
ColorspaceConvert555To8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555aTo8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555aTo8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
} }
vst1q_u32_x4(dstBuffer + i, convertedColor); vst1q_u32_x4(dstBuffer + i, convertedColor);
@ -1486,13 +1486,13 @@ void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]); ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]); ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
} }
else else
{ {
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]); ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]); ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
} }
vec_st(convertedColor[0], 0, dstBuffer); vec_st(convertedColor[0], 0, dstBuffer);
@ -1566,8 +1566,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s
const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha);
const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha);
ColorspaceConvert555To6665_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]); ColorspaceConvert555aTo6665_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]); ColorspaceConvert555aTo6665_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
} }
else else
{ {
@ -1577,8 +1577,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s
const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha);
const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha);
ColorspaceConvert555To8888_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]); ColorspaceConvert555aTo8888_AVX2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]); ColorspaceConvert555aTo8888_AVX2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
} }
_mm256_store_si256((v256u32 *)dstBuffer + 0, convertedColor[0]); _mm256_store_si256((v256u32 *)dstBuffer + 0, convertedColor[0]);
@ -1615,8 +1615,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict
const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
ColorspaceConvert555To6665_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]); ColorspaceConvert555aTo6665_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]); ColorspaceConvert555aTo6665_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
} }
else else
{ {
@ -1624,8 +1624,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict
const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
ColorspaceConvert555To8888_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]); ColorspaceConvert555aTo8888_SSE2<false>(palColor0, alphaLo, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]); ColorspaceConvert555aTo8888_SSE2<false>(palColor1, alphaHi, convertedColor[2], convertedColor[3]);
} }
_mm_store_si128((v128u32 *)(dstBuffer + i) + 0, convertedColor[0]); _mm_store_si128((v128u32 *)(dstBuffer + i) + 0, convertedColor[0]);
@ -1661,8 +1661,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s
const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) );
const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) );
ColorspaceConvert555To6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555aTo6665_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555aTo6665_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
} }
else else
{ {
@ -1670,8 +1670,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s
const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) );
const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) );
ColorspaceConvert555To8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555aTo8888_NEON<false>(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]);
ColorspaceConvert555To8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); ColorspaceConvert555aTo8888_NEON<false>(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]);
} }
vst1q_u32_x4(dstBuffer + i, convertedColor); vst1q_u32_x4(dstBuffer + i, convertedColor);
@ -1707,8 +1707,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]); ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]); ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
} }
else else
{ {
@ -1716,8 +1716,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]); ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]); ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
} }
vec_st(convertedColor[0], 0, dstBuffer); vec_st(convertedColor[0], 0, dstBuffer);
@ -1900,11 +1900,11 @@ void __NDSTextureUnpackDirect16Bit_AVX2(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_AVX2<false>(c, convertedColor[0], convertedColor[1]);
} }
v256u16 alpha = _mm256_cmpeq_epi16(_mm256_srli_epi16(c, 15), _mm256_set1_epi16(1)); v256u16 alpha = _mm256_cmpeq_epi16(_mm256_srli_epi16(c, 15), _mm256_set1_epi16(1));
@ -1930,11 +1930,11 @@ void __NDSTextureUnpackDirect16Bit_SSE2(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo6665Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]); ColorspaceConvert555xTo8888Opaque_SSE2<false>(c, convertedColor[0], convertedColor[1]);
} }
const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1)); const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1));
@ -1959,11 +1959,11 @@ void __NDSTextureUnpackDirect16Bit_NEON(const size_t texelCount, const u16 *__re
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo6665Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]); ColorspaceConvert555xTo8888Opaque_NEON<false>(c, convertedColor.val[0], convertedColor.val[1]);
} }
const v128u16 alpha = vceqq_u16(vshrq_n_u16(c,15), vdupq_n_u16(1)); const v128u16 alpha = vceqq_u16(vshrq_n_u16(c,15), vdupq_n_u16(1));
@ -1987,11 +1987,11 @@ void __NDSTextureUnpackDirect16Bit_AltiVec(const size_t texelCount, const u16 *_
if (TEXCACHEFORMAT == TexFormat_15bpp) if (TEXCACHEFORMAT == TexFormat_15bpp)
{ {
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
} }
else else
{ {
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]); ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrcDst>(c, convertedColor[1], convertedColor[0]);
} }
const v128u16 alpha = vec_and(c, ((v128u16){0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080})); const v128u16 alpha = vec_and(c, ((v128u16){0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080}));

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2023 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -187,7 +187,7 @@ void ColorspaceHandlerInit()
} }
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{ {
size_t i = 0; size_t i = 0;
@ -198,22 +198,22 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555To8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
} }
} }
else else
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555To8888Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo8888Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555To8888Opaque<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo8888Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
} }
} }
@ -243,7 +243,7 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re
} }
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{ {
size_t i = 0; size_t i = 0;
@ -254,22 +254,22 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555To6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCountVector);
} }
} }
else else
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555To6665Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo6665Opaque_IsUnaligned<BE_BYTESWAP>(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555To6665Opaque<BE_BYTESWAP>(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo6665Opaque<BE_BYTESWAP>(src, dst, pixCountVector);
} }
} }
@ -298,6 +298,119 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re
} }
} }
// Converts a buffer of 16-bit 5551 colors into 32-bit 8888 colors.
//
// When USEMANUALVECTORIZATION is defined, the largest whole-vector prefix of the
// buffer is handed to the matching method of the colorspace handler (csh). Whatever
// that call leaves unprocessed is converted here, one pixel at a time, with
// ColorspaceConvert5551To8888<SWAP_RB>().
//
// Template parameters:
//   SWAP_RB      - forwarded to the per-pixel converter; also selects the *_SwapRB handler method.
//   IS_UNALIGNED - selects the *_IsUnaligned handler method.
//   BE_BYTESWAP  - selects whether LE_TO_LOCAL_16() is applied to each source value
//                  and/or LE_TO_LOCAL_32() is applied to each converted value.
//
// Parameters:
//   src      - source 16-bit colors.
//   dst      - destination buffer that receives the converted 32-bit colors.
//   pixCount - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	// Only a whole number of vectors is given to the handler. The value it returns
	// becomes the starting index of the scalar loop below.
	const size_t pixPerVector = VECTORSIZE / sizeof(u16);
	const size_t vectorPixCount = pixCount - (pixCount % pixPerVector);
	
	if (SWAP_RB && IS_UNALIGNED)
	{
		i = csh.ConvertBuffer5551To8888_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else if (SWAP_RB)
	{
		i = csh.ConvertBuffer5551To8888_SwapRB<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else if (IS_UNALIGNED)
	{
		i = csh.ConvertBuffer5551To8888_IsUnaligned<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else
	{
		i = csh.ConvertBuffer5551To8888<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
	for (; i < pixCount; i++)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[i] = ColorspaceConvert5551To8888<SWAP_RB>(src[i]);
		}
		else if (BE_BYTESWAP == BESwapIn)
		{
			dst[i] = ColorspaceConvert5551To8888<SWAP_RB>(LE_TO_LOCAL_16(src[i]));
		}
		else if (BE_BYTESWAP == BESwapOut)
		{
			dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<SWAP_RB>(src[i]) );
		}
		else if (BE_BYTESWAP == BESwapInOut)
		{
			dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<SWAP_RB>(LE_TO_LOCAL_16(src[i])) );
		}
	}
}
// Converts a buffer of 16-bit 5551 colors into 32-bit 6665 colors.
//
// When USEMANUALVECTORIZATION is defined, the largest whole-vector prefix of the
// buffer is handed to the matching method of the colorspace handler (csh). Whatever
// that call leaves unprocessed is converted here, one pixel at a time, with
// ColorspaceConvert5551To6665<SWAP_RB>().
//
// Template parameters:
//   SWAP_RB      - forwarded to the per-pixel converter; also selects the *_SwapRB handler method.
//   IS_UNALIGNED - selects the *_IsUnaligned handler method.
//   BE_BYTESWAP  - selects whether LE_TO_LOCAL_16() is applied to each source value
//                  and/or LE_TO_LOCAL_32() is applied to each converted value.
//
// Parameters:
//   src      - source 16-bit colors.
//   dst      - destination buffer that receives the converted 32-bit colors.
//   pixCount - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP>
void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	// Only a whole number of vectors is given to the handler. The value it returns
	// becomes the starting index of the scalar loop below.
	const size_t pixPerVector = VECTORSIZE / sizeof(u16);
	const size_t vectorPixCount = pixCount - (pixCount % pixPerVector);
	
	if (SWAP_RB && IS_UNALIGNED)
	{
		i = csh.ConvertBuffer5551To6665_SwapRB_IsUnaligned<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else if (SWAP_RB)
	{
		i = csh.ConvertBuffer5551To6665_SwapRB<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else if (IS_UNALIGNED)
	{
		i = csh.ConvertBuffer5551To6665_IsUnaligned<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	else
	{
		i = csh.ConvertBuffer5551To6665<BE_BYTESWAP>(src, dst, vectorPixCount);
	}
	
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
	for (; i < pixCount; i++)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[i] = ColorspaceConvert5551To6665<SWAP_RB>(src[i]);
		}
		else if (BE_BYTESWAP == BESwapIn)
		{
			dst[i] = ColorspaceConvert5551To6665<SWAP_RB>(LE_TO_LOCAL_16(src[i]));
		}
		else if (BE_BYTESWAP == BESwapOut)
		{
			dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<SWAP_RB>(src[i]) );
		}
		else if (BE_BYTESWAP == BESwapInOut)
		{
			dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<SWAP_RB>(LE_TO_LOCAL_16(src[i])) );
		}
	}
}
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{ {
@ -455,7 +568,7 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount)
{ {
size_t i = 0; size_t i = 0;
@ -466,22 +579,22 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCountVector);
} }
} }
else else
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer888XTo8888Opaque_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo8888Opaque_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer888XTo8888Opaque(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo8888Opaque(src, dst, pixCountVector);
} }
} }
@ -494,7 +607,7 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount)
{ {
size_t i = 0; size_t i = 0;
@ -505,22 +618,22 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555XTo888_SwapRB(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo888_SwapRB(src, dst, pixCountVector);
} }
} }
else else
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer555XTo888_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo888_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer555XTo888(src, dst, pixCountVector); i = csh.ConvertBuffer555xTo888(src, dst, pixCountVector);
} }
} }
@ -533,7 +646,7 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount)
{ {
size_t i = 0; size_t i = 0;
@ -544,22 +657,22 @@ void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer888XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer888XTo888_SwapRB(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo888_SwapRB(src, dst, pixCountVector);
} }
} }
else else
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
i = csh.ConvertBuffer888XTo888_IsUnaligned(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo888_IsUnaligned(src, dst, pixCountVector);
} }
else else
{ {
i = csh.ConvertBuffer888XTo888(src, dst, pixCountVector); i = csh.ConvertBuffer888xTo888(src, dst, pixCountVector);
} }
} }
@ -811,7 +924,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -841,7 +954,7 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -871,19 +984,19 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restr
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque<BE_BYTESWAP>(src, dst, pixCount); return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque<BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount); return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -913,7 +1026,7 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -943,15 +1056,159 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restr
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque<BE_BYTESWAP>(src, dst, pixCount); return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque<BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount); return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
}
// Scalar conversion of a 5551 color buffer to 8888, without red/blue swapping.
//
// Each pixel goes through ColorspaceConvert5551To8888<false>(). BE_BYTESWAP selects
// whether LE_TO_LOCAL_16() is applied to the source value and/or LE_TO_LOCAL_32()
// is applied to the converted value.
//
// Returns the number of pixels stepped over, which equals pixCount once the loop ends.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<false>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<false>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<false>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<false>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Scalar conversion of a 5551 color buffer to 8888, with red/blue swapping.
//
// Each pixel goes through ColorspaceConvert5551To8888<true>(). BE_BYTESWAP selects
// whether LE_TO_LOCAL_16() is applied to the source value and/or LE_TO_LOCAL_32()
// is applied to the converted value.
//
// Returns the number of pixels stepped over, which equals pixCount once the loop ends.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<true>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To8888<true>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<true>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888<true>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Unaligned-buffer entry point for the 5551 -> 8888 conversion.
// This implementation forwards to ColorspaceHandler::ConvertBuffer5551To8888<BE_BYTESWAP>()
// with the same arguments and returns its result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer5551To8888<BE_BYTESWAP>(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point for the red/blue-swapping 5551 -> 8888 conversion.
// This implementation forwards to ColorspaceHandler::ConvertBuffer5551To8888_SwapRB<BE_BYTESWAP>()
// with the same arguments and returns its result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer5551To8888_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of a 5551 color buffer to 6665, without red/blue swapping.
//
// Each pixel goes through ColorspaceConvert5551To6665<false>(). BE_BYTESWAP selects
// whether LE_TO_LOCAL_16() is applied to the source value and/or LE_TO_LOCAL_32()
// is applied to the converted value.
//
// Returns the number of pixels stepped over, which equals pixCount once the loop ends.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<false>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<false>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<false>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<false>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Scalar conversion of a 5551 color buffer to 6665, with red/blue swapping.
//
// Each pixel goes through ColorspaceConvert5551To6665<true>(). BE_BYTESWAP selects
// whether LE_TO_LOCAL_16() is applied to the source value and/or LE_TO_LOCAL_32()
// is applied to the converted value.
//
// Returns the number of pixels stepped over, which equals pixCount once the loop ends.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t pixIndex = 0;
	
	while (pixIndex < pixCount)
	{
		if (BE_BYTESWAP == BESwapNone)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<true>(src[pixIndex]);
		}
		else if (BE_BYTESWAP == BESwapSrc)
		{
			dst[pixIndex] = ColorspaceConvert5551To6665<true>(LE_TO_LOCAL_16(src[pixIndex]));
		}
		else if (BE_BYTESWAP == BESwapDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<true>(src[pixIndex]) );
		}
		else if (BE_BYTESWAP == BESwapSrcDst)
		{
			dst[pixIndex] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665<true>(LE_TO_LOCAL_16(src[pixIndex])) );
		}
		
		pixIndex++;
	}
	
	return pixIndex;
}
// Unaligned-buffer entry point for the 5551 -> 6665 conversion.
// This implementation forwards to ColorspaceHandler::ConvertBuffer5551To6665<BE_BYTESWAP>()
// with the same arguments and returns its result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer5551To6665<BE_BYTESWAP>(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point for the red/blue-swapping 5551 -> 6665 conversion.
// This implementation forwards to ColorspaceHandler::ConvertBuffer5551To6665_SwapRB<BE_BYTESWAP>()
// with the same arguments and returns its result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer5551To6665_SwapRB<BE_BYTESWAP>(src, dst, pixCount);
	return convertedCount;
}
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -1090,7 +1347,7 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount); return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1102,7 +1359,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1114,17 +1371,17 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return this->ConvertBuffer888XTo8888Opaque(src, dst, pixCount); return this->ConvertBuffer888xTo8888Opaque(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return this->ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCount); return this->ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1136,7 +1393,7 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1148,17 +1405,17 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict sr
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return this->ConvertBuffer555XTo888(src, dst, pixCount); return this->ConvertBuffer555xTo888(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return this->ConvertBuffer555XTo888_SwapRB(src, dst, pixCount); return this->ConvertBuffer555xTo888_SwapRB(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1170,7 +1427,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
size_t i = 0; size_t i = 0;
@ -1182,14 +1439,14 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict sr
return i; return i;
} }
size_t ColorspaceHandler::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return this->ConvertBuffer888XTo888(src, dst, pixCount); return this->ConvertBuffer888xTo888(src, dst, pixCount);
} }
size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return this->ConvertBuffer888XTo888_SwapRB(src, dst, pixCount); return this->ConvertBuffer888xTo888_SwapRB(src, dst, pixCount);
} }
size_t ColorspaceHandler::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const size_t ColorspaceHandler::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1396,39 +1653,73 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst,
return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity); return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity);
} }
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo6665Opaque<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
// Explicit instantiations of ColorspaceConvertBuffer5551To8888() for every combination of
// the two bool template arguments (true/false each) and the four BESwapFlags values
// (BESwapNone, BESwapIn, BESwapOut, BESwapInOut): 2 x 2 x 4 = 16 instantiations.
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To8888<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
// Explicit instantiations of ColorspaceConvertBuffer5551To6665() for every combination of
// the two bool template arguments (true/false each) and the four BESwapFlags values
// (BESwapNone, BESwapIn, BESwapOut, BESwapInOut): 2 x 2 x 4 = 16 instantiations.
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapNone>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapIn>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<true, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, true, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer5551To6665<false, false, BESwapInOut>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
@ -1450,20 +1741,20 @@ template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restri
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, true>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo8888Opaque<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<true, false>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo8888Opaque<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, true>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo8888Opaque<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo8888Opaque<false, false>(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo8888Opaque<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<true, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo888<true, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<true, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo888<true, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<false, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo888<false, true>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555XTo888<false, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555xTo888<false, false>(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<true, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo888<true, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<true, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo888<true, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<false, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo888<false, true>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer888XTo888<false, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer888xTo888<false, false>(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template void ColorspaceCopyBuffer16<true, true>(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer16<true, true>(const u16 *src, u16 *dst, size_t pixCount);
template void ColorspaceCopyBuffer16<true, false>(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer16<true, false>(const u16 *src, u16 *dst, size_t pixCount);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2023 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -126,6 +126,26 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
} }
// Converts one 16-bit 5551 color to a 32-bit 8888 color.
//
// The low 15 bits of src select the RGB value through the opaque 555-to-8888 conversion
// macro (the _SWAP_RB flavor when SWAP_RB is true). Bit 15 of src is then used as the
// alpha bit: the output alpha component is 0xFF when it is set and 0x00 when it is clear.
//
// Returns the packed 32-bit color value.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert5551To8888(const u16 src)
{
	Color4u8 converted;
	
	if (SWAP_RB)
	{
		converted.value = COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	else
	{
		converted.value = COLOR555TO8888_OPAQUE(src & 0x7FFF);
	}
	
	// Replace the alpha produced by the opaque conversion with one derived from bit 15.
	if (src & 0x8000)
	{
		converted.a = 0xFF;
	}
	else
	{
		converted.a = 0x00;
	}
	
	return converted.value;
}
// Converts one 16-bit 5551 color to a 32-bit 6665 color.
//
// The low 15 bits of src select the RGB value through the opaque 555-to-6665 conversion
// macro (the _SWAP_RB flavor when SWAP_RB is true). Bit 15 of src is then used as the
// alpha bit: the output alpha component is 0x1F when it is set and 0x00 when it is clear.
//
// Returns the packed 32-bit color value.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert5551To6665(const u16 src)
{
	Color4u8 converted;
	
	if (SWAP_RB)
	{
		converted.value = COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	else
	{
		converted.value = COLOR555TO6665_OPAQUE(src & 0x7FFF);
	}
	
	// Replace the alpha produced by the opaque conversion with one derived from bit 15.
	if (src & 0x8000)
	{
		converted.a = 0x1F;
	}
	else
	{
		converted.a = 0x00;
	}
	
	return converted.value;
}
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor) FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor)
{ {
@ -331,16 +351,18 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity)
return ColorspaceApplyIntensity32<SWAP_RB>(srcColorComponent); return ColorspaceApplyIntensity32<SWAP_RB>(srcColorComponent);
} }
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
// Buffer conversions from 16-bit 5551 source colors to 32-bit 8888 / 6665 destination colors.
// They take the same template parameters and arguments as the 555x...Opaque buffer
// conversions declared alongside them: src is the 16-bit input, dst the 32-bit output,
// and pixCount the number of pixels.
// NOTE(review): presumably these apply ColorspaceConvert5551To8888() / ColorspaceConvert5551To6665()
// per pixel -- confirm against the definitions in the .cpp file.
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED, BESwapFlags BE_BYTESWAP> void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount); template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount);
@ -353,15 +375,25 @@ class ColorspaceHandler
public: public:
ColorspaceHandler() {}; ColorspaceHandler() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 5551 -> 8888 buffer conversions. One member per SwapRB / IsUnaligned combination,
// using the same name suffixes as the other ConvertBuffer* members of this class.
// NOTE(review): the size_t return is presumably the number of pixels converted -- confirm
// against the implementations.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 5551 -> 6665 buffer conversions, with the same four SwapRB / IsUnaligned variants.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -383,20 +415,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@
#include <immintrin.h> #include <immintrin.h>
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -64,7 +64,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -101,7 +101,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -141,7 +141,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -178,17 +178,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{ {
const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0xFF00); const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0xFF00);
ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{ {
const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0x1F00); const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0x1F00);
ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a vector of 16-bit 5551 colors to 32-bit 8888 colors, taking the output alpha
// from bit 15 of each source color, as the scalar ColorspaceConvert5551To8888() does:
// alpha bit set -> 0xFF, alpha bit clear -> 0x00.
//
// srcColor      - vector of 16-bit source colors.
// dstLo / dstHi - receive the converted 32-bit colors, as written by
//                 ColorspaceConvert555aTo8888_AVX2().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_cmpgt_epi16() compares signed 16-bit lanes, so (0 > srcColor) is all-ones
	// exactly in the lanes whose bit 15 (the alpha bit) is set. Comparing the other way
	// around (srcColor > -1) would select the lanes whose alpha bit is clear instead.
	const v256u16 alphaBitIsSet = _mm256_cmpgt_epi16(_mm256_setzero_si256(), srcColor);
	
	// 0xFF00 is the same per-lane alpha pattern the opaque conversion passes for full alpha.
	const v256u16 srcAlphaBits16 = _mm256_and_si256( alphaBitIsSet, _mm256_set1_epi16(0xFF00) );
	ColorspaceConvert555aTo8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a vector of 16-bit 5551 colors to 32-bit 6665 colors, taking the output alpha
// from bit 15 of each source color, as the scalar ColorspaceConvert5551To6665() does:
// alpha bit set -> 0x1F, alpha bit clear -> 0x00.
//
// srcColor      - vector of 16-bit source colors.
// dstLo / dstHi - receive the converted 32-bit colors, as written by
//                 ColorspaceConvert555aTo6665_AVX2().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_cmpgt_epi16() compares signed 16-bit lanes, so (0 > srcColor) is all-ones
	// exactly in the lanes whose bit 15 (the alpha bit) is set. Comparing the other way
	// around (srcColor > -1) would select the lanes whose alpha bit is clear instead.
	const v256u16 alphaBitIsSet = _mm256_cmpgt_epi16(_mm256_setzero_si256(), srcColor);
	
	// 0x1F00 is the same per-lane alpha pattern the opaque conversion passes for full alpha.
	const v256u16 srcAlphaBits16 = _mm256_and_si256( alphaBitIsSet, _mm256_set1_epi16(0x1F00) );
	ColorspaceConvert555aTo6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB> template <bool SWAP_RB>
@ -320,7 +334,7 @@ FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src) FORCEINLINE v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src)
{ {
if (SWAP_RB) if (SWAP_RB)
{ {
@ -407,7 +421,7 @@ FORCEINLINE v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float in
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{ {
size_t i = 0; size_t i = 0;
@ -415,7 +429,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict
{ {
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi; v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -433,7 +447,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{ {
size_t i = 0; size_t i = 0;
@ -441,7 +455,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u3
{ {
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi; v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16)))
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16)))
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -539,7 +605,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256) size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{ {
size_t i = 0; size_t i = 0;
@ -547,11 +613,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) ); _mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
} }
else else
{ {
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) ); _mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
} }
} }
@ -559,7 +625,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) size_t ColorspaceConvertBuffer555xTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
{ {
size_t i = 0; size_t i = 0;
v256u16 src_v256u16[2]; v256u16 src_v256u16[2];
@ -636,7 +702,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__re
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) size_t ColorspaceConvertBuffer888xTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256)
{ {
size_t i = 0; size_t i = 0;
v256u32 src_v256u32[4]; v256u32 src_v256u32[4];
@ -905,51 +971,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256,
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX2<true, true>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 8888 buffer by forwarding to
// ColorspaceConvertBuffer5551To8888_AVX2<SWAP_RB=false, IS_UNALIGNED=false>.
// With IS_UNALIGNED=false the helper uses the aligned 256-bit load/store
// intrinsics, so src and dst must be 32-byte aligned.
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To8888_AVX2<false, false>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 8888 buffer by forwarding to
// ColorspaceConvertBuffer5551To8888_AVX2<SWAP_RB=true, IS_UNALIGNED=false>.
// With IS_UNALIGNED=false the helper uses the aligned 256-bit load/store
// intrinsics, so src and dst must be 32-byte aligned.
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To8888_AVX2<true, false>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 8888 buffer by forwarding to
// ColorspaceConvertBuffer5551To8888_AVX2<SWAP_RB=false, IS_UNALIGNED=true>.
// With IS_UNALIGNED=true the helper uses the unaligned 256-bit load/store
// intrinsics.
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To8888_AVX2<false, true>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 8888 buffer by forwarding to
// ColorspaceConvertBuffer5551To8888_AVX2<SWAP_RB=true, IS_UNALIGNED=true>.
// With IS_UNALIGNED=true the helper uses the unaligned 256-bit load/store
// intrinsics.
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To8888_AVX2<true, true>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 6665 buffer by forwarding to
// ColorspaceConvertBuffer5551To6665_AVX2<SWAP_RB=false, IS_UNALIGNED=false>.
// With IS_UNALIGNED=false the helper reads src with the aligned 256-bit load
// intrinsic, so src must be 32-byte aligned (dst presumably as well -- TODO confirm).
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To6665_AVX2<false, false>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 6665 buffer by forwarding to
// ColorspaceConvertBuffer5551To6665_AVX2<SWAP_RB=true, IS_UNALIGNED=false>.
// With IS_UNALIGNED=false the helper reads src with the aligned 256-bit load
// intrinsic, so src must be 32-byte aligned (dst presumably as well -- TODO confirm).
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To6665_AVX2<true, false>(src, dst, pixCount);
}
// Converts a 16-bit 5551 buffer to a 32-bit 6665 buffer by forwarding to
// ColorspaceConvertBuffer5551To6665_AVX2<SWAP_RB=false, IS_UNALIGNED=true>.
// With IS_UNALIGNED=true the helper reads src with the unaligned 256-bit load
// intrinsic.
// BE_BYTESWAP is not referenced in this body.
// Returns the helper's result unchanged.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To6665_AVX2<false, true>(src, dst, pixCount);
}
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer5551To6665_AVX2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -1032,64 +1146,64 @@ size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX2<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX2<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX2<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX2<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX2<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX2<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX2::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX2::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1152,23 +1266,23 @@ size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_AVX2<true, true>(dst, pixCount, intensity); return ColorspaceApplyIntensityToBuffer32_AVX2<true, true>(dst, pixCount, intensity);
} }
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555aTo8888_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555aTo8888_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo888x_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo888x_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555aTo6665_AVX2<true>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555aTo6665_AVX2<false>(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo666x_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo666x_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src); template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src); template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
@ -1182,8 +1296,8 @@ template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, c
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<true>(const v256u32 &src); template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2<false>(const v256u32 &src); template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2<false>(const v256u32 &src);
template v256u16 ColorspaceCopy16_AVX2<true>(const v256u16 &src); template v256u16 ColorspaceCopy16_AVX2<true>(const v256u16 &src);
template v256u16 ColorspaceCopy16_AVX2<false>(const v256u16 &src); template v256u16 ColorspaceCopy16_AVX2<false>(const v256u16 &src);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires AVX2 support. #warning This header requires AVX2 support.
#else #else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src); template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); template<bool SWAP_RB> v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); template<bool SWAP_RB> v256u16 ColorspaceCopy16_AVX2(const v256u16 &src);
template<bool SWAP_RB> v256u32 ColorspaceCopy32_AVX2(const v256u32 &src); template<bool SWAP_RB> v256u32 ColorspaceCopy32_AVX2(const v256u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler
public: public:
ColorspaceHandler_AVX2() {}; ColorspaceHandler_AVX2() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@
#include <immintrin.h> #include <immintrin.h>
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -44,7 +44,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, cons
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -62,7 +62,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v51
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -81,7 +81,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, cons
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -99,17 +99,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v51
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{ {
const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0xFF00); const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0xFF00);
ColorspaceConvert555To8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{ {
const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0x1F00); const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0x1F00);
ColorspaceConvert555To6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a 512-bit vector of 16-bit 5551 pixels to 32-bit 8888 pixels, deriving the
// alpha of each pixel from that pixel's own alpha bit (bit 15) rather than forcing
// every pixel opaque.
//
// srcColor     - packed 16-bit source pixels.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo8888_AVX512().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
	// AVX-512 has no vector-returning _mm512_cmpgt_epi16(); the 512-bit compares are the
	// *_mask forms, which return a __mmask32. An arithmetic right shift by 15 replicates
	// bit 15 across the whole 16-bit lane (0xFFFF when set, 0x0000 when clear), and the
	// AND then keeps 0xFF00 -- the same alpha pattern the 555x...Opaque variant passes
	// for every pixel -- only in lanes whose alpha bit is set.
	//
	// NOTE(review): this assumes bit 15 set means opaque. The previous expression, read as
	// a signed compare (srcColor > -1), would have selected lanes with bit 15 clear.
	// Confirm against the scalar ColorspaceConvert5551To8888().
	const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_srai_epi16(srcColor, 15), _mm512_set1_epi16(0xFF00) );
	ColorspaceConvert555aTo8888_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a 512-bit vector of 16-bit 5551 pixels to 32-bit 6665 pixels, deriving the
// alpha of each pixel from that pixel's own alpha bit (bit 15) rather than forcing
// every pixel opaque.
//
// srcColor     - packed 16-bit source pixels.
// dstLo, dstHi - outputs, filled in by ColorspaceConvert555aTo6665_AVX512().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi)
{
	// AVX-512 has no vector-returning _mm512_cmpgt_epi16(); the 512-bit compares are the
	// *_mask forms, which return a __mmask32. An arithmetic right shift by 15 replicates
	// bit 15 across the whole 16-bit lane (0xFFFF when set, 0x0000 when clear), and the
	// AND then keeps 0x1F00 -- the same alpha pattern the 555x...Opaque variant passes
	// for every pixel -- only in lanes whose alpha bit is set.
	//
	// NOTE(review): this assumes bit 15 set means opaque. The previous expression, read as
	// a signed compare (srcColor > -1), would have selected lanes with bit 15 clear.
	// Confirm against the scalar ColorspaceConvert5551To6665().
	const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_srai_epi16(srcColor, 15), _mm512_set1_epi16(0x1F00) );
	ColorspaceConvert555aTo6665_AVX512<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB> template <bool SWAP_RB>
@ -239,7 +253,7 @@ FORCEINLINE v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, con
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src) FORCEINLINE v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src)
{ {
if (SWAP_RB) if (SWAP_RB)
{ {
@ -326,7 +340,7 @@ FORCEINLINE v512u32 ColorspaceApplyIntensity32_AVX512(const v512u32 &src, float
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512)
{ {
size_t i = 0; size_t i = 0;
@ -334,7 +348,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric
{ {
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi; v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo8888Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -352,7 +366,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512)
{ {
size_t i = 0; size_t i = 0;
@ -360,7 +374,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src,
{ {
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi; v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo6665Opaque_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512)
{
size_t i = 0;
for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16)))
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512)
{
size_t i = 0;
for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16)))
{
v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i));
v512u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AVX512<SWAP_RB>(src_vec512, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -458,7 +524,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX512(const u32 *__restrict src, u16 *
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512) size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512)
{ {
size_t i = 0; size_t i = 0;
@ -466,11 +532,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst,
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
_mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512<SWAP_RB>(_mm512_loadu_si512((v512u32 *)(src+i))) ); _mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512<SWAP_RB>(_mm512_loadu_si512((v512u32 *)(src+i))) );
} }
else else
{ {
_mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512<SWAP_RB>(_mm512_load_si512((v512u32 *)(src+i))) ); _mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512<SWAP_RB>(_mm512_load_si512((v512u32 *)(src+i))) );
} }
} }
@ -478,7 +544,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst,
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) size_t ColorspaceConvertBuffer555xTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
{ {
size_t i = 0; size_t i = 0;
v512u16 src_v512u16[2]; v512u16 src_v512u16[2];
@ -572,7 +638,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) size_t ColorspaceConvertBuffer888xTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512)
{ {
size_t i = 0; size_t i = 0;
v512u32 src_v512u32[4]; v512u32 src_v512u32[4];
@ -858,51 +924,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX512(u32 *dst, size_t pixCountVec512
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX512<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX512<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX512<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AVX512<true, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX512<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX512<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX512<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AVX512<true, true>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = false, IS_UNALIGNED = false> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To8888_AVX512<false, false>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = true, IS_UNALIGNED = false> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To8888_AVX512<true, false>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = false, IS_UNALIGNED = true> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To8888_AVX512<false, true>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = true, IS_UNALIGNED = true> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To8888_AVX512<true, true>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = false, IS_UNALIGNED = false> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To6665_AVX512<false, false>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = true, IS_UNALIGNED = false> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To6665_AVX512<true, false>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = false, IS_UNALIGNED = true> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To6665_AVX512<false, true>(src, dst, pixCount);
	return processedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to the AVX-512 kernel instantiated with
// <SWAP_RB = true, IS_UNALIGNED = true> and returns the kernel's result.
// BE_BYTESWAP is not referenced in this body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t processedCount = ColorspaceConvertBuffer5551To6665_AVX512<true, true>(src, dst, pixCount);
	return processedCount;
}
size_t ColorspaceHandler_AVX512::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -985,64 +1099,64 @@ size_t ColorspaceHandler_AVX512::ConvertBuffer6665To5551_SwapRB_IsUnaligned(cons
return ColorspaceConvertBuffer6665To5551_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer6665To5551_AVX512<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AVX512<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX512<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX512<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX512<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX512<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX512<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX512<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AVX512<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX512<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX512<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX512<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX512<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX512<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX512<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AVX512<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AVX512<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AVX512::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const size_t ColorspaceHandler_AVX512::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -1105,23 +1219,29 @@ size_t ColorspaceHandler_AVX512::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32
return ColorspaceApplyIntensityToBuffer32_AVX512<true, true>(dst, pixCount, intensity); return ColorspaceApplyIntensityToBuffer32_AVX512<true, true>(dst, pixCount, intensity);
} }
template void ColorspaceConvert555To8888_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555aTo8888_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555aTo8888_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo888x_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo888X_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo888x_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555aTo6665_AVX512<true>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555aTo6665_AVX512<false>(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo666x_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555XTo666X_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo666x_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To8888_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To8888_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To6665_AVX512<true>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template void ColorspaceConvert5551To6665_AVX512<false>(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template v512u32 ColorspaceConvert8888To6665_AVX512<true>(const v512u32 &src); template v512u32 ColorspaceConvert8888To6665_AVX512<true>(const v512u32 &src);
template v512u32 ColorspaceConvert8888To6665_AVX512<false>(const v512u32 &src); template v512u32 ColorspaceConvert8888To6665_AVX512<false>(const v512u32 &src);
@ -1135,8 +1255,8 @@ template v512u16 ColorspaceConvert8888To5551_AVX512<false>(const v512u32 &srcLo,
template v512u16 ColorspaceConvert6665To5551_AVX512<true>(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512<true>(const v512u32 &srcLo, const v512u32 &srcHi);
template v512u16 ColorspaceConvert6665To5551_AVX512<false>(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512<false>(const v512u32 &srcLo, const v512u32 &srcHi);
template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512<true>(const v512u32 &src); template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512<true>(const v512u32 &src);
template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512<false>(const v512u32 &src); template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512<false>(const v512u32 &src);
template v512u16 ColorspaceCopy16_AVX512<true>(const v512u16 &src); template v512u16 ColorspaceCopy16_AVX512<true>(const v512u16 &src);
template v512u16 ColorspaceCopy16_AVX512<false>(const v512u16 &src); template v512u16 ColorspaceCopy16_AVX512<false>(const v512u16 &src);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires AVX-512 Tier-1 support. #warning This header requires AVX-512 Tier-1 support.
#else #else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi);
template<bool SWAP_RB> v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); template<bool SWAP_RB> v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u32 ColorspaceConvert6665To8888_AVX512(const v512u32 &src); template<bool SWAP_RB> v512u32 ColorspaceConvert6665To8888_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template<bool SWAP_RB> v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi);
template<bool SWAP_RB> v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template<bool SWAP_RB> v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi);
template<bool SWAP_RB> v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); template<bool SWAP_RB> v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src);
template<bool SWAP_RB> v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); template<bool SWAP_RB> v512u16 ColorspaceCopy16_AVX512(const v512u16 &src);
template<bool SWAP_RB> v512u32 ColorspaceCopy32_AVX512(const v512u32 &src); template<bool SWAP_RB> v512u32 ColorspaceCopy32_AVX512(const v512u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler
public: public:
ColorspaceHandler_AVX512() {}; ColorspaceHandler_AVX512() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2022 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,7 +24,7 @@
#include <string.h> #include <string.h>
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -65,14 +65,14 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
ColorspaceConvert555To8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -113,24 +113,38 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
ColorspaceConvert555To6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};
ColorspaceConvert555To8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F}; const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F};
ColorspaceConvert555To6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts one vector of 16-bit 5551 colors into two vectors of 32-bit 8888 colors,
// deriving each pixel's alpha from bit 15 of the source color.
//
// srcColor - eight 16-bit source colors.
// dstLo    - receives one half of the converted pixels (as produced by the 555a helper).
// dstHi    - receives the other half of the converted pixels.
//
// The alpha mask is all-ones for lanes whose bit 15 is set and zero otherwise, matching
// the convention of the Opaque variant, which passes 0xFFFF per lane for opaque pixels.
//
// NOTE(review): the bit-15 test is applied to srcColor exactly as passed in. If the
// BE_BYTESWAP mode implies the lanes are still byte-swapped at this point, the alpha bit
// would sit in a different position -- confirm against ColorspaceConvert555aTo8888_AltiVec.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To8888_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// A 128-bit vector of 16-bit lanes has exactly eight elements.
	const v128s16 zero16 = {0, 0, 0, 0, 0, 0, 0, 0};
	
	// Viewed as signed, a lane is negative exactly when bit 15 is set, so (0 > lane)
	// yields 0xFFFF for alpha-bit-set pixels and 0x0000 for alpha-bit-clear pixels.
	const v128u16 srcAlphaBits16 = (v128u16)vec_cmpgt( zero16, (v128s16)srcColor );
	
	ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts one vector of 16-bit 5551 colors into two vectors of 32-bit 6665 colors,
// deriving each pixel's alpha from bit 15 of the source color.
//
// srcColor - eight 16-bit source colors.
// dstLo    - receives one half of the converted pixels (as produced by the 555a helper).
// dstHi    - receives the other half of the converted pixels.
//
// Lanes whose bit 15 is set receive the alpha pattern 0x1F1F (the same value the Opaque
// variant passes for every lane); lanes whose bit 15 is clear receive zero.
//
// NOTE(review): the bit-15 test is applied to srcColor exactly as passed in. If the
// BE_BYTESWAP mode implies the lanes are still byte-swapped at this point, the alpha bit
// would sit in a different position -- confirm against ColorspaceConvert555aTo6665_AltiVec.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To6665_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// A 128-bit vector of 16-bit lanes has exactly eight elements.
	const v128s16 zero16 = {0, 0, 0, 0, 0, 0, 0, 0};
	const v128u16 opaqueAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F};
	
	// Viewed as signed, a lane is negative exactly when bit 15 is set, so (0 > lane)
	// selects the alpha-bit-set pixels; masking keeps the 5-bit alpha pattern for them.
	const v128u16 alphaBitIsSet = (v128u16)vec_cmpgt( zero16, (v128s16)srcColor );
	const v128u16 srcAlphaBits16 = vec_and(alphaBitIsSet, opaqueAlphaBits16);
	
	ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB> template <bool SWAP_RB>
@ -230,7 +244,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, co
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src) FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src)
{ {
if (SWAP_RB) if (SWAP_RB)
{ {
@ -263,7 +277,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src)
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) static size_t ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
@ -271,7 +285,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri
{ {
v128u32 dstConvertedLo, dstConvertedHi; v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); ColorspaceConvert555xTo8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i); vec_st(dstConvertedLo, 16, dst+i);
} }
@ -280,7 +294,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri
} }
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP> template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
@ -288,7 +302,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src,
{ {
v128u32 dstConvertedLo, dstConvertedHi; v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); ColorspaceConvert555xTo6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer5551To8888_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer5551To6665_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i); vec_st(dstConvertedLo, 16, dst+i);
} }
@ -349,20 +397,20 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16
} }
template <bool SWAP_RB> template <bool SWAP_RB>
size_t ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
for (; i < pixCountVec128; i+=4) for (; i < pixCountVec128; i+=4)
{ {
vec_st( ColorspaceConvert888XTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i ); vec_st( ColorspaceConvert888xTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
} }
return i; return i;
} }
template <bool SWAP_RB> template <bool SWAP_RB>
size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer555xTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
v128u16 src_v128u16[2]; v128u16 src_v128u16[2];
@ -405,7 +453,7 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t
} }
template <bool SWAP_RB> template <bool SWAP_RB>
size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer888xTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
v128u32 src_v128u32[4]; v128u32 src_v128u32[4];
@ -477,27 +525,51 @@ size_t ColorspaceCopyBuffer32_AltiVec(const u32 *src, u32 *dst, size_t pixCountV
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
}
// Handler entry point for 5551 -> 8888 buffer conversion without red/blue swapping.
// Forwards to ColorspaceConvertBuffer5551To8888_AltiVec with SWAP_RB = false and
// returns whatever that function returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = ColorspaceConvertBuffer5551To8888_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// Handler entry point for 5551 -> 8888 buffer conversion with red/blue swapping.
// Forwards to ColorspaceConvertBuffer5551To8888_AltiVec with SWAP_RB = true and
// returns whatever that function returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = ColorspaceConvertBuffer5551To8888_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// Handler entry point for 5551 -> 6665 buffer conversion without red/blue swapping.
// Forwards to ColorspaceConvertBuffer5551To6665_AltiVec with SWAP_RB = false and
// returns whatever that function returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = ColorspaceConvertBuffer5551To6665_AltiVec<false, BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
// Handler entry point for 5551 -> 6665 buffer conversion with red/blue swapping.
// Forwards to ColorspaceConvertBuffer5551To6665_AltiVec with SWAP_RB = true and
// returns whatever that function returns.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t pixProcessed = ColorspaceConvertBuffer5551To6665_AltiVec<true, BE_BYTESWAP>(src, dst, pixCount);
	return pixProcessed;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -540,34 +612,34 @@ size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__re
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount); return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec<false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec<true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec<true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AltiVec<false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AltiVec<false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_AltiVec<true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_AltiVec<true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AltiVec<false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AltiVec<false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_AltiVec<true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_AltiVec<true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_AltiVec::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const size_t ColorspaceHandler_AltiVec::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -580,59 +652,59 @@ size_t ColorspaceHandler_AltiVec::CopyBuffer32_SwapRB(const u32 *src, u32 *dst,
return ColorspaceCopyBuffer32_AltiVec<true>(src, dst, pixCount); return ColorspaceCopyBuffer32_AltiVec<true>(src, dst, pixCount);
} }
template void ColorspaceConvert555To8888_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapNone>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapNone>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrc>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<true, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_AltiVec<false, BESwapSrcDst>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
@ -646,8 +718,8 @@ template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<true>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec<false>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_AltiVec<true>(const v128u16 &src); template v128u16 ColorspaceCopy16_AltiVec<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_AltiVec<false>(const v128u16 &src); template v128u16 ColorspaceCopy16_AltiVec<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,17 +24,17 @@
#warning This header requires PowerPC AltiVec support. #warning This header requires PowerPC AltiVec support.
#else #else
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB, BESwapFlags BE_BYTESWAP> void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template<bool SWAP_RB> v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src);
@ -46,11 +46,17 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler
public: public:
ColorspaceHandler_AltiVec() {}; ColorspaceHandler_AltiVec() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -64,14 +70,14 @@ public:
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2022 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -28,7 +28,7 @@
#define COLOR32_SWAPRB_NEON(src) vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(src), ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})) ) #define COLOR32_SWAPRB_NEON(src) vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(src), ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})) )
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -60,7 +60,7 @@ FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -90,7 +90,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -122,7 +122,7 @@ FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -152,17 +152,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = vdupq_n_u16(0xFF00); const v128u16 srcAlphaBits16 = vdupq_n_u16(0xFF00);
ColorspaceConvert555To8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = vdupq_n_u16(0x1F00); const v128u16 srcAlphaBits16 = vdupq_n_u16(0x1F00);
ColorspaceConvert555To6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a vector of 16-bit 5551 colors into two vectors of 32-bit 8888 colors,
// deriving the destination alpha from each source color's alpha bit (bit 15).
//
// srcColor - The 16-bit source colors.
// dstLo    - Receives the converted colors, as written by ColorspaceConvert555aTo8888_NEON().
// dstHi    - Receives the converted colors, as written by ColorspaceConvert555aTo8888_NEON().
//
// SWAP_RB is forwarded unchanged to ColorspaceConvert555aTo8888_NEON().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// vtstq_u16() sets a lane to all ones when (srcColor & 0x8000) is non-zero, so the
	// mask selects exactly the colors whose alpha bit is set. Those lanes receive the
	// same 0xFF00 alpha pattern that the opaque conversion passes for every lane, and
	// all other lanes receive zero alpha. Staying in unsigned lanes avoids any
	// signed/unsigned vector reinterpretation.
	const v128u16 alphaBitIsSet = vtstq_u16(srcColor, vdupq_n_u16(0x8000));
	const v128u16 srcAlphaBits16 = vandq_u16(alphaBitIsSet, vdupq_n_u16(0xFF00));
	
	ColorspaceConvert555aTo8888_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Converts a vector of 16-bit 5551 colors into two vectors of 32-bit 6665 colors,
// deriving the destination alpha from each source color's alpha bit (bit 15).
//
// srcColor - The 16-bit source colors.
// dstLo    - Receives the converted colors, as written by ColorspaceConvert555aTo6665_NEON().
// dstHi    - Receives the converted colors, as written by ColorspaceConvert555aTo6665_NEON().
//
// SWAP_RB is forwarded unchanged to ColorspaceConvert555aTo6665_NEON().
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// vtstq_u16() sets a lane to all ones when (srcColor & 0x8000) is non-zero, so the
	// mask selects exactly the colors whose alpha bit is set. Those lanes receive the
	// same 0x1F00 alpha pattern that the opaque conversion passes for every lane, and
	// all other lanes receive zero alpha. Staying in unsigned lanes avoids any
	// signed/unsigned vector reinterpretation.
	const v128u16 alphaBitIsSet = vtstq_u16(srcColor, vdupq_n_u16(0x8000));
	const v128u16 srcAlphaBits16 = vandq_u16(alphaBitIsSet, vdupq_n_u16(0x1F00));
	
	ColorspaceConvert555aTo6665_NEON<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB> template <bool SWAP_RB>
@ -290,7 +304,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src) FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src)
{ {
if (SWAP_RB) if (SWAP_RB)
{ {
@ -377,7 +391,7 @@ FORCEINLINE v128u32 ColorspaceApplyIntensity32_NEON(const v128u32 &src, float in
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) static size_t ColorspaceConvertBuffer555xTo8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
v128u16 srcVec; v128u16 srcVec;
@ -386,7 +400,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{ {
srcVec = vld1q_u16(src+i); srcVec = vld1q_u16(src+i);
ColorspaceConvert555To8888Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]); ColorspaceConvert555xTo8888Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec); vst1q_u32_x2(dst+i, dstVec);
} }
@ -394,7 +408,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer555xTo6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
v128u16 srcVec; v128u16 srcVec;
@ -403,7 +417,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u3
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{ {
srcVec = vld1q_u16(src+i); srcVec = vld1q_u16(src+i);
ColorspaceConvert555To6665Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]); ColorspaceConvert555xTo6665Opaque_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec);
}
return i;
}
// Converts a buffer of 16-bit 5551 colors into 32-bit 8888 colors, one v128u16
// vector of source pixels per iteration.
//
// src            - Source buffer of 16-bit colors.
// dst            - Destination buffer of 32-bit colors.
// pixCountVec128 - Number of pixels to convert. The loop advances a whole vector
//                  at a time, so this is presumably a multiple of the vector's
//                  pixel count -- confirm against the callers.
//
// Returns the pixel index reached when the loop ends.
// IS_UNALIGNED is not referenced by this function's body.
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
	const size_t pixPerVec = sizeof(v128u16) / sizeof(u16);
	size_t pixIdx = 0;
	
	while (pixIdx < pixCountVec128)
	{
		const v128u16 srcPixels = vld1q_u16(&src[pixIdx]);
		
		// Each source vector expands into a pair of 32-bit vectors, stored back-to-back.
		uint32x4x2_t dstPixels;
		ColorspaceConvert5551To8888_NEON<SWAP_RB>(srcPixels, dstPixels.val[0], dstPixels.val[1]);
		vst1q_u32_x2(&dst[pixIdx], dstPixels);
		
		pixIdx += pixPerVec;
	}
	
	return pixIdx;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
v128u16 srcVec;
uint32x4x2_t dstVec;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
srcVec = vld1q_u16(src+i);
ColorspaceConvert5551To6665_NEON<SWAP_RB>(srcVec, dstVec.val[0], dstVec.val[1]);
vst1q_u32_x2(dst+i, dstVec); vst1q_u32_x2(dst+i, dstVec);
} }
@ -467,7 +515,7 @@ size_t ColorspaceConvertBuffer6665To5551_NEON(const u32 *__restrict src, u16 *__
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer888xTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
uint8x16x4_t srcVec_x4; uint8x16x4_t srcVec_x4;
@ -491,7 +539,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, si
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer555xTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
uint16x8x2_t srcVec; uint16x8x2_t srcVec;
@ -529,7 +577,7 @@ size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__re
} }
template <bool SWAP_RB, bool IS_UNALIGNED> template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer888XTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) size_t ColorspaceConvertBuffer888xTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128)
{ {
size_t i = 0; size_t i = 0;
uint8x16x4_t srcVec_x4; uint8x16x4_t srcVec_x4;
@ -723,51 +771,99 @@ size_t ColorspaceApplyIntensityToBuffer32_NEON(u32 *dst, size_t pixCountVec128,
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_NEON<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_NEON<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_NEON<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_NEON<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_NEON<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_NEON<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo8888Opaque_NEON<true, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_NEON<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_NEON<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_NEON<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_NEON<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_NEON<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_NEON<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo6665Opaque_NEON<true, true>(src, dst, pixCount);
}
// 5551 -> 8888 buffer conversion. Forwards to ColorspaceConvertBuffer5551To8888_NEON()
// with SWAP_RB = false and IS_UNALIGNED = false, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To8888_NEON<false, false>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to ColorspaceConvertBuffer5551To8888_NEON()
// with SWAP_RB = true and IS_UNALIGNED = false, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To8888_NEON<true, false>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to ColorspaceConvertBuffer5551To8888_NEON()
// with SWAP_RB = false and IS_UNALIGNED = true, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To8888_NEON<false, true>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 8888 buffer conversion. Forwards to ColorspaceConvertBuffer5551To8888_NEON()
// with SWAP_RB = true and IS_UNALIGNED = true, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To8888_NEON<true, true>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to ColorspaceConvertBuffer5551To6665_NEON()
// with SWAP_RB = false and IS_UNALIGNED = false, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To6665_NEON<false, false>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to ColorspaceConvertBuffer5551To6665_NEON()
// with SWAP_RB = true and IS_UNALIGNED = false, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To6665_NEON<true, false>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to ColorspaceConvertBuffer5551To6665_NEON()
// with SWAP_RB = false and IS_UNALIGNED = true, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To6665_NEON<false, true>(src, dst, pixCount);
	return convertedCount;
}
// 5551 -> 6665 buffer conversion. Forwards to ColorspaceConvertBuffer5551To6665_NEON()
// with SWAP_RB = true and IS_UNALIGNED = true, and returns that function's result.
// BE_BYTESWAP is not referenced by this method's body.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer5551To6665_NEON<true, true>(src, dst, pixCount);
	return convertedCount;
}
size_t ColorspaceHandler_NEON::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
@ -850,64 +946,64 @@ size_t ColorspaceHandler_NEON::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer6665To5551_NEON<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_NEON<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_NEON<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_NEON<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo8888Opaque_NEON<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_NEON<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_NEON<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_NEON<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_NEON<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_NEON<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_NEON<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555xTo888_NEON<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_NEON<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_NEON<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_NEON<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_NEON<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_NEON<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_NEON<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_NEON<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888xTo888_NEON<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_NEON::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const size_t ColorspaceHandler_NEON::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const
@ -970,23 +1066,29 @@ size_t ColorspaceHandler_NEON::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_NEON<true, true>(dst, pixCount, intensity); return ColorspaceApplyIntensityToBuffer32_NEON<true, true>(dst, pixCount, intensity);
} }
template void ColorspaceConvert555To8888_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_NEON<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_NEON<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// Explicit instantiations of the 5551-source vector converters, one per value of SWAP_RB.
template void ColorspaceConvert5551To8888_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To8888_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To6665_NEON<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert5551To6665_NEON<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_NEON<true>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_NEON<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_NEON<false>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_NEON<false>(const v128u32 &src);
@ -1000,8 +1102,8 @@ template v128u16 ColorspaceConvert8888To5551_NEON<false>(const v128u32 &srcLo, c
template v128u16 ColorspaceConvert6665To5551_NEON<true>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_NEON<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_NEON<false>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_NEON<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_NEON<true>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_NEON<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_NEON<false>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_NEON<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_NEON<true>(const v128u16 &src); template v128u16 ColorspaceCopy16_NEON<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_NEON<false>(const v128u16 &src); template v128u16 ColorspaceCopy16_NEON<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2022 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires ARM64 NEON support. #warning This header requires ARM64 NEON support.
#else #else
template<bool SWAP_RB> void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// 5551-source vector converters: read one vector of 16-bit colors (srcColor) and write the
// converted 32-bit colors through dstLo and dstHi.
// NOTE(review): presumably alpha is derived from each source color's alpha bit, as in the
// *_SSE2 counterparts -- confirm against the NEON definitions.
template<bool SWAP_RB> void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_NEON(const v128u16 &src); template<bool SWAP_RB> v128u16 ColorspaceCopy16_NEON(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_NEON(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceCopy32_NEON(const v128u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_NEON : public ColorspaceHandler
public: public:
ColorspaceHandler_NEON() {}; ColorspaceHandler_NEON() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// Buffer converters for 5551 source pixels, writing 8888 or 6665 output to dst.
// The _SwapRB and _IsUnaligned suffixes select the two boolean template flags that the
// definitions pass to ColorspaceConvertBuffer5551To8888_NEON / ColorspaceConvertBuffer5551To6665_NEON.
// NOTE(review): the return value is whatever the underlying NEON routine returns -- presumably the
// number of pixels processed; confirm against its definition.
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,20 +89,20 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;
size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -33,7 +33,7 @@
#endif #endif
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -66,7 +66,7 @@ FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
@ -97,7 +97,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -131,7 +131,7 @@ FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
// Conversion algorithm: // Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
@ -162,17 +162,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = _mm_set1_epi16(0xFF00); const v128u16 srcAlphaBits16 = _mm_set1_epi16(0xFF00);
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) FORCEINLINE void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{ {
const v128u16 srcAlphaBits16 = _mm_set1_epi16(0x1F00); const v128u16 srcAlphaBits16 = _mm_set1_epi16(0x1F00);
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi); ColorspaceConvert555aTo6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// Converts one vector of 16-bit 5551 colors to 32-bit 8888 colors, taking the output alpha
	// from each source color's alpha bit (bit 15) instead of forcing the result opaque.
	//
	// srcColor:     vector of 16-bit source colors
	// dstLo, dstHi: receive the converted 32-bit colors
	//
	// SSE2 only provides a signed 16-bit compare. A lane with bit 15 set is negative when viewed
	// as signed, so (0 > srcColor) produces 0xFFFF exactly for the lanes whose alpha bit is set.
	// Masking with 0xFF00 yields the same alpha layout that the opaque variant passes to
	// ColorspaceConvert555aTo8888_SSE2(), and leaves 0 for lanes whose alpha bit is clear.
	const v128u16 alphaBitIsSet = _mm_cmpgt_epi16(_mm_setzero_si128(), srcColor);
	const v128u16 srcAlphaBits16 = _mm_and_si128( alphaBitIsSet, _mm_set1_epi16(0xFF00) );
	ColorspaceConvert555aTo8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	// Converts one vector of 16-bit 5551 colors to 32-bit 6665 colors, taking the output alpha
	// from each source color's alpha bit (bit 15) instead of forcing the result opaque.
	//
	// srcColor:     vector of 16-bit source colors
	// dstLo, dstHi: receive the converted 32-bit colors
	//
	// SSE2 only provides a signed 16-bit compare. A lane with bit 15 set is negative when viewed
	// as signed, so (0 > srcColor) produces 0xFFFF exactly for the lanes whose alpha bit is set.
	// Masking with 0x1F00 yields the same alpha layout that the opaque variant passes to
	// ColorspaceConvert555aTo6665_SSE2(), and leaves 0 for lanes whose alpha bit is clear.
	const v128u16 alphaBitIsSet = _mm_cmpgt_epi16(_mm_setzero_si128(), srcColor);
	const v128u16 srcAlphaBits16 = _mm_and_si128( alphaBitIsSet, _mm_set1_epi16(0x1F00) );
	ColorspaceConvert555aTo6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
template <bool SWAP_RB> template <bool SWAP_RB>
@ -315,7 +329,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src) FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src)
{ {
if (SWAP_RB) if (SWAP_RB)
{ {
@ -422,7 +436,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict
{ {
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi; v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -448,7 +462,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u3
{ {
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi; v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi); ColorspaceConvert555xTo6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer5551To8888_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer5551To6665_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16)))
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
@ -554,11 +620,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_SSE2(const u32 *src, u32 *dst, si
{ {
if (IS_UNALIGNED) if (IS_UNALIGNED)
{ {
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) ); _mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
} }
else else
{ {
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) ); _mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
} }
} }
@ -937,53 +1003,101 @@ size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128,
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
} }
template <BESwapFlags BE_BYTESWAP> template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
} }
// Converts a buffer of 16-bit 5551 colors (src) into 32-bit 8888 colors (dst)
// by forwarding to ColorspaceConvertBuffer5551To8888_SSE2.
// Both helper flags are off; judging by the sibling method-name suffixes they
// select R/B swapping and unaligned memory access -- confirm against the
// helper's template parameter list.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 32-bit 8888 colors (dst)
// by forwarding to ColorspaceConvertBuffer5551To8888_SSE2 with its first flag
// set and its second flag clear. Per this method's name the first flag
// presumably requests an R/B channel swap -- confirm against the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 32-bit 8888 colors (dst)
// by forwarding to ColorspaceConvertBuffer5551To8888_SSE2 with its first flag
// clear and its second flag set. Per this method's name the second flag
// presumably selects unaligned loads/stores -- confirm against the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 32-bit 8888 colors (dst)
// by forwarding to ColorspaceConvertBuffer5551To8888_SSE2 with both flags set.
// Per this method's name the flags presumably request an R/B channel swap and
// unaligned loads/stores respectively -- confirm against the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To8888_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 6665 colors stored in
// 32-bit words (dst) by forwarding to ColorspaceConvertBuffer5551To6665_SSE2.
// Both helper flags are off; judging by the sibling method-name suffixes they
// select R/B swapping and unaligned memory access -- confirm against the
// helper's template parameter list.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 6665 colors stored in
// 32-bit words (dst) by forwarding to ColorspaceConvertBuffer5551To6665_SSE2
// with its first flag set and its second flag clear. Per this method's name
// the first flag presumably requests an R/B channel swap -- confirm against
// the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = false;
	
	return ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 6665 colors stored in
// 32-bit words (dst) by forwarding to ColorspaceConvertBuffer5551To6665_SSE2
// with its first flag clear and its second flag set. Per this method's name
// the second flag presumably selects unaligned loads/stores -- confirm
// against the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = false;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
// Converts a buffer of 16-bit 5551 colors (src) into 6665 colors stored in
// 32-bit words (dst) by forwarding to ColorspaceConvertBuffer5551To6665_SSE2
// with both flags set. Per this method's name the flags presumably request an
// R/B channel swap and unaligned loads/stores respectively -- confirm against
// the helper.
// BE_BYTESWAP is accepted for interface parity but is not consulted here.
// Returns whatever count the helper reports.
template <BESwapFlags BE_BYTESWAP>
size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const bool swapRB = true;
	const bool isUnaligned = true;
	
	return ColorspaceConvertBuffer5551To6665_SSE2<swapRB, isUnaligned>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
@ -1064,64 +1178,64 @@ size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo8888Opaque_SSE2<true, true>(src, dst, pixCount);
} }
#ifdef ENABLE_SSSE3 #ifdef ENABLE_SSSE3
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_SSSE3<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer555XTo888_SSSE3<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_SSSE3<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer555XTo888_SSSE3<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_SSSE3<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer555XTo888_SSSE3<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer555XTo888_SSSE3<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer555XTo888_SSSE3<true, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_SSSE3<false, false>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo888_SSSE3<false, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_SSSE3<true, false>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo888_SSSE3<true, false>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_SSSE3<false, true>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo888_SSSE3<false, true>(src, dst, pixCount);
} }
size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const
{ {
return ColorspaceConvertBuffer888XTo888_SSSE3<true, true>(src, dst, pixCount); return ColorspaceConvertBuffer888XTo888_SSSE3<true, true>(src, dst, pixCount);
} }
@ -1188,23 +1302,23 @@ size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *
return ColorspaceApplyIntensityToBuffer32_SSE2<true, true>(dst, pixCount, intensity); return ColorspaceApplyIntensityToBuffer32_SSE2<true, true>(dst, pixCount, intensity);
} }
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo8888_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo888X_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo888x_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_SSE2<true>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555aTo6665_SSE2<false>(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555XTo666X_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo666x_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template void ColorspaceConvert555xTo6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
@ -1218,8 +1332,8 @@ template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, c
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<true>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2<false>(const v128u32 &src); template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2<false>(const v128u32 &src);
template v128u16 ColorspaceCopy16_SSE2<true>(const v128u16 &src); template v128u16 ColorspaceCopy16_SSE2<true>(const v128u16 &src);
template v128u16 ColorspaceCopy16_SSE2<false>(const v128u16 &src); template v128u16 ColorspaceCopy16_SSE2<false>(const v128u16 &src);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2021 DeSmuME team Copyright (C) 2016-2024 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,17 +24,19 @@
#warning This header requires SSE2 support. #warning This header requires SSE2 support.
#else #else
template<bool SWAP_RB> void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template<bool SWAP_RB> void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceCopy16_SSE2(const v128u16 &src); template<bool SWAP_RB> v128u16 ColorspaceCopy16_SSE2(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_SSE2(const v128u32 &src); template<bool SWAP_RB> v128u32 ColorspaceCopy32_SSE2(const v128u32 &src);
@ -47,15 +49,25 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler
public: public:
ColorspaceHandler_SSE2() {}; ColorspaceHandler_SSE2() {};
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
template<BESwapFlags BE_BYTESWAP> size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
@ -77,21 +89,21 @@ public:
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
#ifdef ENABLE_SSSE3 #ifdef ENABLE_SSSE3
size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const;
#endif #endif
size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;