From 586aea5310594bd54c410dde9bdeed9f1dc60449 Mon Sep 17 00:00:00 2001 From: rogerman Date: Mon, 15 Jul 2024 16:24:02 -0700 Subject: [PATCH] Colorspace Handler: Add new functions for 16-bit to 18-bit and 32-bit color conversion, now respecting the 16-bit color's alpha bit. - ColorspaceConvert5551To8888() - ColorspaceConvert5551To6665() - ColorspaceConvertBuffer5551To8888() - ColorspaceConvertBuffer5551To6665() - Also rename the existing 16-bit color conversion functions to help further distinguish the functions from one another. --- desmume/src/FIFO.cpp | 6 +- desmume/src/GPU.cpp | 32 +- desmume/src/GPU_Operations_AVX2.cpp | 58 +-- desmume/src/GPU_Operations_SSE2.cpp | 58 +-- .../frontend/cocoa/ClientAVCaptureObject.cpp | 4 +- .../src/frontend/cocoa/OGLDisplayOutput.cpp | 6 +- desmume/src/frontend/cocoa/cocoa_rom.mm | 4 +- .../src/frontend/cocoa/cocoa_videofilter.mm | 4 +- .../userinterface/MacMetalDisplayView.mm | 6 +- desmume/src/frontend/posix/gtk/main.cpp | 4 +- desmume/src/frontend/posix/gtk2/main.cpp | 4 +- desmume/src/frontend/windows/aviout.cpp | 6 +- desmume/src/frontend/windows/display.cpp | 6 +- desmume/src/frontend/windows/hotkey.cpp | 6 +- desmume/src/frontend/windows/main.cpp | 2 +- desmume/src/rasterize.cpp | 4 +- desmume/src/texcache.cpp | 130 ++--- .../colorspacehandler/colorspacehandler.cpp | 491 ++++++++++++++---- .../colorspacehandler/colorspacehandler.h | 84 ++- .../colorspacehandler_AVX2.cpp | 260 +++++++--- .../colorspacehandler_AVX2.h | 68 ++- .../colorspacehandler_AVX512.cpp | 266 +++++++--- .../colorspacehandler_AVX512.h | 68 ++- .../colorspacehandler_AltiVec.cpp | 252 +++++---- .../colorspacehandler_AltiVec.h | 42 +- .../colorspacehandler_NEON.cpp | 244 ++++++--- .../colorspacehandler_NEON.h | 68 ++- .../colorspacehandler_SSE2.cpp | 210 ++++++-- .../colorspacehandler_SSE2.h | 68 ++- 29 files changed, 1680 insertions(+), 781 deletions(-) mode change 100755 => 100644 desmume/src/frontend/cocoa/OGLDisplayOutput.cpp diff --git 
a/desmume/src/FIFO.cpp b/desmume/src/FIFO.cpp index ae812cefc..e0489c909 100755 --- a/desmume/src/FIFO.cpp +++ b/desmume/src/FIFO.cpp @@ -1,7 +1,7 @@ /* Copyright 2006 yopyop Copyright 2007 shash - Copyright 2007-2023 DeSmuME team + Copyright 2007-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -489,11 +489,11 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst) } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); } _DISP_FIFOrecv_LineAdvance(); diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index c3a24bd07..6a720e019 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2,7 +2,7 @@ Copyright (C) 2006 yopyop Copyright (C) 2006-2007 Theo Berkau Copyright (C) 2007 shash - Copyright (C) 2008-2023 DeSmuME team + Copyright (C) 2008-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1032,12 +1032,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI { if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 
*)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To6665Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); } break; @@ -1047,12 +1047,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI { if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To8888Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); } break; @@ -3729,7 +3729,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if (OUTPUTFORMAT == 
NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } this->_RenderLine_DispCapture_Copy(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt); @@ -3755,7 +3755,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To8888Opaque((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32); srcAPtr = this->_captureWorkingA32; } @@ -3772,7 +3772,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0)) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32); @@ -3919,7 +3919,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI { if (willReadNativeVRAM) { - ColorspaceConvertBuffer555To8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); } } @@ -4435,7 +4435,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo) { const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + 
lineInfo.blockOffsetCustom; u32 *dst = (u32 *)customBuffer + lineInfo.blockOffsetCustom; - ColorspaceConvertBuffer555To6665Opaque(src, dst, lineInfo.pixelCount); + ColorspaceConvertBuffer555xTo6665Opaque(src, dst, lineInfo.pixelCount); break; } @@ -5780,11 +5780,11 @@ void GPUSubsystem::_ConvertAndUpscaleForLoadstate(const NDSDisplayID displayID, switch (this->_display[displayID]->GetColorFormat()) { case NDSColorFormat_BGR666_Rev: - ColorspaceConvertBuffer555To6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; default: @@ -6212,11 +6212,11 @@ void NDSDisplay::ResolveLinesDisplayedNative() { if (this->_customColorFormat == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { - ColorspaceConvertBuffer555To6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); } CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, working, dst); @@ -6256,7 +6256,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo) { case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; default: @@ -6298,7 +6298,7 @@ void 
NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo) case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; } } diff --git a/desmume/src/GPU_Operations_AVX2.cpp b/desmume/src/GPU_Operations_AVX2.cpp index 78e5e9823..d064a7370 100644 --- a/desmume/src/GPU_Operations_AVX2.cpp +++ b/desmume/src/GPU_Operations_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2023 DeSmuME team + Copyright (C) 2021-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1128,13 +1128,13 @@ FORCEINLINE void PixelOperation_AVX2::_copy16(GPUEngineCompositorInfo &compInfo, if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(src1, src32[2], src32[3]); } _mm256_store_si256( (v256u32 *)compInfo.target.lineColor32 + 0, src32[0] ); @@ -1205,13 +1205,13 @@ FORCEINLINE void PixelOperation_AVX2::_copyMask16(GPUEngineCompositorInfo &compI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_AVX2(src1, src32[2], src32[3]); + 
ColorspaceConvert555xTo6665Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1304,13 +1304,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUp16(GPUEngineCompositorInfo &c if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_AVX2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_AVX2(src1, dst[2], dst[3]); } const v256u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
_mm256_set1_epi32(0x1F000000) : _mm256_set1_epi32(0xFF000000); @@ -1377,13 +1377,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUpMask16(GPUEngineCompositorInf if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1471,13 +1471,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDown16(GPUEngineCompositorInfo if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_AVX2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_AVX2(src1, dst[2], dst[3]); } const v256u32 alphaBits = _mm256_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
0x1F000000 : 0xFF000000); @@ -1544,13 +1544,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDownMask16(GPUEngineCompositorI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1674,13 +1674,13 @@ FORCEINLINE void PixelOperation_AVX2::_unknownEffectMask16(GPUEngineCompositorIn } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo666X_AVX2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo666x_AVX2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo666x_AVX2(src1, tmpSrc[2], tmpSrc[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo888X_AVX2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo888x_AVX2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo888x_AVX2(src1, tmpSrc[2], tmpSrc[3]); } switch (compInfo.renderState.colorEffect) diff --git a/desmume/src/GPU_Operations_SSE2.cpp b/desmume/src/GPU_Operations_SSE2.cpp index 3aca64de4..5621fb196 100644 --- a/desmume/src/GPU_Operations_SSE2.cpp +++ b/desmume/src/GPU_Operations_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2023 DeSmuME team + Copyright (C) 2021-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -922,13 +922,13 @@ FORCEINLINE void 
PixelOperation_SSE2::_copy16(GPUEngineCompositorInfo &compInfo, if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(src1, src32[2], src32[3]); } _mm_store_si128( (v128u32 *)compInfo.target.lineColor32 + 0, src32[0] ); @@ -999,13 +999,13 @@ FORCEINLINE void PixelOperation_SSE2::_copyMask16(GPUEngineCompositorInfo &compI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1104,13 +1104,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUp16(GPUEngineCompositorInfo &c if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_SSE2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_SSE2(src1, dst[2], dst[3]); + 
ColorspaceConvert555xTo888x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_SSE2(src1, dst[2], dst[3]); } const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); @@ -1182,13 +1182,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUpMask16(GPUEngineCompositorInf if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1275,13 +1275,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDown16(GPUEngineCompositorInfo if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_SSE2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_SSE2(src1, dst[2], dst[3]); } const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
_mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); @@ -1353,13 +1353,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDownMask16(GPUEngineCompositorI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1494,13 +1494,13 @@ FORCEINLINE void PixelOperation_SSE2::_unknownEffectMask16(GPUEngineCompositorIn } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo666X_SSE2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo666x_SSE2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo666x_SSE2(src1, tmpSrc[2], tmpSrc[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo888X_SSE2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo888x_SSE2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo888x_SSE2(src1, tmpSrc[2], tmpSrc[3]); } switch (compInfo.renderState.colorEffect) diff --git a/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp b/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp index 5b53b8ef3..18a9a33f8 100644 --- a/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp +++ b/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp @@ -484,7 +484,7 @@ void ClientAVCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam & const u16 *__restrict src = (const u16 *__restrict)param.src; u8 *__restrict dst = param.dst; - ColorspaceConvertBuffer555XTo888(src, dst, 
param.frameWidth * lineCount); + ColorspaceConvertBuffer555xTo888(src, dst, param.frameWidth * lineCount); } //converts 32bpp to 24bpp and flips @@ -494,7 +494,7 @@ void ClientAVCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam & const u32 *__restrict src = (const u32 *__restrict)param.src; u8 *__restrict dst = param.dst; - ColorspaceConvertBuffer888XTo888(src, dst, param.frameWidth * lineCount); + ColorspaceConvertBuffer888xTo888(src, dst, param.frameWidth * lineCount); } void ClientAVCaptureObject::CaptureVideoFrame(const void *srcVideoFrame, const size_t inFrameWidth, const size_t inFrameHeight, const NDSColorFormat colorFormat) diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp old mode 100755 new mode 100644 index e2734fd79..003fc490b --- a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2023 DeSmuME team + Copyright (C) 2014-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4719,7 +4719,7 @@ void OGLClientSharedData::FetchNativeDisplayToSrcClone(const NDSDisplayInfo *dis return; } - ColorspaceConvertBuffer555To8888Opaque(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; if (needsLock) @@ -4744,7 +4744,7 @@ void OGLClientSharedData::FetchCustomDisplayToSrcClone(const NDSDisplayInfo *dis return; } - ColorspaceConvertBuffer888XTo8888Opaque((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], 
this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer888xTo8888Opaque((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; if (needsLock) diff --git a/desmume/src/frontend/cocoa/cocoa_rom.mm b/desmume/src/frontend/cocoa/cocoa_rom.mm index be5091bf2..c9094bca7 100644 --- a/desmume/src/frontend/cocoa/cocoa_rom.mm +++ b/desmume/src/frontend/cocoa/cocoa_rom.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2011-2022 DeSmuME team + Copyright (C) 2011-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -674,7 +674,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData) // The first entry always represents the alpha, so just set it to 0. const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette; CACHE_ALIGN uint32_t clut32[16]; - ColorspaceConvertBuffer555To8888Opaque(clut4, clut32, 16); + ColorspaceConvertBuffer555xTo8888Opaque(clut4, clut32, 16); clut32[0] = 0x00000000; // Load the image from the icon pixel data. 
diff --git a/desmume/src/frontend/cocoa/cocoa_videofilter.mm b/desmume/src/frontend/cocoa/cocoa_videofilter.mm index 82a16a6f5..0ba9a7293 100644 --- a/desmume/src/frontend/cocoa/cocoa_videofilter.mm +++ b/desmume/src/frontend/cocoa/cocoa_videofilter.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2013 DeSmuME team + Copyright (C) 2013-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -139,7 +139,7 @@ } uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData]; - ColorspaceConvertBuffer888XTo8888Opaque((const uint32_t *)[self runFilter], bitmapData, w * h); + ColorspaceConvertBuffer888xTo8888Opaque((const uint32_t *)[self runFilter], bitmapData, w * h); #ifdef MSB_FIRST for (size_t i = 0; i < w * h; i++) diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index e83459aff..7a9942393 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2017-2023 DeSmuME team + Copyright (C) 2017-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2556,7 +2556,7 @@ void MacMetalFetchObject::_FetchNativeDisplayByID(const NDSDisplayID displayID, GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); - ColorspaceConvertBuffer555To8888Opaque(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], 
this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); } @@ -2570,7 +2570,7 @@ void MacMetalFetchObject::_FetchCustomDisplayByID(const NDSDisplayID displayID, GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); - ColorspaceConvertBuffer888XTo8888Opaque((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer888xTo8888Opaque((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); } diff --git a/desmume/src/frontend/posix/gtk/main.cpp b/desmume/src/frontend/posix/gtk/main.cpp index a9ad5b3aa..06f55c656 100644 --- a/desmume/src/frontend/posix/gtk/main.cpp +++ b/desmume/src/frontend/posix/gtk/main.cpp @@ -1380,7 +1380,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo static inline void gpu_screen_to_rgb(u32* dst) { - ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, + ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().isCustomSizeRequested ? 
(u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, dst, real_framebuffer_width * real_framebuffer_height * 2); } @@ -1591,7 +1591,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo } static void RedrawScreen() { - ColorspaceConvertBuffer555To8888Opaque( + ColorspaceConvertBuffer555xTo8888Opaque( GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), real_framebuffer_width * real_framebuffer_height * 2); #ifdef HAVE_LIBAGG diff --git a/desmume/src/frontend/posix/gtk2/main.cpp b/desmume/src/frontend/posix/gtk2/main.cpp index 70946a8e7..cee54cd60 100644 --- a/desmume/src/frontend/posix/gtk2/main.cpp +++ b/desmume/src/frontend/posix/gtk2/main.cpp @@ -1666,7 +1666,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo static inline void gpu_screen_to_rgb(u32* dst) { - ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); } static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) { @@ -1791,7 +1791,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo } static void RedrawScreen() { - ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); #ifdef HAVE_LIBAGG aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024); 
osd->update(); diff --git a/desmume/src/frontend/windows/aviout.cpp b/desmume/src/frontend/windows/aviout.cpp index 3a5aed7ec..39be5c383 100644 --- a/desmume/src/frontend/windows/aviout.cpp +++ b/desmume/src/frontend/windows/aviout.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2006-2018 DeSmuME team + Copyright (C) 2006-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -736,7 +736,7 @@ void NDSCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam &param for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) { - ColorspaceConvertBuffer555XTo888(src, dst, param.frameWidth); + ColorspaceConvertBuffer555xTo888(src, dst, param.frameWidth); src += param.frameWidth; dst -= param.frameWidth * 3; } @@ -750,7 +750,7 @@ void NDSCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam &param for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) { - ColorspaceConvertBuffer888XTo888(src, dst, param.frameWidth); + ColorspaceConvertBuffer888xTo888(src, dst, param.frameWidth); src += param.frameWidth; dst -= param.frameWidth * 3; } diff --git a/desmume/src/frontend/windows/display.cpp b/desmume/src/frontend/windows/display.cpp index 143136083..4e4d8b329 100644 --- a/desmume/src/frontend/windows/display.cpp +++ b/desmume/src/frontend/windows/display.cpp @@ -1,5 +1,5 @@ /* -Copyright (C) 2018 DeSmuME team +Copyright (C) 2018-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -683,9 +683,9 @@ void DoDisplay() //we have to do a copy here because we're about to draw the OSD onto it. bummer.
if (gpu_bpp == 15) - ColorspaceConvertBuffer555To8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); else - ColorspaceConvertBuffer888XTo8888Opaque((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); + ColorspaceConvertBuffer888xTo8888Opaque((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); //some games use the backlight for fading effects const size_t pixCount = video.prefilterWidth * video.prefilterHeight / 2; diff --git a/desmume/src/frontend/windows/hotkey.cpp b/desmume/src/frontend/windows/hotkey.cpp index 88e6be856..fb37bd2c8 100644 --- a/desmume/src/frontend/windows/hotkey.cpp +++ b/desmume/src/frontend/windows/hotkey.cpp @@ -3,7 +3,7 @@ licensed under the terms supplied at the end of this file (for the terms are very long!) Differences from that baseline version are: - Copyright (C) 2009-2019 DeSmuME team + Copyright (C) 2009-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -174,7 +174,7 @@ static void DoScreenshot(const char* fname) else { u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); NDS_WritePNG_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight*2, swapbuf, fname); free_aligned(swapbuf); } @@ -189,7 +189,7 @@ static void DoScreenshot(const char* fname) else { u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, 
dispInfo.customWidth * dispInfo.customHeight * 2); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); NDS_WriteBMP_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight *2, swapbuf, fname); free_aligned(swapbuf); } diff --git a/desmume/src/frontend/windows/main.cpp b/desmume/src/frontend/windows/main.cpp index 7766d3a47..796c3349b 100644 --- a/desmume/src/frontend/windows/main.cpp +++ b/desmume/src/frontend/windows/main.cpp @@ -3441,7 +3441,7 @@ void ScreenshotToClipboard(bool extraInfo) else { u32* swapbuf = (u32*)malloc_alignedPage(width*height * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height); SetDIBitsToDevice(hMemDC, 0, 0, width, height, 0, 0, 0, height, swapbuf, (BITMAPINFO*)&bmi, DIB_RGB_COLORS); diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 64684b26a..fa7458cf1 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2009-2023 DeSmuME team + Copyright (C) 2009-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2032,7 +2032,7 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D_State &renderState } // Convert the toon table colors - ColorspaceConvertBuffer555To6665Opaque(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32); + ColorspaceConvertBuffer555xTo6665Opaque(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32); if (this->_enableEdgeMark) { diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index faafd5574..692cd5504 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -1,7 +1,7 @@ /* Copyright (C) 2006 yopyop Copyright (C) 2006-2007 shash - Copyright (C) 
2008-2023 DeSmuME team + Copyright (C) 2008-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -867,13 +867,13 @@ void __NDSTextureUnpackI2_AVX2(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -923,13 +923,13 @@ void __NDSTextureUnpackI2_SSSE3(const size_t texelCount, const u8 *__restrict sr if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. @@ -977,13 +977,13 @@ void __NDSTextureUnpackI2_NEON(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1028,13 +1028,13 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } // Set converted colors to 0 if the palette index is 0. @@ -1146,13 +1146,13 @@ void __NDSTextureUnpackI4_AVX2(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1208,13 +1208,13 @@ void __NDSTextureUnpackI4_SSSE3(const size_t texelCount, const u8 *__restrict sr if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. @@ -1261,13 +1261,13 @@ void __NDSTextureUnpackI4_NEON(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1312,13 +1312,13 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } // Set converted colors to 0 if the palette index is 0. @@ -1434,13 +1434,13 @@ void __NDSTextureUnpackA3I5_NEON(const size_t texelCount, const u8 *__restrict s if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } vst1q_u32_x4(dstBuffer + i, convertedColor); @@ -1486,13 +1486,13 @@ 
void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } vec_st(convertedColor[0], 0, dstBuffer); @@ -1566,8 +1566,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); - ColorspaceConvert555To6665_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo6665_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo6665_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } else { @@ -1577,8 +1577,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); - ColorspaceConvert555To8888_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888_AVX2(palColor1, alphaHi, 
convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo8888_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo8888_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } _mm256_store_si256((v256u32 *)dstBuffer + 0, convertedColor[0]); @@ -1615,8 +1615,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - ColorspaceConvert555To6665_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo6665_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo6665_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } else { @@ -1624,8 +1624,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - ColorspaceConvert555To8888_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo8888_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo8888_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } _mm_store_si128((v128u32 *)(dstBuffer + i) + 0, convertedColor[0]); @@ -1661,8 +1661,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); - ColorspaceConvert555To6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - 
ColorspaceConvert555To6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } else { @@ -1670,8 +1670,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); - ColorspaceConvert555To8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } vst1q_u32_x4(dstBuffer + i, convertedColor); @@ -1707,8 +1707,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); - ColorspaceConvert555To6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } else { @@ -1716,8 +1716,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 
*__restric const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); - ColorspaceConvert555To8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } vec_st(convertedColor[0], 0, dstBuffer); @@ -1900,11 +1900,11 @@ void __NDSTextureUnpackDirect16Bit_AVX2(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(c, convertedColor[0], convertedColor[1]); } else { - ColorspaceConvert555To8888Opaque_AVX2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(c, convertedColor[0], convertedColor[1]); } v256u16 alpha = _mm256_cmpeq_epi16(_mm256_srli_epi16(c, 15), _mm256_set1_epi16(1)); @@ -1930,11 +1930,11 @@ void __NDSTextureUnpackDirect16Bit_SSE2(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(c, convertedColor[0], convertedColor[1]); } else { - ColorspaceConvert555To8888Opaque_SSE2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(c, convertedColor[0], convertedColor[1]); } const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1)); @@ -1959,11 +1959,11 @@ void 
__NDSTextureUnpackDirect16Bit_NEON(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); } else { - ColorspaceConvert555To8888Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); } const v128u16 alpha = vceqq_u16(vshrq_n_u16(c,15), vdupq_n_u16(1)); @@ -1987,11 +1987,11 @@ void __NDSTextureUnpackDirect16Bit_AltiVec(const size_t texelCount, const u16 *_ if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); } const v128u16 alpha = vec_and(c, ((v128u16){0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080})); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp index 9704845be..c5034f845 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2023 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -187,7 +187,7 @@ void ColorspaceHandlerInit() } template -void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -198,22 
+198,22 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque(src, dst, pixCountVector); } } @@ -243,7 +243,7 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re } template -void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -254,22 +254,22 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque(src, dst, pixCountVector); } } @@ -298,6 +298,119 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict 
src, u32 *__re } } + +template +void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +{ + size_t i = 0; + +#ifdef USEMANUALVECTORIZATION + const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16))); + + if (SWAP_RB) + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To8888_SwapRB_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To8888_SwapRB(src, dst, pixCountVector); + } + } + else + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To8888_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To8888(src, dst, pixCountVector); + } + } + +#pragma LOOPVECTORIZE_DISABLE +#endif // USEMANUALVECTORIZATION + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapIn: + dst[i] = ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } +} + +template +void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +{ + size_t i = 0; + +#ifdef USEMANUALVECTORIZATION + const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16))); + + if (SWAP_RB) + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To6665_SwapRB_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To6665_SwapRB(src, dst, pixCountVector); + } + } + else + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To6665_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To6665(src, dst, pixCountVector); + } + } + +#pragma LOOPVECTORIZE_DISABLE +#endif // USEMANUALVECTORIZATION + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: 
+ dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapIn: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } +} + template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) { @@ -455,7 +568,7 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric } template -void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) +void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) { size_t i = 0; @@ -466,22 +579,22 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo8888Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo8888Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque(src, dst, pixCountVector); } } @@ -494,7 +607,7 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi } template -void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -505,22 +618,22 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict { if 
(IS_UNALIGNED) { - i = csh.ConvertBuffer555XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555XTo888_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555XTo888_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555XTo888(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888(src, dst, pixCountVector); } } @@ -533,7 +646,7 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict } template -void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -544,22 +657,22 @@ void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo888_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo888_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo888(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888(src, dst, pixCountVector); } } @@ -811,7 +924,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -841,7 +954,7 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -871,19 +984,19 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restr } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -913,7 +1026,7 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -943,15 +1056,159 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restr } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict 
dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To8888(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + 
case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To6665(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(src, dst, pixCount); } size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -1090,7 +1347,7 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 * return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { size_t i = 0; @@ -1102,7 +1359,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { size_t i = 0; @@ -1114,17 +1371,17 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return this->ConvertBuffer888XTo8888Opaque(src, dst, pixCount); 
+ return this->ConvertBuffer888xTo8888Opaque(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return this->ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCount); + return this->ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1136,7 +1393,7 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 * return i; } -size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1148,17 +1405,17 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict sr return i; } -size_t ColorspaceHandler::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer555XTo888(src, dst, pixCount); + return this->ConvertBuffer555xTo888(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer555XTo888_SwapRB(src, dst, pixCount); + return 
this->ConvertBuffer555xTo888_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1170,7 +1427,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 * return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1182,14 +1439,14 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict sr return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer888XTo888(src, dst, pixCount); + return this->ConvertBuffer888xTo888(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer888XTo888_SwapRB(src, dst, pixCount); + return this->ConvertBuffer888xTo888_SwapRB(src, dst, pixCount); } size_t ColorspaceHandler::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1396,39 +1653,73 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t 
pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template 
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 
*__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); + +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); + +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const 
u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); @@ -1450,20 +1741,20 @@ template void ColorspaceConvertBuffer6665To5551(const u32 *__restri template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, 
u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.h b/desmume/src/utils/colorspacehandler/colorspacehandler.h index 48bae72e8..69bd913d5 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2023 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -126,6 +126,26 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src) return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); } +template +FORCEINLINE u32 ColorspaceConvert5551To8888(const u16 src) +{ + Color4u8 outColor; + outColor.value = (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF); + outColor.a = (src & 0x8000) ? 0xFF : 0x00; + + return outColor.value; +} + +template +FORCEINLINE u32 ColorspaceConvert5551To6665(const u16 src) +{ + Color4u8 outColor; + outColor.value = (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); + outColor.a = (src & 0x8000) ? 
0x1F : 0x00; + + return outColor.value; +} + template FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor) { @@ -331,16 +351,18 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity) return ColorspaceApplyIntensity32(srcColorComponent); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount); @@ -353,15 +375,25 @@ class ColorspaceHandler public: ColorspaceHandler() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -383,20 +415,20 @@ public: size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - 
size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp index e35747f34..378015905 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -64,7 +64,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -101,7 +101,7 @@ 
FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u } template -FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -141,7 +141,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -178,17 +178,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0xFF00); - ColorspaceConvert555To8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0x1F00); - ColorspaceConvert555To6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AVX2(const v256u16 
&srcColor, v256u32 &dstLo, v256u32 &dstHi) +{ + const v256u16 srcAlphaBits16 = _mm256_and_si256( _mm256_cmpgt_epi16(srcColor, _mm256_set1_epi16(0xFFFF)), _mm256_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +{ + const v256u16 srcAlphaBits16 = _mm256_and_si256( _mm256_cmpgt_epi16(srcColor, _mm256_set1_epi16(0xFFFF)), _mm256_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -320,7 +334,7 @@ FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const } template -FORCEINLINE v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src) +FORCEINLINE v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src) { if (SWAP_RB) { @@ -407,7 +421,7 @@ FORCEINLINE v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float in } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) { size_t i = 0; @@ -415,7 +429,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict { v256u16 src_vec256 = (IS_UNALIGNED) ? 
_mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo8888Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -433,7 +447,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict } template -size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) { size_t i = 0; @@ -441,7 +455,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u3 { v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo6665Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) +{ + size_t i = 0; + + for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16))) + { + v256u16 src_vec256 = (IS_UNALIGNED) ? 
_mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); + v256u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To8888_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) +{ + size_t i = 0; + + for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16))) + { + v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); + v256u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To6665_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -539,7 +605,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__ } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256) { size_t i = 0; @@ -547,11 +613,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si { if (IS_UNALIGNED) { - _mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2(_mm256_loadu_si256((v256u32 *)(src+i))) ); + _mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2(_mm256_loadu_si256((v256u32 *)(src+i))) ); } else { - _mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2(_mm256_load_si256((v256u32 *)(src+i))) ); + _mm256_store_si256( (v256u32 *)(dst+i), 
ColorspaceConvert888xTo8888Opaque_AVX2(_mm256_load_si256((v256u32 *)(src+i))) ); } } @@ -559,7 +625,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si } template -size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer555xTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) { size_t i = 0; v256u16 src_v256u16[2]; @@ -636,7 +702,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__re } template -size_t ColorspaceConvertBuffer888XTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer888xTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) { size_t i = 0; v256u32 src_v256u32[4]; @@ -905,51 +971,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256, } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + 
return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, 
size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); } size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -1032,64 +1146,64 @@ size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return ColorspaceConvertBuffer6665To5551_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, 
u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_AVX2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } size_t ColorspaceHandler_AVX2::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1152,23 +1266,23 @@ size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_AVX2(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const 
v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const 
v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); @@ -1182,8 +1296,8 @@ template v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, c template v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h index af8f832d7..572f8025e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as 
published by @@ -24,17 +24,19 @@ #warning This header requires AVX2 support. #else -template void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); template v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src); template v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2(const 
v256u32 &srcLo, const v256u32 &srcHi); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); template v256u32 ColorspaceCopy32_AVX2(const v256u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler public: ColorspaceHandler_AVX2() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ public: size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t 
pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t 
ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp index 713b145ba..0c404687e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -44,7 +44,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, cons } template -FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_AVX512(const v512u16 
&srcColor, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -62,7 +62,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v51 } template -FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -81,7 +81,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, cons } template -FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -99,17 +99,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v51 } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0xFF00); - ColorspaceConvert555To8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0x1F00); - ColorspaceConvert555To6665_AVX512(srcColor, 
srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +{ + /* Bit 15 of each 16-bit lane is the 5551 alpha bit. The arithmetic right shift by 15 replicates it across the lane (0xFFFF when set, 0x0000 when clear), so the AND yields 0xFF00 (the same alpha bits the Opaque variant passes) only for pixels whose alpha bit is set. A signed compare against 0xFFFF (-1) would select the lanes with the alpha bit CLEAR instead. */ const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_srai_epi16(srcColor, 15), _mm512_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +{ + /* Same alpha-bit expansion as the 8888 variant, but masked to 0x1F00, the 5-bit alpha value the 6665 Opaque variant passes. */ const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_srai_epi16(srcColor, 15), _mm512_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -239,7 +253,7 @@ FORCEINLINE v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, con } template -FORCEINLINE v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src) +FORCEINLINE v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src) { if (SWAP_RB) { @@ -326,7 +340,7 @@ FORCEINLINE v512u32 ColorspaceApplyIntensity32_AVX512(const v512u32 &src, float } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) { size_t i = 0; @@ -334,7 +348,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric { v512u16 src_vec512 = (IS_UNALIGNED) ?
_mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo8888Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -352,7 +366,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric } template -size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) { size_t i = 0; @@ -360,7 +374,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, { v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo6665Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) +{ + size_t i = 0; + + for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16))) + { + v512u16 src_vec512 = (IS_UNALIGNED) ? 
_mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); + v512u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To8888_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) +{ + size_t i = 0; + + for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16))) + { + v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); + v512u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To6665_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -458,7 +524,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX512(const u32 *__restrict src, u16 * } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512) { size_t i = 0; @@ -466,11 +532,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, { if (IS_UNALIGNED) { - _mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512(_mm512_loadu_si512((v512u32 *)(src+i))) ); + _mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512(_mm512_loadu_si512((v512u32 *)(src+i))) ); } else { - _mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512(_mm512_load_si512((v512u32 *)(src+i))) ); + _mm512_store_si512( 
(v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512(_mm512_load_si512((v512u32 *)(src+i))) ); } } @@ -478,7 +544,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, } template -size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer555xTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) { size_t i = 0; v512u16 src_v512u16[2]; @@ -572,7 +638,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__ } template -size_t ColorspaceConvertBuffer888XTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer888xTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) { size_t i = 0; v512u32 src_v512u32[4]; @@ -858,51 +924,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX512(u32 *dst, size_t pixCountVec512 } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict 
dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - 
return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + +template +size_t 
ColorspaceHandler_AVX512::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); } size_t ColorspaceHandler_AVX512::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -985,64 +1099,64 @@ size_t ColorspaceHandler_AVX512::ConvertBuffer6665To5551_SwapRB_IsUnaligned(cons return ColorspaceConvertBuffer6665To5551_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t 
ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, 
pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } size_t ColorspaceHandler_AVX512::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1105,23 +1219,29 @@ size_t ColorspaceHandler_AVX512::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 return 
ColorspaceApplyIntensityToBuffer32_AVX512(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, 
v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); + +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); + +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); @@ -1135,8 +1255,8 @@ template v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 
ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h index b04077a66..09283de67 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires AVX-512 Tier-1 support. #else -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template 
void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); template v512u32 ColorspaceConvert6665To8888_AVX512(const v512u32 &src); template v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); template v512u32 ColorspaceCopy32_AVX512(const v512u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler public: ColorspaceHandler_AVX512() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, 
u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ public: size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 
*__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp index e28949998..d36afecda 100755 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free 
software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -65,14 +65,14 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con } template -FORCEINLINE void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -113,24 +113,38 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con } template -FORCEINLINE void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + 
ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F}; - ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = (v128u16)vec_cmpgt( (v128s16)srcColor, ((v128s16){0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF}) ); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = vec_and( (v128u16)vec_cmpgt( (v128s16)srcColor, ((v128s16){0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF}) ), ((v128u16){0x1F1F,0x1F1F,0x1F1F,0x1F1F, 0x1F1F,0x1F1F,0x1F1F,0x1F1F, 0x1F1F,0x1F1F,0x1F1F,0x1F1F, 0x1F1F,0x1F1F,0x1F1F,0x1F1F}) ); + 
ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -230,7 +244,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, co } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src) { if (SWAP_RB) { @@ -263,7 +277,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src) } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) { size_t i = 0; @@ -271,7 +285,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert555xTo8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -280,7 +294,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri } template -size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) { size_t i = 0; @@ -288,7 +302,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert555xTo6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + vec_st(dstConvertedHi, 0, dst+i); + vec_st(dstConvertedLo, 16, dst+i); + } + + return i; +} + 
+template +static size_t ColorspaceConvertBuffer5551To8888_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16)) + { + v128u32 dstConvertedLo, dstConvertedHi; + + ColorspaceConvert5551To8888_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + vec_st(dstConvertedHi, 0, dst+i); + vec_st(dstConvertedLo, 16, dst+i); + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16)) + { + v128u32 dstConvertedLo, dstConvertedHi; + + ColorspaceConvert5551To6665_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -349,20 +397,20 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128) { size_t i = 0; for (; i < pixCountVec128; i+=4) { - vec_st( ColorspaceConvert888XTo8888Opaque_AltiVec(vec_ld(0, src+i)), 0, dst+i ); + vec_st( ColorspaceConvert888xTo8888Opaque_AltiVec(vec_ld(0, src+i)), 0, dst+i ); } return i; } template -size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128) { size_t i = 0; v128u16 src_v128u16[2]; @@ -405,7 +453,7 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t } template -size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128) { size_t i = 
0; v128u32 src_v128u32[4]; @@ -477,27 +525,51 @@ size_t ColorspaceCopyBuffer32_AltiVec(const u32 *src, u32 *dst, size_t pixCountV } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AltiVec(src, dst, pixCount); } size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -540,34 +612,34 @@ size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__re return ColorspaceConvertBuffer6665To5551_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t 
pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AltiVec(src, dst, pixCount); } size_t ColorspaceHandler_AltiVec::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -580,59 +652,59 @@ size_t ColorspaceHandler_AltiVec::CopyBuffer32_SwapRB(const u32 *src, u32 *dst, return ColorspaceCopyBuffer32_AltiVec(src, dst, pixCount); } -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 
&srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); 
-template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 
&dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void 
ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void 
ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const 
v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); @@ -646,8 +718,8 @@ template v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); -template v128u32 
ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h index 3078a13da..c607bfc01 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,17 @@ #warning This header requires PowerPC AltiVec support. #else -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src); @@ -46,11 +46,17 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler public: ColorspaceHandler_AltiVec() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -64,14 +70,14 @@ public: size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t 
pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp index 81ad657ab..3557c3975 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ #define COLOR32_SWAPRB_NEON(src) vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(src), ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})) ) template -FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -60,7 +60,7 @@ FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -90,7 +90,7 @@ FORCEINLINE void 
ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -122,7 +122,7 @@ FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -152,17 +152,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = vdupq_n_u16(0xFF00); - ColorspaceConvert555To8888_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = vdupq_n_u16(0x1F00); - ColorspaceConvert555To6665_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 
&dstHi) +{ + const v128s16 srcAlphaBits16 = vandq_s16( vcgtq_s16(vreinterpretq_u16_s16(srcColor), vdupq_n_s16(0xFFFF)), vdupq_n_s16(0xFF00) ); + ColorspaceConvert555aTo8888_NEON(srcColor, vreinterpretq_s16_u16(srcAlphaBits16), dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128s16 srcAlphaBits16 = vandq_s16( vcgtq_s16(vreinterpretq_u16_s16(srcColor), vdupq_n_s16(0xFFFF)), vdupq_n_s16(0x1F00) ); + ColorspaceConvert555aTo6665_NEON(srcColor, vreinterpretq_s16_u16(srcAlphaBits16), dstLo, dstHi); } template @@ -290,7 +304,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src) { if (SWAP_RB) { @@ -377,7 +391,7 @@ FORCEINLINE v128u32 ColorspaceApplyIntensity32_NEON(const v128u32 &src, float in } template -static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) { size_t i = 0; v128u16 srcVec; @@ -386,7 +400,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) { srcVec = vld1q_u16(src+i); - ColorspaceConvert555To8888Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); vst1q_u32_x2(dst+i, dstVec); } @@ -394,7 +408,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict } template -size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo6665Opaque_NEON(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCountVec128) { size_t i = 0; v128u16 srcVec; @@ -403,7 +417,41 @@ size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u3 for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) { srcVec = vld1q_u16(src+i); - ColorspaceConvert555To6665Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + vst1q_u32_x2(dst+i, dstVec); + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + v128u16 srcVec; + uint32x4x2_t dstVec; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + srcVec = vld1q_u16(src+i); + ColorspaceConvert5551To8888_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + vst1q_u32_x2(dst+i, dstVec); + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + v128u16 srcVec; + uint32x4x2_t dstVec; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + srcVec = vld1q_u16(src+i); + ColorspaceConvert5551To6665_NEON(srcVec, dstVec.val[0], dstVec.val[1]); vst1q_u32_x2(dst+i, dstVec); } @@ -467,7 +515,7 @@ size_t ColorspaceConvertBuffer6665To5551_NEON(const u32 *__restrict src, u16 *__ } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128) { size_t i = 0; uint8x16x4_t srcVec_x4; @@ -491,7 +539,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, si } template -size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t 
pixCountVec128) { size_t i = 0; uint16x8x2_t srcVec; @@ -529,7 +577,7 @@ size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__re } template -size_t ColorspaceConvertBuffer888XTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) { size_t i = 0; uint8x16x4_t srcVec_x4; @@ -723,51 +771,99 @@ size_t ColorspaceApplyIntensityToBuffer32_NEON(u32 *dst, size_t pixCountVec128, } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, 
size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return 
ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); } size_t ColorspaceHandler_NEON::ConvertBuffer8888To6665(const u32 
*src, u32 *dst, size_t pixCount) const @@ -850,64 +946,64 @@ size_t ColorspaceHandler_NEON::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return ColorspaceConvertBuffer6665To5551_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888(const u16 *__restrict src, u8 
*__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } size_t ColorspaceHandler_NEON::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -970,23 +1066,29 @@ size_t ColorspaceHandler_NEON::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_NEON(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, 
v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); @@ -1000,8 +1102,8 @@ template v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, c template v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); -template v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h index 0669fb659..dbbebee41 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU 
General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires ARM64 NEON support. #else -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 
ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); template v128u32 ColorspaceCopy32_NEON(const v128u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_NEON : public ColorspaceHandler public: ColorspaceHandler_NEON() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t
pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ public: size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t 
ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) 
const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp index d933ed4f6..77e6ae22e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ #endif template -FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -66,7 +66,7 @@ FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, 
v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -97,7 +97,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -131,7 +131,7 @@ FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -162,17 +162,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = _mm_set1_epi16(0xFF00); - ColorspaceConvert555To8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = _mm_set1_epi16(0x1F00); - ColorspaceConvert555To6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); + 
ColorspaceConvert555aTo6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(srcColor, _mm_set1_epi16(0xFFFF)), _mm_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(srcColor, _mm_set1_epi16(0xFFFF)), _mm_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -315,7 +329,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src) { if (SWAP_RB) { @@ -422,7 +436,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict { v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo8888Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -448,7 +462,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u3 { v128u16 src_vec128 = (IS_UNALIGNED) ? 
_mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo6665Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); + v128u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To8888_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + v128u16 src_vec128 = (IS_UNALIGNED) ? 
_mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); + v128u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To6665_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -554,11 +620,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_SSE2(const u32 *src, u32 *dst, si { if (IS_UNALIGNED) { - _mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2(_mm_loadu_si128((v128u32 *)(src+i))) ); + _mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2(_mm_loadu_si128((v128u32 *)(src+i))) ); } else { - _mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2(_mm_load_si128((v128u32 *)(src+i))) ); + _mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2(_mm_load_si128((v128u32 *)(src+i))) ); } } @@ -937,53 +1003,101 @@ size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128, } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return 
ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } +template +size_t 
ColorspaceHandler_SSE2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer8888To6665_SSE2(src, dst, pixCount); @@ -1064,64 +1178,64 @@ size_t 
ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return ColorspaceConvertBuffer6665To5551_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } #ifdef ENABLE_SSSE3 -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, 
size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } @@ -1188,23 +1302,23 @@ size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_SSE2(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); 
-template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); @@ -1218,8 +1332,8 @@ template v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, c template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); template v128u16 ColorspaceCopy16_SSE2(const 
v128u16 &src); template v128u16 ColorspaceCopy16_SSE2(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h index 094dc5178..e8cbe085b 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires SSE2 support. #else -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); template v128u16 ColorspaceCopy16_SSE2(const v128u16 &src); template v128u32 ColorspaceCopy32_SSE2(const v128u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler public: ColorspaceHandler_SSE2() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,21 +89,21 @@ public: size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; #ifdef ENABLE_SSSE3 - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, 
size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; #endif size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;