From ad64e73bb25bc6f37313e933a1dbc726219afeb4 Mon Sep 17 00:00:00 2001 From: rogerman Date: Sat, 11 Sep 2021 20:59:41 -0700 Subject: [PATCH] Colorspace Handler: Colorspace conversions no longer do big-endian byte swapping by default. Instead, byte swapping must be explicitly requested. - Byte swapping can now be independently controlled for both input and output data. - As an application to this new API, VRAM display mode now shows the correct colors on big-endian systems. --- desmume/src/FIFO.cpp | 16 +- desmume/src/GPU.cpp | 28 +-- desmume/src/GPU_Operations.cpp | 20 +- .../src/frontend/cocoa/OGLDisplayOutput.cpp | 8 +- desmume/src/frontend/cocoa/cocoa_rom.mm | 116 +++------ .../userinterface/MacMetalDisplayView.mm | 2 +- desmume/src/frontend/posix/gtk/main.cpp | 4 +- desmume/src/frontend/windows/display.cpp | 2 +- desmume/src/rasterize.cpp | 8 +- desmume/src/render3D.cpp | 2 +- desmume/src/texcache.cpp | 50 ++-- desmume/src/types.h | 19 ++ .../colorspacehandler/colorspacehandler.cpp | 226 +++++++++++++----- .../colorspacehandler/colorspacehandler.h | 36 +-- .../colorspacehandler_AVX2.cpp | 10 +- .../colorspacehandler_AVX2.h | 18 +- .../colorspacehandler_AVX512.cpp | 10 +- .../colorspacehandler_AVX512.h | 18 +- .../colorspacehandler_AltiVec.cpp | 176 ++++++++++---- .../colorspacehandler_AltiVec.h | 22 +- .../colorspacehandler_SSE2.cpp | 10 +- .../colorspacehandler_SSE2.h | 18 +- 22 files changed, 506 insertions(+), 313 deletions(-) diff --git a/desmume/src/FIFO.cpp b/desmume/src/FIFO.cpp index b60b98b53..38fecc1e9 100755 --- a/desmume/src/FIFO.cpp +++ b/desmume/src/FIFO.cpp @@ -436,11 +436,11 @@ void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_AltiVec(fifoColor, dstLo, dstHi); + ColorspaceConvert555To6665Opaque_AltiVec(fifoColor, dstLo, dstHi); } else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - ColorspaceConvert555To8888Opaque_AltiVec(fifoColor, dstLo, dstHi); + 
ColorspaceConvert555To8888Opaque_AltiVec(fifoColor, dstLo, dstHi); } vec_st(dstLo, d + 0, dst); @@ -449,11 +449,11 @@ void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst) #else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); + ColorspaceConvertBuffer555To6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); } else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); + ColorspaceConvertBuffer555To8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); } _DISP_FIFOrecv_LineAdvance(); #endif @@ -499,13 +499,13 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - dst[i+0] = COLOR555TO6665_OPAQUE((src >> 0) & 0x7FFF); - dst[i+1] = COLOR555TO6665_OPAQUE((src >> 16) & 0x7FFF); + dst[i+0] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque((src >> 0) & 0x7FFF) ); + dst[i+1] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque((src >> 16) & 0x7FFF) ); } else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - dst[i+0] = COLOR555TO8888_OPAQUE((src >> 0) & 0x7FFF); - dst[i+1] = COLOR555TO8888_OPAQUE((src >> 16) & 0x7FFF); + dst[i+0] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque((src >> 0) & 0x7FFF) ); + dst[i+1] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque((src >> 16) & 0x7FFF) ); } } } diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index fbe286f97..3e5f59fcd 100755 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -365,7 +365,7 @@ void GPUEngineBase::_Reset_Base() renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; renderState.workingBackdropColor16 = 
renderState.backdropColor16; - renderState.workingBackdropColor32.color = (dispInfo.colorFormat == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)); + renderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (dispInfo.colorFormat == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) ); renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; renderState.blendEVA = 0; renderState.blendEVB = 0; @@ -864,7 +864,7 @@ void GPUEngineBase::UpdateRenderStates(const size_t l) { currRenderState.workingBackdropColor16 = currRenderState.backdropColor16; } - currRenderState.workingBackdropColor32.color = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)); + currRenderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) ); // Save the current render states to this line's compositor info. 
compInfo.renderState = currRenderState; @@ -3949,7 +3949,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } this->_RenderLine_DispCapture_Copy(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt); @@ -3990,7 +3990,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0)) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } CopyLineExpandHinted<0xFFFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32); @@ -4139,7 +4139,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI { if (willReadNativeVRAM) { - ColorspaceConvertBuffer555To8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555To8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); } } @@ -4669,7 +4669,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo) { const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetNative; u32 *dst = (u32 *)this->_nativeBuffer + lineInfo.blockOffsetNative; - ColorspaceConvertBuffer555To6665Opaque(src, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555To6665Opaque(src, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); break; } @@ -4677,7 +4677,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo) { const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetNative; u32 *dst = (u32 
*)this->_nativeBuffer + lineInfo.blockOffsetNative; - ColorspaceConvertBuffer555To8888Opaque(src, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555To8888Opaque(src, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); break; } } @@ -4694,7 +4694,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo) { const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + lineInfo.blockOffsetCustom; u32 *dst = (u32 *)this->_customBuffer + lineInfo.blockOffsetCustom; - ColorspaceConvertBuffer555To6665Opaque(src, dst, lineInfo.pixelCount); + ColorspaceConvertBuffer555To6665Opaque(src, dst, lineInfo.pixelCount); break; } @@ -5937,11 +5937,11 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551) break; case NDSColorFormat_BGR666_Rev: - color32.color = COLOR555TO6665_OPAQUE(colorBGRA5551 & 0x7FFF); + color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(colorBGRA5551 & 0x7FFF) ); break; case NDSColorFormat_BGR888_Rev: - color32.color = COLOR555TO8888_OPAQUE(colorBGRA5551 & 0x7FFF); + color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(colorBGRA5551 & 0x7FFF) ); break; default: @@ -6139,7 +6139,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size) { if (this->_displayInfo.isDisplayEnabled[NDSDisplayID_Main]) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Main], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Main], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To6665Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Main], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Main], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { @@ -6148,7 +6148,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size) if (this->_displayInfo.isDisplayEnabled[NDSDisplayID_Touch]) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Touch], (u32 
*)this->_displayInfo.nativeBuffer[NDSDisplayID_Touch], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To6665Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Touch], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Touch], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { @@ -6161,7 +6161,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size) { if (this->_displayInfo.isDisplayEnabled[NDSDisplayID_Main]) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Main], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Main], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To8888Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Main], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Main], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { @@ -6170,7 +6170,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size) if (this->_displayInfo.isDisplayEnabled[NDSDisplayID_Touch]) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Touch], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Touch], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To8888Opaque((u16 *)this->_displayInfo.customBuffer[NDSDisplayID_Touch], (u32 *)this->_displayInfo.nativeBuffer[NDSDisplayID_Touch], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { diff --git a/desmume/src/GPU_Operations.cpp b/desmume/src/GPU_Operations.cpp index b258daee1..1948fc9fd 100644 --- a/desmume/src/GPU_Operations.cpp +++ b/desmume/src/GPU_Operations.cpp @@ -242,8 +242,8 @@ void PixelOperation::InitLUTs() cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16)); cur.bits.alpha = 0; PixelOperation::BrightnessUpTable555[i][j] = cur.val; - PixelOperation::BrightnessUpTable666[i][j].color = COLOR555TO666(cur.val); - 
PixelOperation::BrightnessUpTable888[i][j].color = COLOR555TO888(cur.val); + PixelOperation::BrightnessUpTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); + PixelOperation::BrightnessUpTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); cur.val = j; cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16)); @@ -251,8 +251,8 @@ void PixelOperation::InitLUTs() cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16)); cur.bits.alpha = 0; PixelOperation::BrightnessDownTable555[i][j] = cur.val; - PixelOperation::BrightnessDownTable666[i][j].color = COLOR555TO666(cur.val); - PixelOperation::BrightnessDownTable888[i][j].color = COLOR555TO888(cur.val); + PixelOperation::BrightnessDownTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); + PixelOperation::BrightnessDownTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); } } @@ -289,11 +289,11 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons break; case NDSColorFormat_BGR666_Rev: - dstColor32.color = ColorspaceConvert555To6665Opaque(srcColor16); + dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); break; case NDSColorFormat_BGR888_Rev: - dstColor32.color = ColorspaceConvert555To8888Opaque(srcColor16); + dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); break; } @@ -562,7 +562,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI switch (selectedEffect) { case ColorEffect_Disable: - dstColor32.color = ColorspaceConvert555To6665Opaque(srcColor16); + dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); break; case ColorEffect_IncreaseBrightness: @@ -576,7 +576,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI case ColorEffect_Blend: { FragmentColor srcColor32; - srcColor32.color = ColorspaceConvert555To6665Opaque(srcColor16); + srcColor32.color = LE_TO_LOCAL_32( 
ColorspaceConvert555To6665Opaque(srcColor16) ); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D(srcColor32, dstColor32) : colorop.blend(srcColor32, dstColor32, blendEVA, blendEVB); break; } @@ -587,7 +587,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI switch (selectedEffect) { case ColorEffect_Disable: - dstColor32.color = ColorspaceConvert555To8888Opaque(srcColor16); + dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); break; case ColorEffect_IncreaseBrightness: @@ -601,7 +601,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI case ColorEffect_Blend: { FragmentColor srcColor32; - srcColor32.color = ColorspaceConvert555To8888Opaque(srcColor16); + srcColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D(srcColor32, dstColor32) : colorop.blend(srcColor32, dstColor32, blendEVA, blendEVB); break; } diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp index c2a979f69..b09d57412 100644 --- a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp @@ -4663,7 +4663,7 @@ void OGLClientFetchObject::FetchNativeDisplayToSrcClone(const NDSDisplayID displ if (this->_fetchColorFormatOGL == GL_UNSIGNED_SHORT_1_5_5_5_REV) { - ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)this->_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)this->_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { @@ -5500,9 +5500,9 @@ OGLImage::OGLImage(OGLContextInfo *contextInfo, GLsizei imageWidth, 
GLsizei imag const GLint vtxBuffer[8] = { (GLint)(-_normalWidth/2.0), (GLint)( _normalHeight/2.0), - (GLint)( _normalWidth/2.0), (GLint)( _normalHeight/2.0), - (GLint)(-_normalWidth/2.0), (GLint)(-_normalHeight/2.0), - (GLint)( _normalWidth/2.0), (GLint)(-_normalHeight/2.0) + (GLint)( _normalWidth/2.0), (GLint)( _normalHeight/2.0), + (GLint)(-_normalWidth/2.0), (GLint)(-_normalHeight/2.0), + (GLint)( _normalWidth/2.0), (GLint)(-_normalHeight/2.0) }; // Set up VBOs diff --git a/desmume/src/frontend/cocoa/cocoa_rom.mm b/desmume/src/frontend/cocoa/cocoa_rom.mm index fee0e4e2b..1e02051c6 100644 --- a/desmume/src/frontend/cocoa/cocoa_rom.mm +++ b/desmume/src/frontend/cocoa/cocoa_rom.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2011-2017 DeSmuME team + Copyright (C) 2011-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -640,17 +640,15 @@ static NSMutableDictionary *saveTypeValues = nil; @end /******************************************************************************************** - RomIconToRGBA8888() + @function RomIconToRGBA8888() - Reads the icon image data from a ROM and converts it to an RGBA8888 formatted bitmap. + @brief Reads the icon image data from a ROM and converts it to an RGBA8888 formatted bitmap. - Takes: - bitmapData - Write pointer for the icon's pixel data. + @param bitmapData Write pointer for the icon's pixel data. - Returns: - Nothing. + @return Nothing - Details: + @discussion - If bitmapData is NULL, then this function immediately returns and does nothing. - If no ROM is loaded, then bitmapData will have a black square icon. 
- The caller is responsible for ensuring that bitmapData points to a valid @@ -660,110 +658,68 @@ static NSMutableDictionary *saveTypeValues = nil; ********************************************************************************************/ void RomIconToRGBA8888(uint32_t *bitmapData) { - const RomBanner &ndsRomBanner = gameInfo.getRomBanner(); // Contains the memory addresses we need to get our read pointer locations. - const uint16_t *iconClutPtr; // Read pointer for the icon's CLUT. - const uint32_t *iconPixPtr; // Read pointer for the icon's pixel data. - - uint32_t clut[16]; // 4-bit indexed CLUT, storing RGBA8888 values for each color. - - uint32_t pixRowColors; // Temp location for storing an 8 pixel row of 4-bit indexed color values from the icon's pixel data. - unsigned int pixRowIndex; // Temp location for tracking which pixel row of an 8x8 square that we are reading. - unsigned int x; // Temp location for tracking which of the 8x8 pixel squares that we are reading (x-dimension). - unsigned int y; // Temp location for tracking which of the 8x8 pixel squares that we are reading (y-dimension). - - uint32_t *bitmapPixPtr; // Write pointer for the RGBA8888 bitmap pixel data, relative to the passed in *bitmapData pointer. - if (bitmapData == NULL) { return; } - // Set all of our icon read pointers. - iconClutPtr = (uint16_t *)ndsRomBanner.palette + 1; - iconPixPtr = (uint32_t *)ndsRomBanner.bitmap; + const RomBanner &ndsRomBanner = gameInfo.getRomBanner(); // Contains the memory addresses we need to get our read pointer locations. + const uint32_t *iconPixPtr = (uint32_t *)ndsRomBanner.bitmap; // Read pointer for the icon's pixel data. // Setup the 4-bit CLUT. // // The actual color values are stored with the ROM icon data in RGB555 format. // We convert these color values and store them in the CLUT as RGBA8888 values. // - // The first entry always represents the alpha, so we can just ignore it. 
- clut[0] = 0x00000000; - ColorspaceConvertBuffer555To8888Opaque((u16 *)iconClutPtr, &clut[1], 15); + // The first entry always represents the alpha, so just set it to 0. + const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette; + CACHE_ALIGN uint32_t clut32[16]; + ColorspaceConvertBuffer555To8888Opaque(clut4, clut32, 16); + clut32[0] = 0x00000000; // Load the image from the icon pixel data. // // ROM icons are stored in 4-bit indexed color and have dimensions of 32x32 pixels. - // Also, ROM icons are split into 16 separate 8x8 pixel squares arranged in a 4x4 + // Also, ROM icons are split into 16 separate 8x8 pixel tiles arranged in a 4x4 // array. Here, we sequentially read from the ROM data, and adjust our write // location appropriately within the bitmap memory block. - for(y = 0; y < 4; y++) + for (size_t y = 0; y < 4; y++) { - for(x = 0; x < 4; x++) + for (size_t x = 0; x < 4; x++) { - for(pixRowIndex = 0; pixRowIndex < 8; pixRowIndex++, iconPixPtr++) + for (size_t p = 0; p < 8; p++, iconPixPtr++) { - // Load the entire row of pixels as a single 32-bit chunk. - pixRowColors = *iconPixPtr; + // Load an entire row of palette color indices as a single 32-bit chunk. + const uint32_t palIdx = LE_TO_LOCAL_32(*iconPixPtr); // Set the write location. The formula below calculates the proper write // location depending on the position of the read pointer. We use a more // optimized version of this formula in practice. // - // bitmapPixPtr = bitmapData + ( ((y * 8) + pixRowIndex) * 32 ) + (x * 8); - bitmapPixPtr = bitmapData + ( ((y << 3) + pixRowIndex) << 5 ) + (x << 3); + // bitmapOutPtr = bitmapData + ( ((y * 8) + p) * 32 ) + (x * 8); + uint32_t *bitmapOutPtr = bitmapData + ( ((y << 3) + p) << 5 ) + (x << 3); + *bitmapOutPtr = clut32[(palIdx & 0x0000000F) >> 0]; - // Set the RGBA8888 bitmap pixels using our CLUT from earlier. 
+ bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x000000F0) >> 4]; -#ifdef MSB_FIRST - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x0F000000) >> 24]); + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x00000F00) >> 8]; - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0xF0000000) >> 28]); + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x0000F000) >> 12]; - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x000F0000) >> 16]); + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x000F0000) >> 16]; - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x00F00000) >> 20]); + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x00F00000) >> 20]; - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x00000F00) >> 8]); + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0x0F000000) >> 24]; - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x0000F000) >> 12]); - - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x0000000F)]); - - bitmapPixPtr++; - *bitmapPixPtr = LOCAL_TO_LE_32(clut[(pixRowColors & 0x000000F0) >> 4]); - -#else - - *bitmapPixPtr = clut[(pixRowColors & 0x0000000F)]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x000000F0) >> 4]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x00000F00) >> 8]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x0000F000) >> 12]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x000F0000) >> 16]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x00F00000) >> 20]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0x0F000000) >> 24]; - - bitmapPixPtr++; - *bitmapPixPtr = clut[(pixRowColors & 0xF0000000) >> 28]; -#endif + bitmapOutPtr++; + *bitmapOutPtr = clut32[(palIdx & 0xF0000000) >> 28]; } } } diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm 
b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index e199a4a9f..95f23e681 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -2456,7 +2456,7 @@ void MacMetalFetchObject::_FetchNativeDisplayByID(const NDSDisplayID displayID, if (this->_fetchDisplayInfo[bufferIndex].pixelBytes == 2) { - ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)this->_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)this->_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } else { diff --git a/desmume/src/frontend/posix/gtk/main.cpp b/desmume/src/frontend/posix/gtk/main.cpp index 30f1b96e7..82bb1cf78 100644 --- a/desmume/src/frontend/posix/gtk/main.cpp +++ b/desmume/src/frontend/posix/gtk/main.cpp @@ -1301,7 +1301,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo static inline void gpu_screen_to_rgb(u32* dst) { - ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)GPU->GetDisplayInfo().masterNativeBuffer, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)GPU->GetDisplayInfo().masterNativeBuffer, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); } static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) { @@ -1427,7 +1427,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo } static void RedrawScreen() { - ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)GPU->GetDisplayInfo().masterNativeBuffer, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT 
* 2); + ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)GPU->GetDisplayInfo().masterNativeBuffer, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); #ifdef HAVE_LIBAGG aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024); osd->update(); diff --git a/desmume/src/frontend/windows/display.cpp b/desmume/src/frontend/windows/display.cpp index b956ba4be..143136083 100644 --- a/desmume/src/frontend/windows/display.cpp +++ b/desmume/src/frontend/windows/display.cpp @@ -683,7 +683,7 @@ void DoDisplay() //we have to do a copy here because we're about to draw the OSD onto it. bummer. if (gpu_bpp == 15) - ColorspaceConvertBuffer555To8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); + ColorspaceConvertBuffer555To8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); else ColorspaceConvertBuffer888XTo8888Opaque((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 54af31961..a9b939882 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -2034,7 +2034,7 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D &engine) } // Convert the toon table colors - ColorspaceConvertBuffer555To6665Opaque(engine.renderState.u16ToonTable, (u32 *)this->toonColor32LUT, 32); + ColorspaceConvertBuffer555To6665Opaque(engine.renderState.u16ToonTable, (u32 *)this->toonColor32LUT, 32); if (this->_enableEdgeMark) { @@ -2103,7 +2103,7 @@ void SoftRasterizerRenderer::_UpdateEdgeMarkColorTable(const u16 *edgeMarkColorT //we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors for (size_t i = 0; i < 8; i++) { - this->_edgeMarkTable[i].color = COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->enableAntialiasing) ? 
0x10 : 0x1F); + this->_edgeMarkTable[i].color = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->enableAntialiasing) ? 0x10 : 0x1F) ); //zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code. //// this seems to be the only thing that selectively disables edge marking @@ -2251,7 +2251,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz if (param.enableFog) { FragmentColor fogColor; - fogColor.color = COLOR555TO6665( param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F ); + fogColor.color = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) ); const size_t fogIndex = depth >> 9; assert(fogIndex < 32768); @@ -2309,7 +2309,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo { const size_t ir = readLine + ((x * xRatio) >> 16); - this->_framebufferColor[iw].color = COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F); + this->_framebufferColor[iw].color = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) ); this->_framebufferAttributes->depth[iw] = depthBuffer[ir]; this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir]; this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 447d2af33..ee814b874 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -698,7 +698,7 @@ Render3DError Render3D::Render(const GFX3D &engine) this->_isPoweredOn = true; const u32 clearColorSwapped = LE_TO_LOCAL_32(engine.renderState.clearColor); - this->_clearColor6665.color = COLOR555TO6665(clearColorSwapped & 0x7FFF, (clearColorSwapped >> 16) & 0x1F); + this->_clearColor6665.color = LE_TO_LOCAL_32( COLOR555TO6665(clearColorSwapped & 
0x7FFF, (clearColorSwapped >> 16) & 0x1F) ); this->_clearAttributes.opaquePolyID = (clearColorSwapped >> 24) & 0x3F; //special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index 137fdb289..c028290a7 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -44,7 +44,7 @@ using std::max; #define DO_DEBUG_DUMP_TEXTURE #endif -#define CONVERT(color) ((TEXCACHEFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color)) +#define CONVERT(color) ( (TEXCACHEFORMAT == TexFormat_32bpp) ? ColorspaceConvert555To8888Opaque(color) : ColorspaceConvert555To6665Opaque(color) ) //This class represents a number of regions of memory which should be viewed as contiguous class MemSpan @@ -888,16 +888,16 @@ void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, cons u8 idx; idx = *srcData & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); idx = (*srcData >> 2) & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); idx = (*srcData >> 4) & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); idx = (*srcData >> 6) & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 
0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); } #endif } @@ -939,10 +939,10 @@ void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, cons #else for (size_t i = 0; i < srcSize; i++, srcData++) { - *dstBuffer++ = CONVERT(srcPal[ *srcData & 0x03] & 0x7FFF); - *dstBuffer++ = CONVERT(srcPal[(*srcData >> 2) & 0x03] & 0x7FFF); - *dstBuffer++ = CONVERT(srcPal[(*srcData >> 4) & 0x03] & 0x7FFF); - *dstBuffer++ = CONVERT(srcPal[(*srcData >> 6) & 0x03] & 0x7FFF); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[ *srcData & 0x03] & 0x7FFF) ); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 2) & 0x03] & 0x7FFF) ); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 4) & 0x03] & 0x7FFF) ); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 6) & 0x03] & 0x7FFF) ); } #endif } @@ -1009,10 +1009,10 @@ void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, cons u8 idx; idx = *srcData & 0x0F; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); idx = *srcData >> 4; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); } #endif } @@ -1059,8 +1059,8 @@ void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, cons #else for (size_t i = 0; i < srcSize; i++, srcData++) { - *dstBuffer++ = CONVERT(srcPal[*srcData & 0x0F] & 0x7FFF); - *dstBuffer++ = CONVERT(srcPal[*srcData >> 4] & 0x7FFF); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData & 0x0F] & 0x7FFF) ); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData >> 4] & 0x7FFF) ); } #endif } @@ -1074,14 +1074,14 @@ void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, cons for (size_t i = 0; i < srcSize; i++, srcData++) { const u8 idx = *srcData; - *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(srcPal[idx] & 0x7FFF); + *dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) ); } } else { for (size_t i = 0; i < srcSize; i++, srcData++) { - *dstBuffer++ = CONVERT(srcPal[*srcData] & 0x7FFF); + *dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData] & 0x7FFF) ); } } } @@ -1093,7 +1093,7 @@ void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, co { const u16 c = srcPal[*srcData & 0x1F] & 0x7FFF; const u8 alpha = *srcData >> 5; - *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); + *dstBuffer++ = LE_TO_LOCAL_32( (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]) ); } } @@ -1145,7 +1145,7 @@ void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, co { const u16 c = srcPal[*srcData & 0x07] & 0x7FFF; const u8 alpha = (*srcData >> 3); - *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]); + *dstBuffer++ = LE_TO_LOCAL_32( (TEXCACHEFORMAT == TexFormat_15bpp) ? 
COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]) ); } #endif } @@ -1193,13 +1193,13 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co const u8 mode = pal1>>14; CACHE_ALIGN u32 tmp_col[4]; - tmp_col[0] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset) ); - tmp_col[1] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+1) ); + tmp_col[0] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(PAL4X4(pal1offset+0)) ); + tmp_col[1] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(PAL4X4(pal1offset+1)) ); switch (mode) { case 0: - tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); + tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(PAL4X4(pal1offset+2)) ); tmp_col[3] = 0x00000000; break; @@ -1219,8 +1219,8 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co break; case 2: - tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); - tmp_col[3] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+3) ); + tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(PAL4X4(pal1offset+2)) ); + tmp_col[3] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(PAL4X4(pal1offset+3)) ); break; case 3: @@ -1248,8 +1248,8 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co ( ((g0*3 + g1*5)>>6) << 5 ) | ( ((b0*3 + b1*5)>>6) << 10 ); - tmp_col[2] = COLOR555TO8888_OPAQUE(tmp1); - tmp_col[3] = COLOR555TO8888_OPAQUE(tmp2); + tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(tmp1) ); + tmp_col[3] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(tmp2) ); break; } @@ -1319,7 +1319,7 @@ void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u16 *__restrict src for (; i < pixCount; i++, srcData++) { const u16 c = LOCAL_TO_LE_16(*srcData); - *dstBuffer++ = (c & 0x8000) ? CONVERT(c & 0x7FFF) : 0; + *dstBuffer++ = (c & 0x8000) ? 
LE_TO_LOCAL_32( CONVERT(c & 0x7FFF) ) : 0; } } diff --git a/desmume/src/types.h b/desmume/src/types.h index b7bdfcc16..c756a1bfb 100644 --- a/desmume/src/types.h +++ b/desmume/src/types.h @@ -414,6 +414,25 @@ inline bool atomic_test_and_clear_barrier32(volatile s32 *V, s32 M) { return (s #endif +// Flags used to determine how a conversion function swaps bytes for big-endian systems. +// These flags should be ignored on little-endian systems. +enum BESwapFlags +{ + BESwapNone = 0x00, // No byte swapping for both incoming and outgoing data. All data is used as-is. + + BESwapIn = 0x01, // All incoming data is byte swapped; outgoing data is used as-is. + BESwapPre = 0x01, // An alternate name for "BESwapIn" + BESwapSrc = 0x01, // An alternate name for "BESwapIn" + + BESwapOut = 0x02, // All incoming data is used as-is; outgoing data is byte swapped. + BESwapPost = 0x02, // An alternate name for "BESwapOut" + BESwapDst = 0x02, // An alternate name for "BESwapOut" + + BESwapInOut = 0x03, // Both incoming data and outgoing data are byte swapped. 
+ BESwapPrePost = 0x03, // An alternate name for "BESwapInOut" + BESwapSrcDst = 0x03 // An alternate name for "BESwapInOut" +}; + /* little endian (ds' endianess) to local endianess convert macros */ #ifdef MSB_FIRST /* local arch is big endian */ # define LE_TO_LOCAL_16(x) ((((x)&0xff)<<8)|(((x)>>8)&0xff)) diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp index 17755af12..f725f376c 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -143,13 +143,13 @@ void ColorspaceHandlerInit() for (size_t i = 0; i < 32768; i++) { - color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) ); - color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 ); - color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 ); + color_555_to_666[i] = RGB15TO18_BITLOGIC(i); + color_555_to_6665_opaque[i] = RGB15TO18_BITLOGIC(i) | 0x1F000000; + color_555_to_6665_opaque_swap_rb[i] = RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000; - color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) ); - color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 ); - color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 ); + color_555_to_888[i] = RGB15TO24_BITLOGIC(i); + color_555_to_8888_opaque[i] = RGB15TO24_BITLOGIC(i) | 0xFF000000; + color_555_to_8888_opaque_swap_rb[i] = RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000; } #define RGB16_SWAP_RB_BITLOGIC(col) ( (((col)&0x001F)<<10) | ((col)&0x03E0) | (((col)&0x7C00)>>10) | ((col)&0x8000) ) @@ -161,7 +161,7 @@ void ColorspaceHandlerInit() } } 
-template +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -173,36 +173,51 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector); } } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + break; + + case BESwapIn: + dst[i] = ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])) ); + break; + } } } -template +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -214,32 +229,47 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = 
csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector); } } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + break; + + case BESwapIn: + dst[i] = ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])) ); + break; + } } } @@ -275,9 +305,7 @@ void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceConvert8888To6665(src[i]); @@ -316,9 +344,7 @@ void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceConvert6665To8888(src[i]); @@ -357,9 +383,7 @@ void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restric } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceConvert8888To5551(src[i]); @@ -398,9 +422,7 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = 
ColorspaceConvert6665To5551(src[i]); @@ -439,9 +461,7 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceConvert888XTo8888Opaque(src[i]); @@ -480,9 +500,7 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { ColorspaceConvert555XTo888(src[i], &dst[i*3]); @@ -521,9 +539,7 @@ void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { ColorspaceConvert888XTo888(src[i], &dst[i*3]); @@ -554,9 +570,7 @@ void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount) } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceCopy16(src[i]); @@ -587,9 +601,7 @@ void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount) } #pragma LOOPVECTORIZE_DISABLE - #endif // USEMANUALVECTORIZATION - for (; i < pixCount; i++) { dst[i] = ColorspaceCopy32(src[i]); @@ -773,72 +785,148 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi } } +template size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])) ); + break; + } } return 
i; } +template size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To8888Opaque(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(LE_TO_LOCAL_16(src[i])) ); + break; + } } return i; } +template size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount); } +template size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount); } +template size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])) ); + 
break; + } } return i; } +template size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; for (; i < pixCount; i++) { - dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert555To6665Opaque(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(LE_TO_LOCAL_16(src[i])) ); + break; + } } return i; } +template size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount); } +template size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount); } size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -1283,15 +1371,39 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t 
pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.h b/desmume/src/utils/colorspacehandler/colorspacehandler.h index cb520fa81..bf4ee0efa 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2017 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -108,22 +108,12 @@ extern CACHE_ALIGN u32 color_555_to_888[32768]; #define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color #define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped #define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color - -#ifdef MSB_FIRST - #define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian 
-#else - #define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian -#endif +#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha #define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color #define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped #define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color - -#ifdef MSB_FIRST - #define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian -#else - #define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian -#endif +#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha //produce a 15bpp color from individual 5bit components #define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) ) @@ -350,8 +340,8 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity) return ColorspaceApplyIntensity32(srcColorComponent); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); @@ -372,15 +362,15 @@ class ColorspaceHandler public: ColorspaceHandler() {}; - size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp index 50f99b363..e35747f34 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -904,41 +904,49 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256, return i; } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); } 
+template size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h index d32989e5d..af8f832d7 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2017 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,15 +47,15 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler public: ColorspaceHandler_AVX2() {}; - size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t 
pixCount) const; - size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; diff --git 
a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp index ea31fcfc2..713b145ba 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -857,41 +857,49 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX512(u32 *dst, size_t pixCountVec512 return i; } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return 
ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); } +template size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h index b46b6d684..b04077a66 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,15 +47,15 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler public: ColorspaceHandler_AVX512() {}; - size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp index 362539030..9092db579 100755 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp @@ -23,62 +23,114 @@ #include -template +template FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) - dstLo = 
vec_unpackl((vector pixel)srcColor); - dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){0,3,3,3, 0,3,3,3, 0,3,3,3, 0,3,3,3})), vec_sr((v128u8)dstLo, ((v128u8){0,2,2,2, 0,2,2,2, 0,2,2,2, 0,2,2,2})) ); - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + v128u16 srcSwapped; + if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) ) + { + srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + } + else + { + srcSwapped = srcColor; + } - dstHi = vec_unpackh((vector pixel)srcColor); + dstLo = vec_unpackl((vector pixel)srcSwapped); + dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){0,3,3,3, 0,3,3,3, 0,3,3,3, 0,3,3,3})), vec_sr((v128u8)dstLo, ((v128u8){0,2,2,2, 0,2,2,2, 0,2,2,2, 0,2,2,2})) ); + + if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) + { + dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + } + else + { + dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); + } + + dstHi = vec_unpackh((vector pixel)srcSwapped); dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){0,3,3,3, 0,3,3,3, 0,3,3,3, 0,3,3,3})), vec_sr((v128u8)dstHi, ((v128u8){0,2,2,2, 0,2,2,2, 0,2,2,2, 0,2,2,2})) ); - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? 
((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + + if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) + { + dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + } + else + { + dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); + } } -template +template FORCEINLINE void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } -template +template FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) - dstLo = vec_unpackl((vector pixel)srcColor); - dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1})), vec_sr((v128u8)dstLo, ((v128u8){0,4,4,4, 0,4,4,4, 0,4,4,4, 0,4,4,4})) ); - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? 
((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + v128u16 srcSwapped; + if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) ) + { + srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + } + else + { + srcSwapped = srcColor; + } - dstHi = vec_unpackh((vector pixel)srcColor); + dstLo = vec_unpackl((vector pixel)srcSwapped); + dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1})), vec_sr((v128u8)dstLo, ((v128u8){0,4,4,4, 0,4,4,4, 0,4,4,4, 0,4,4,4})) ); + + if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) + { + dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + } + else + { + dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); + } + + dstHi = vec_unpackh((vector pixel)srcSwapped); dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1})), vec_sr((v128u8)dstHi, ((v128u8){0,4,4,4, 0,4,4,4, 0,4,4,4, 0,4,4,4})) ); - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + + if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) + { + dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? 
((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + } + else + { + dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); + } } -template +template FORCEINLINE void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } -template +template FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { - const v128u16 srcAlphaBits16 = {0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; + ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } -template +template FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { - const v128u16 srcAlphaBits16 = {0x1F00, 0x1F00, 0x1F00, 0x1F00, 0x1F00, 0x1F00, 0x1F00, 0x1F00}; - ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F}; + ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -210,7 +262,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src) return src; } -template +template static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 
*__restrict src, u32 *__restrict dst, const size_t pixCountVec128) { size_t i = 0; @@ -219,7 +271,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert555To8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -227,7 +279,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri return i; } -template +template size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) { size_t i = 0; @@ -236,7 +288,7 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert555To6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -424,24 +476,28 @@ size_t ColorspaceCopyBuffer32_AltiVec(const u32 *src, u32 *dst, size_t pixCountV return i; } +template size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); } +template size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); } +template size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 
*__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); } +template size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); } size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -524,23 +580,59 @@ size_t ColorspaceHandler_AltiVec::CopyBuffer32_SwapRB(const u32 *src, u32 *dst, return ColorspaceCopyBuffer32_AltiVec(src, dst, pixCount); } -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, 
v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 
&srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 
&srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h index 8fffc72e2..3078a13da 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2017 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ #warning This header requires PowerPC AltiVec support. #else -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 
&srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); @@ -46,11 +46,11 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler public: ColorspaceHandler_AltiVec() {}; - size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp index 5e66311ed..d933ed4f6 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp +++ 
b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -936,41 +936,49 @@ size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128, return i; } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } +template size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } +template 
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h index 007b9f800..094dc5178 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2017 DeSmuME team + Copyright (C) 2016-2021 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,15 +47,15 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler public: ColorspaceHandler_SSE2() {}; - size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t 
ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;