diff --git a/desmume/src/FIFO.cpp b/desmume/src/FIFO.cpp index b94062edc..cde1350ee 100755 --- a/desmume/src/FIFO.cpp +++ b/desmume/src/FIFO.cpp @@ -1,7 +1,7 @@ /* Copyright 2006 yopyop Copyright 2007 shash - Copyright 2007-2021 DeSmuME team + Copyright 2007-2022 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -383,7 +383,7 @@ void DISP_FIFOrecv_Line16(u16 *__restrict dst) for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16)) { v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head); - fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) ); + fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) ); vec_st(fifoColor, i, dst); } #else @@ -418,7 +418,7 @@ void _DISP_FIFOrecv_LineOpaque16_vec(u32 *__restrict dst) for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16)) { v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head); - fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) ); + fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) ); fifoColor = vec_or(fifoColor, ((v128u16){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000})); vec_st(fifoColor, i, dst); } @@ -445,7 +445,7 @@ void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst) v128u32 dstLo = ((v128u32){0,0,0,0}); v128u32 dstHi = ((v128u32){0,0,0,0}); - fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){10,11, 8,9, 14,15, 12,13, 2,3, 0,1, 6,7, 4,5}) ); + fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){10,11, 8,9, 14,15, 12,13, 2,3, 0,1, 6,7, 4,5}) ); if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index ee814b874..9b2e16a0f 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -1,6 +1,6 @@ /* Copyright (C) 2006-2007 shash - Copyright (C) 2008-2021 DeSmuME team + Copyright (C) 2008-2022 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -878,8 +878,8 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons v128u16 inColor16SwappedLo = vec_ld( 0, inColor16 + i); v128u16 inColor16SwappedHi = vec_ld(16, inColor16 + i); - inColor16SwappedLo = vec_perm(inColor16SwappedLo, inColor16SwappedLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); - inColor16SwappedHi = vec_perm(inColor16SwappedHi, inColor16SwappedHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + inColor16SwappedLo = vec_perm((v128u8)inColor16SwappedLo, (v128u8)inColor16SwappedLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + inColor16SwappedHi = vec_perm((v128u8)inColor16SwappedHi, (v128u8)inColor16SwappedHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); vec_st(inColor16SwappedLo, 0, outColor16 + i); vec_st(inColor16SwappedHi, 16, outColor16 + i); @@ -893,16 +893,16 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons v128u16 clearDepthLo = vec_ld( 0, inDepth16 + i); v128u16 clearDepthHi = vec_ld(16, inDepth16 + i); - clearDepthLo = vec_perm(clearDepthLo, clearDepthLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); - clearDepthHi = vec_perm(clearDepthHi, clearDepthHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + clearDepthLo = vec_perm((v128u8)clearDepthLo, (v128u8)clearDepthLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + clearDepthHi = vec_perm((v128u8)clearDepthHi, (v128u8)clearDepthHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); const v128u16 clearDepthValueLo = vec_and(clearDepthLo, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF})); const v128u16 clearDepthValueHi = vec_and(clearDepthHi, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF})); - const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0})); - const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0})); - const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0})); - const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0})); + const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0})); + const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0})); + const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0})); + const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0})); vec_st( vec_msum(calcDepth0, calcDepthMul, calcDepthAdd), 0, outDepth24 + i); vec_st( vec_msum(calcDepth1, calcDepthMul, calcDepthAdd), 16, outDepth24 + i); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp index 9092db579..e28949998 100755 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2022 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,7 +32,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con v128u16 srcSwapped; if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) ) { - srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + srcSwapped = vec_perm((v128u8)srcColor, (v128u8)srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); } else { @@ -44,11 +44,11 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) { - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); } else { - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); + dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); } dstHi = vec_unpackh((vector pixel)srcSwapped); @@ -56,11 +56,11 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) { - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); } else { - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); + dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); } } @@ -80,7 +80,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con v128u16 srcSwapped; if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) ) { - srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); + srcSwapped = vec_perm((v128u8)srcColor, (v128u8)srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14})); } else { @@ -92,11 +92,11 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) { - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); + dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17})); } else { - dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); + dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F})); } dstHi = vec_unpackh((vector pixel)srcSwapped); @@ -104,11 +104,11 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) ) { - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); + dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F})); } else { - dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); + dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F})); } } @@ -202,13 +202,13 @@ FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const // Convert RGB if (SWAP_RB) { - rgbLo = vec_perm( rgbLo, rgbLo, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) ); - rgbHi = vec_perm( rgbHi, rgbHi, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) ); + rgbLo = vec_perm( (v128u8)rgbLo, (v128u8)rgbLo, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) ); + rgbHi = vec_perm( (v128u8)rgbHi, (v128u8)rgbHi, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) ); } else { - rgbLo = vec_perm( rgbLo, rgbLo, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) ); - rgbHi = vec_perm( rgbHi, rgbHi, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) ); + rgbLo = vec_perm( (v128u8)rgbLo, (v128u8)rgbLo, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) ); + rgbHi = vec_perm( (v128u8)rgbHi, (v128u8)rgbHi, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) ); } dstColor = (v128u16)vec_packpx(rgbLo, rgbHi); @@ -234,10 +234,10 @@ FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src { if (SWAP_RB) { - return vec_or( vec_perm(src, src, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) ); + return vec_or( vec_perm((v128u8)src, (v128u8)src, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) ); } - return vec_or( vec_perm(src, src, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) ); + return vec_or( vec_perm((v128u8)src, (v128u8)src, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) ); } template @@ -256,7 +256,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src) { if (SWAP_RB) { - return vec_perm(src, src, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})); + return vec_perm((v128u8)src, (v128u8)src, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})); } return src; @@ -385,15 +385,15 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t if (SWAP_RB) { - src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) ); - src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) ); - src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) ); + src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) ); + src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) ); + src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) ); } else { - src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) ); - src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) ); - src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) ); + src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) ); + src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) ); + src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) ); } vec_st( src_v128u32[0], 0, dst + (i * 3) ); @@ -419,15 +419,15 @@ size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t if (SWAP_RB) { - src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) ); - src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) ); - src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) ); + src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) ); + src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) ); + src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) ); } else { - src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) ); - src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) ); - src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) ); + src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) ); + src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) ); + src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) ); } vec_st( src_v128u32[0], 0, dst + (i * 3) ); @@ -655,4 +655,4 @@ template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src); template v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src); -#endif // ENABLE_SSE2 +#endif // ENABLE_ALTIVEC