Cocoa Port: Fix AltiVec vec_perm()-related bugs when building for ppc64.
- Apparently, vec_perm() on ppc32 always assumes vectors with 8-bit elements. On ppc64, however, vec_perm() can operate on elements of different sizes, so the vector arguments of every vec_perm() call must be explicitly typecast to ensure that the correct vec_perm() is called on ppc64.
This commit is contained in:
parent
a677ffd168
commit
a9706059e0
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
Copyright 2006 yopyop
|
||||
Copyright 2007 shash
|
||||
Copyright 2007-2021 DeSmuME team
|
||||
Copyright 2007-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -383,7 +383,7 @@ void DISP_FIFOrecv_Line16(u16 *__restrict dst)
|
|||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16))
|
||||
{
|
||||
v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head);
|
||||
fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
||||
vec_st(fifoColor, i, dst);
|
||||
}
|
||||
#else
|
||||
|
@ -418,7 +418,7 @@ void _DISP_FIFOrecv_LineOpaque16_vec(u32 *__restrict dst)
|
|||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16))
|
||||
{
|
||||
v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head);
|
||||
fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
||||
fifoColor = vec_or(fifoColor, ((v128u16){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}));
|
||||
vec_st(fifoColor, i, dst);
|
||||
}
|
||||
|
@ -445,7 +445,7 @@ void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst)
|
|||
|
||||
v128u32 dstLo = ((v128u32){0,0,0,0});
|
||||
v128u32 dstHi = ((v128u32){0,0,0,0});
|
||||
fifoColor = vec_perm( fifoColor, fifoColor, ((v128u8){10,11, 8,9, 14,15, 12,13, 2,3, 0,1, 6,7, 4,5}) );
|
||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){10,11, 8,9, 14,15, 12,13, 2,3, 0,1, 6,7, 4,5}) );
|
||||
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
Copyright (C) 2008-2021 DeSmuME team
|
||||
Copyright (C) 2008-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -878,8 +878,8 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons
|
|||
v128u16 inColor16SwappedLo = vec_ld( 0, inColor16 + i);
|
||||
v128u16 inColor16SwappedHi = vec_ld(16, inColor16 + i);
|
||||
|
||||
inColor16SwappedLo = vec_perm(inColor16SwappedLo, inColor16SwappedLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
inColor16SwappedHi = vec_perm(inColor16SwappedHi, inColor16SwappedHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
inColor16SwappedLo = vec_perm((v128u8)inColor16SwappedLo, (v128u8)inColor16SwappedLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
inColor16SwappedHi = vec_perm((v128u8)inColor16SwappedHi, (v128u8)inColor16SwappedHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
|
||||
vec_st(inColor16SwappedLo, 0, outColor16 + i);
|
||||
vec_st(inColor16SwappedHi, 16, outColor16 + i);
|
||||
|
@ -893,16 +893,16 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons
|
|||
v128u16 clearDepthLo = vec_ld( 0, inDepth16 + i);
|
||||
v128u16 clearDepthHi = vec_ld(16, inDepth16 + i);
|
||||
|
||||
clearDepthLo = vec_perm(clearDepthLo, clearDepthLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
clearDepthHi = vec_perm(clearDepthHi, clearDepthHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
clearDepthLo = vec_perm((v128u8)clearDepthLo, (v128u8)clearDepthLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
clearDepthHi = vec_perm((v128u8)clearDepthHi, (v128u8)clearDepthHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
|
||||
const v128u16 clearDepthValueLo = vec_and(clearDepthLo, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
|
||||
const v128u16 clearDepthValueHi = vec_and(clearDepthHi, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
|
||||
|
||||
const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||
const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||
const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||
const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||
const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||
const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||
const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||
const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||
|
||||
vec_st( vec_msum(calcDepth0, calcDepthMul, calcDepthAdd), 0, outDepth24 + i);
|
||||
vec_st( vec_msum(calcDepth1, calcDepthMul, calcDepthAdd), 16, outDepth24 + i);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2016-2021 DeSmuME team
|
||||
Copyright (C) 2016-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,7 +32,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con
|
|||
v128u16 srcSwapped;
|
||||
if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
srcSwapped = vec_perm((v128u8)srcColor, (v128u8)srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -44,11 +44,11 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con
|
|||
|
||||
if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17}));
|
||||
dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17}));
|
||||
}
|
||||
else
|
||||
{
|
||||
dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F}));
|
||||
dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F}));
|
||||
}
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcSwapped);
|
||||
|
@ -56,11 +56,11 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con
|
|||
|
||||
if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F}));
|
||||
dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F}));
|
||||
}
|
||||
else
|
||||
{
|
||||
dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F}));
|
||||
dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F}));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con
|
|||
v128u16 srcSwapped;
|
||||
if ( (BE_BYTESWAP == BESwapSrc) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
srcSwapped = vec_perm(srcColor, srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
srcSwapped = vec_perm((v128u8)srcColor, (v128u8)srcColor, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -92,11 +92,11 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con
|
|||
|
||||
if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17}));
|
||||
dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x11, 0x05,0x06,0x07,0x13, 0x09,0x0A,0x0B,0x15, 0x0D,0x0E,0x0F,0x17}) : ((v128u8){0x03,0x02,0x01,0x11, 0x07,0x06,0x05,0x13, 0x0B,0x0A,0x09,0x15, 0x0F,0x0E,0x0D,0x17}));
|
||||
}
|
||||
else
|
||||
{
|
||||
dstLo = vec_perm(dstLo, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F}));
|
||||
dstLo = vec_perm((v128u8)dstLo, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x11,0x03,0x02,0x01, 0x13,0x07,0x06,0x05, 0x15,0x0B,0x0A,0x09, 0x17,0x0F,0x0E,0x0D}) : ((v128u8){0x11,0x01,0x02,0x03, 0x13,0x05,0x06,0x07, 0x15,0x09,0x0A,0x0B, 0x17,0x0D,0x0E,0x0F}));
|
||||
}
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcSwapped);
|
||||
|
@ -104,11 +104,11 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con
|
|||
|
||||
if ( (BE_BYTESWAP == BESwapDst) || (BE_BYTESWAP == BESwapSrcDst) )
|
||||
{
|
||||
dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F}));
|
||||
dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x01,0x02,0x03,0x19, 0x05,0x06,0x07,0x1B, 0x09,0x0A,0x0B,0x1D, 0x0D,0x0E,0x0F,0x1F}) : ((v128u8){0x03,0x02,0x01,0x19, 0x07,0x06,0x05,0x1B, 0x0B,0x0A,0x09,0x1D, 0x0F,0x0E,0x0D,0x1F}));
|
||||
}
|
||||
else
|
||||
{
|
||||
dstHi = vec_perm(dstHi, srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F}));
|
||||
dstHi = vec_perm((v128u8)dstHi, (v128u8)srcAlphaBits, (SWAP_RB) ? ((v128u8){0x19,0x03,0x02,0x01, 0x1B,0x07,0x06,0x05, 0x1D,0x0B,0x0A,0x09, 0x1F,0x0F,0x0E,0x0D}) : ((v128u8){0x19,0x01,0x02,0x03, 0x1B,0x05,0x06,0x07, 0x1D,0x09,0x0A,0x0B, 0x1F,0x0D,0x0E,0x0F}));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -202,13 +202,13 @@ FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const
|
|||
// Convert RGB
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgbLo = vec_perm( rgbLo, rgbLo, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) );
|
||||
rgbHi = vec_perm( rgbHi, rgbHi, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) );
|
||||
rgbLo = vec_perm( (v128u8)rgbLo, (v128u8)rgbLo, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) );
|
||||
rgbHi = vec_perm( (v128u8)rgbHi, (v128u8)rgbHi, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14}) );
|
||||
}
|
||||
else
|
||||
{
|
||||
rgbLo = vec_perm( rgbLo, rgbLo, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) );
|
||||
rgbHi = vec_perm( rgbHi, rgbHi, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) );
|
||||
rgbLo = vec_perm( (v128u8)rgbLo, (v128u8)rgbLo, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) );
|
||||
rgbHi = vec_perm( (v128u8)rgbHi, (v128u8)rgbHi, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) );
|
||||
}
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
|
@ -234,10 +234,10 @@ FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src
|
|||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
return vec_or( vec_perm(src, src, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) );
|
||||
return vec_or( vec_perm((v128u8)src, (v128u8)src, ((v128u8){3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) );
|
||||
}
|
||||
|
||||
return vec_or( vec_perm(src, src, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) );
|
||||
return vec_or( vec_perm((v128u8)src, (v128u8)src, ((v128u8){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12})), ((v128u32){0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000}) );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
|
@ -256,7 +256,7 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src)
|
|||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
return vec_perm(src, src, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}));
|
||||
return vec_perm((v128u8)src, (v128u8)src, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}));
|
||||
}
|
||||
|
||||
return src;
|
||||
|
@ -385,15 +385,15 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t
|
|||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) );
|
||||
src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) );
|
||||
src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) );
|
||||
src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) );
|
||||
src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) );
|
||||
src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) );
|
||||
}
|
||||
else
|
||||
{
|
||||
src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) );
|
||||
src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) );
|
||||
src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) );
|
||||
src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) );
|
||||
src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) );
|
||||
src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) );
|
||||
}
|
||||
|
||||
vec_st( src_v128u32[0], 0, dst + (i * 3) );
|
||||
|
@ -419,15 +419,15 @@ size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t
|
|||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) );
|
||||
src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) );
|
||||
src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) );
|
||||
src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x05,0x03,0x02,0x01, 0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11}) );
|
||||
src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x09,0x07,0x06, 0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16}) );
|
||||
src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0F,0x0E,0x0D,0x0B, 0x15,0x13,0x12,0x11, 0x1A,0x19,0x17,0x16, 0x1F,0x1E,0x1D,0x1B}) );
|
||||
}
|
||||
else
|
||||
{
|
||||
src_v128u32[0] = vec_perm( src_v128u32[0], src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) );
|
||||
src_v128u32[1] = vec_perm( src_v128u32[1], src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) );
|
||||
src_v128u32[2] = vec_perm( src_v128u32[2], src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) );
|
||||
src_v128u32[0] = vec_perm( (v128u8)src_v128u32[0], (v128u8)src_v128u32[1], ((v128u8){0x07,0x01,0x02,0x03, 0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13}) );
|
||||
src_v128u32[1] = vec_perm( (v128u8)src_v128u32[1], (v128u8)src_v128u32[2], ((v128u8){0x0A,0x0B,0x05,0x06, 0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16}) );
|
||||
src_v128u32[2] = vec_perm( (v128u8)src_v128u32[2], (v128u8)src_v128u32[3], ((v128u8){0x0D,0x0E,0x0F,0x09, 0x17,0x11,0x12,0x13, 0x1A,0x1B,0x15,0x16, 0x1D,0x1E,0x1F,0x19}) );
|
||||
}
|
||||
|
||||
vec_st( src_v128u32[0], 0, dst + (i * 3) );
|
||||
|
@ -655,4 +655,4 @@ template v128u16 ColorspaceCopy16_AltiVec<false>(const v128u16 &src);
|
|||
template v128u32 ColorspaceCopy32_AltiVec<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceCopy32_AltiVec<false>(const v128u32 &src);
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
#endif // ENABLE_ALTIVEC
|
||||
|
|
Loading…
Reference in New Issue