Colorspace Handler: Improve the accuracy of RGBA6665 to RGBA5551 color conversions in AltiVec.

- Also do some code cleanup where appropriate.
This commit is contained in:
rogerman 2025-07-15 16:40:27 -07:00
parent 66b525b8dc
commit 0367d1469d
1 changed file with 32 additions and 36 deletions

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2024 DeSmuME team
Copyright (C) 2016-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -67,7 +67,7 @@ FORCEINLINE void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, co
// Forwards srcColor to ColorspaceConvert555aTo8888_AltiVec together with an
// all-zero alpha vector; dstLo and dstHi are passed straight through as the
// callee's output arguments.
// NOTE(review): srcAlphaBits16 appears twice below. These look like the
// before/after lines of the commit's diff (brace literal vs. vec_splat_u16);
// both forms produce eight zero lanes.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
const v128u16 srcAlphaBits16 = vec_splat_u16(0);
ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
@ -115,14 +115,14 @@ FORCEINLINE void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, co
// Forwards srcColor to ColorspaceConvert555aTo6665_AltiVec together with an
// all-zero alpha vector; dstLo and dstHi are passed straight through as the
// callee's output arguments.
// NOTE(review): srcAlphaBits16 appears twice below. These look like the
// before/after lines of the commit's diff (brace literal vs. vec_splat_u16);
// both forms produce eight zero lanes.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0};
const v128u16 srcAlphaBits16 = vec_splat_u16(0);
ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Forwards srcColor to ColorspaceConvert555aTo8888_AltiVec with every 16-bit
// alpha lane set to 0xFFFF; dstLo and dstHi are passed straight through as
// the callee's output arguments.
// NOTE(review): srcAlphaBits16 appears twice below. These look like the
// before/after lines of the commit's diff.
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};
// vec_splat_u16 takes a small signed immediate; (s16)0xFFFF is presumably -1
// (assuming s16 is a 16-bit signed type), which splats as 0xFFFF per lane.
const v128u16 srcAlphaBits16 = vec_splat_u16((s16)0xFFFF);
ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
@ -136,14 +136,14 @@ FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcCol
// Builds a per-lane alpha vector from srcColor and forwards both to
// ColorspaceConvert555aTo8888_AltiVec; dstLo and dstHi are passed straight
// through as the callee's output arguments.
// NOTE(review): srcAlphaBits16 appears twice below. These look like the
// before/after lines of the commit's diff (brace-literal zero vs. vec_splat_s16).
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To8888_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Signed compare 0 > srcColor: lanes whose bit 15 is set become 0xFFFF,
// all other lanes become 0x0000.
const v128u16 srcAlphaBits16 = (v128u16)vec_cmpgt( ((v128s16){0,0, 0,0, 0,0, 0,0}), (v128s16)srcColor );
const v128u16 srcAlphaBits16 = (v128u16)vec_cmpgt( vec_splat_s16(0), (v128s16)srcColor );
ColorspaceConvert555aTo8888_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
// Builds a per-lane alpha vector from srcColor and forwards both to
// ColorspaceConvert555aTo6665_AltiVec; dstLo and dstHi are passed straight
// through as the callee's output arguments.
// NOTE(review): srcAlphaBits16 appears twice below. These look like the
// before/after lines of the commit's diff (brace-literal zero vs. vec_splat_s16).
template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
FORCEINLINE void ColorspaceConvert5551To6665_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
// Signed compare 0 > srcColor yields 0xFFFF in lanes whose bit 15 is set;
// the AND with 0x1F1F reduces those lanes to 0x1F1F and leaves the rest 0.
const v128u16 srcAlphaBits16 = vec_and( (v128u16)vec_cmpgt( ((v128s16){0,0, 0,0, 0,0, 0,0}), (v128s16)srcColor ), ((v128u16){0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F}) );
const v128u16 srcAlphaBits16 = vec_and( (v128u16)vec_cmpgt( vec_splat_s16(0), (v128s16)srcColor ), ((v128u16){0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F}) );
ColorspaceConvert555aTo6665_AltiVec<SWAP_RB, BE_BYTESWAP>(srcColor, srcAlphaBits16, dstLo, dstHi);
}
@ -195,8 +195,8 @@ FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
rgbLo = vec_sl( srcLo, ((v128u32){2,2,2,2}) );
rgbHi = vec_sl( srcHi, ((v128u32){2,2,2,2}) );
// Widen each byte lane as (x << s) | (x >> t): byte elements 0-2 of every
// pixel use s=2/t=4 and byte element 3 uses s=3/t=2, so the top bits are
// replicated into the vacated low bits (presumably 6-bit color and 5-bit
// alpha expanded to 8 bits).
// Both halves of each OR must read the same source vector; rgbHi previously
// right-shifted srcLo, mixing low bits from the wrong four pixels.
// NOTE(review): the alpha extraction further down reads bits 24..31 of each
// 32-bit word, while the 3/2 shift pair here targets byte element 3 — confirm
// both refer to the same byte for the target's vector element ordering.
rgbLo = (v128u32)vec_or( vec_sl((v128u8)srcLo, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3})), vec_sr((v128u8)srcLo, ((v128u8){4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2})) );
rgbHi = (v128u32)vec_or( vec_sl((v128u8)srcHi, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3})), vec_sr((v128u8)srcHi, ((v128u8){4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2})) );
// Convert alpha
dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, ((v128u32){24,24,24,24})), ((v128u32){0x0000001F,0x0000001F,0x0000001F,0x0000001F})), vec_and(vec_sr(srcHi, ((v128u32){24,24,24,24})), ((v128u32){0x0000001F,0x0000001F,0x0000001F,0x0000001F})) );
@ -210,7 +210,7 @@ FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const
dstAlpha = vec_packsu( vec_sr(srcLo, ((v128u32){24,24,24,24})), vec_sr(srcHi, ((v128u32){24,24,24,24})) );
}
dstAlpha = vec_cmpgt(dstAlpha, ((v128u16){0,0,0,0,0,0,0,0}));
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
dstAlpha = vec_and(dstAlpha, ((v128u16){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}));
// Convert RGB
@ -228,7 +228,7 @@ FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
dstColor = vec_and(dstColor, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
return vec_or(dstColor, dstAlpha);
return (v128u16)vec_or((v128u8)dstColor, (v128u8)dstAlpha);
}
template <bool SWAP_RB>
@ -280,14 +280,13 @@ template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
v128u32 dst32[2];
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555xTo8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
ColorspaceConvert555xTo8888Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dst32[1], dst32[0] );
vec_st(dst32[0], 0, dst+i);
vec_st(dst32[1], 16, dst+i);
}
return i;
@ -297,14 +296,13 @@ template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
v128u32 dst32[2];
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555xTo6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
ColorspaceConvert555xTo6665Opaque_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dst32[1], dst32[0] );
vec_st(dst32[0], 0, dst+i);
vec_st(dst32[1], 16, dst+i);
}
return i;
@ -314,14 +312,13 @@ template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
static size_t ColorspaceConvertBuffer5551To8888_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
v128u32 dst32[2];
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To8888_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
ColorspaceConvert5551To8888_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dst32[1], dst32[0] );
vec_st(dst32[0], 0, dst+i);
vec_st(dst32[1], 16, dst+i);
}
return i;
@ -331,14 +328,13 @@ template <bool SWAP_RB, BESwapFlags BE_BYTESWAP>
size_t ColorspaceConvertBuffer5551To6665_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
v128u32 dst32[2];
for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16))
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert5551To6665_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
ColorspaceConvert5551To6665_AltiVec<SWAP_RB, BE_BYTESWAP>( vec_ld(0, src+i), dst32[1], dst32[0] );
vec_st(dst32[0], 0, dst+i);
vec_st(dst32[1], 16, dst+i);
}
return i;
@ -349,7 +345,7 @@ size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
for (; i < pixCountVec128; i+=sizeof(v128u32)/sizeof(u32))
{
vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
@ -362,7 +358,7 @@ size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
for (; i < pixCountVec128; i+=sizeof(v128u32)/sizeof(u32))
{
vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
@ -375,7 +371,7 @@ size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
for (; i < pixCountVec128; i+=sizeof(v128u32)/sizeof(u16))
{
vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
@ -388,7 +384,7 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
for (; i < pixCountVec128; i+=sizeof(v128u32)/sizeof(u16))
{
vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
@ -401,7 +397,7 @@ size_t ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(const u32 *src, u32 *dst,
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
for (; i < pixCountVec128; i+=sizeof(v128u32)/sizeof(u32))
{
vec_st( ColorspaceConvert888xTo8888Opaque_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
@ -416,7 +412,7 @@ size_t ColorspaceConvertBuffer555xTo888_AltiVec(const u16 *src, u8 *dst, size_t
v128u16 src_v128u16[2];
v128u32 src_v128u32[4];
for (; i < pixCountVec128; i+=16)
for (; i < pixCountVec128; i+=sizeof(v128u16))
{
src_v128u16[0] = vec_ld( 0, src+i);
src_v128u16[1] = vec_ld(16, src+i);