From 344f2e57f608f6a4621514bddd5c1fff7d8e3764 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 11 Dec 2016 21:54:21 +1000 Subject: [PATCH] VideoCommon: Use avg(color1, color2) for color3 in CMPR textures --- .../VideoCommon/TextureDecoder_Common.cpp | 20 +++++++++----- .../VideoCommon/TextureDecoder_Generic.cpp | 10 ++++--- .../Core/VideoCommon/TextureDecoder_x64.cpp | 26 ++++++++++++------- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index ad58980bfb..cdaba05bc2 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -654,6 +654,11 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth int red1 = Convert5To8((c1 >> 11) & 0x1F); int red2 = Convert5To8((c2 >> 11) & 0x1F); + // Approximation of x/3: 3/8 (1/2 - 1/8) + int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3); + int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3); + int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3); + u16 ss = s & 3; u16 tt = t & 3; @@ -675,19 +680,20 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth color = MakeRGBA(red2, green2, blue2, 255); break; case 2: - color = MakeRGBA(red1 + (red2 - red1) / 3, green1 + (green2 - green1) / 3, - blue1 + (blue2 - blue1) / 3, 255); + color = MakeRGBA(red1 + red3, green1 + green3, blue1 + blue3, 255); break; case 3: - color = MakeRGBA(red2 + (red1 - red2) / 3, green2 + (green1 - green2) / 3, - blue2 + (blue1 - blue2) / 3, 255); + color = MakeRGBA(red2 - red3, green2 - green3, blue2 - blue3, 255); break; case 6: - color = MakeRGBA((int)ceil((float)(red1 + red2) / 2), (int)ceil((float)(green1 + green2) / 2), - (int)ceil((float)(blue1 + blue2) / 2), 255); + color = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255); break; case 7: - color = 
MakeRGBA(red2, green2, blue2, 0); + // color[3] is the same as color[2] (average of both colors), but transparent. + // This differs from DXT1 where color[3] is transparent black. + color = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 0); break; default: color = 0; diff --git a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp index fe0f53c845..bb48d1aca8 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp @@ -169,6 +169,7 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch) colors[1] = MakeRGBA(red2, green2, blue2, 255); if (c1 > c2) { + // Approximation of x/3: 3/8 (1/2 - 1/8) int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3); int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3); int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3); @@ -177,9 +178,12 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch) } else { - colors[2] = MakeRGBA((red1 + red2 + 1) / 2, // Average - (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255); - colors[3] = MakeRGBA(red2, green2, blue2, 0); // Color2 but transparent + // color[3] is the same as color[2] (average of both colors), but transparent. + // This differs from DXT1 where color[3] is transparent black. 
+ colors[2] = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255); + colors[3] = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 0); } for (int y = 0; y < 4; y++) diff --git a/Source/Core/VideoCommon/TextureDecoder_x64.cpp b/Source/Core/VideoCommon/TextureDecoder_x64.cpp index 12e44a677d..4a1d2b8f32 100644 --- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp @@ -167,7 +167,7 @@ static inline void DecodeBytes_IA4(u32* dst, const u8* src) } #ifdef CHECK -static inline u32 makeRGBA(int r, int g, int b, int a) +static inline u32 MakeRGBA(int r, int g, int b, int a) { return (a << 24) | (b << 16) | (g << 8) | r; } @@ -189,6 +189,7 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch) colors[1] = MakeRGBA(red2, green2, blue2, 255); if (c1 > c2) { + // Approximation of x/3: 3/8 (1/2 - 1/8) int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3); int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3); int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3); @@ -197,9 +198,12 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch) } else { - colors[2] = MakeRGBA((red1 + red2 + 1) / 2, // Average - (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255); - colors[3] = MakeRGBA(red2, green2, blue2, 0); // Color2 but transparent + // color[3] is the same as color[2] (average of both colors), but transparent. + // This differs from DXT1 where color[3] is transparent black. 
+ colors[2] = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 255); + colors[3] = + MakeRGBA((red1 + red2 + 1) / 2, (green1 + green2 + 1) / 2, (blue1 + blue2 + 1) / 2, 0); } for (int y = 0; y < 4; y++) @@ -1225,9 +1229,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int const __m128i rrggbb21 = _mm_avg_epu16(rrggbb0, rrggbb1); const __m128i rgb210 = _mm_srli_si128(_mm_packus_epi16(rrggbb21, rrggbb21), 8); rgb2 = rgb210; - rgb3 = _mm_and_si128( - _mm_srli_si128(_mm_shuffle_epi32(argb888x4, _MM_SHUFFLE(1, 1, 1, 1)), 8), - _mm_srli_epi32(allFFs128, 8)); + rgb3 = _mm_and_si128(rgb210, _mm_srli_epi32(allFFs128, 8)); } // if (rgb0 > rgb1): @@ -1262,7 +1264,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int // 0x00FFFFFF) // Make this color fully transparent: rgb3 = _mm_or_si128(rgb3, - _mm_and_si128(_mm_and_si128(rgb1, _mm_srli_epi32(allFFs128, 8)), + _mm_and_si128(_mm_and_si128(rgb2, _mm_srli_epi32(allFFs128, 8)), _mm_slli_si128(allFFs128, 8))); } @@ -1284,8 +1286,12 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int // REFERENCE: u32 tmp0[4][4], tmp1[4][4]; - DecodeDXTBlock(&(tmp0[0][0]), (const DXTBlock*)src, 4); - DecodeDXTBlock(&(tmp1[0][0]), (const DXTBlock*)(src + 8), 4); + DecodeDXTBlock(&(tmp0[0][0]), + reinterpret_cast<const DXTBlock*>(src + sizeof(DXTBlock) * 2 * xStep), + 4); + DecodeDXTBlock( + &(tmp1[0][0]), + reinterpret_cast<const DXTBlock*>((src + sizeof(DXTBlock) * 2 * xStep) + 8), 4); #endif u32* dst32 = (dst + (y + z * 4) * width + x);