diff --git a/Source/Core/VideoBackends/Software/TextureSampler.cpp b/Source/Core/VideoBackends/Software/TextureSampler.cpp index 60cd2da972..9629a7600b 100644 --- a/Source/Core/VideoBackends/Software/TextureSampler.cpp +++ b/Source/Core/VideoBackends/Software/TextureSampler.cpp @@ -179,18 +179,20 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth); WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight); + TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format; + if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) { - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt); AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (fractT)); } else @@ -224,7 +226,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) WrapCoord(imageT, tm0.wrap_t, imageHeight); if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) - TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, (TlutFormat) texTlut.tlut_format); else TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, imageWidth); } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index ad414e36cc..cbfdb47d62 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -490,7 +490,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage, { if (!(texformat == GX_TF_RGBA8 && from_tmem)) { - pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt); + pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlutaddr, (TlutFormat) tlutfmt); } else { @@ -566,7 +566,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage, const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data; - TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, tlutfmt); + TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, (TlutFormat) tlutfmt); mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat); entry->Load(mip_width, mip_height, expanded_mip_width, level); diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h index c8ffa0a885..9838881ce6 100644 --- a/Source/Core/VideoCommon/TextureDecoder.h +++ b/Source/Core/VideoCommon/TextureDecoder.h @@ -52,6 +52,13 @@ enum TextureFormat GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF, }; +enum TlutFormat +{ + GX_TL_IA8 = 0x0, + GX_TL_RGB565 = 0x1, + GX_TL_RGB5A3 = 0x2, +}; + int TexDecoder_GetTexelSizeInNibbles(int format); int TexDecoder_GetTextureSizeInBytes(int width, int height, int format); int TexDecoder_GetBlockWidthInTexels(u32 format); @@ -71,12 +78,12 @@ enum PC_TexFormat PC_TEX_FMT_DXT1, }; -PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt); +PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt); PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height); -void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt); +void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt); void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth); void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); /* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */ -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, int tlutfmt); +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt); diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index 63e6dc672e..4cb8167104 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -242,7 +242,7 @@ static void TexDecoder_DrawOverlay(u8 *dst, int width, int height, int texformat } } -PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) { PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlutaddr, tlutfmt); @@ -301,7 +301,7 @@ struct DXTBlock u8 lines[4]; }; -void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt) +void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt) { /* General formula for computing texture offset // @@ -334,13 +334,13 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth switch (tlutfmt) { - case 0: + case GX_TL_IA8: *((u32*)dst) = decodeIA8Swapped(tlut[val]); break; - case 1: + case GX_TL_RGB565: *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); break; - case 2: + case GX_TL_RGB5A3: *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); break; } @@ -399,13 +399,13 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth switch (tlutfmt) { - case 0: + case GX_TL_IA8: *((u32*)dst) = decodeIA8Swapped(tlut[val]); break; - case 1: + case GX_TL_RGB565: *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); break; - case 2: + case GX_TL_RGB5A3: *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); break; } @@ -464,13 +464,13 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth switch (tlutfmt) { - case 0: + case GX_TL_IA8: *((u32*)dst) = decodeIA8Swapped(tlut[val]); break; - case 1: + case GX_TL_RGB565: *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); break; - case 2: + case GX_TL_RGB5A3: *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); break; } diff --git a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp index 3808483742..3dff970afa 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp @@ -246,7 +246,7 @@ static void decodeDXTBlockRGBA(u32 *dst, const DXTBlock *src, int pitch) // TODO: complete SSE2 optimization of less often used texture formats. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) { const int Wsteps4 = (width + 3) / 4; @@ -255,7 +255,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he switch (texformat) { case GX_TF_C4: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 for (int y = 0; y < height; y += 8) @@ -263,14 +263,14 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) @@ -317,7 +317,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C8: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 for (int y = 0; y < height; y += 4) @@ -325,14 +325,14 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) @@ -366,7 +366,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C14X2: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 for (int y = 0; y < height; y += 4) @@ -374,14 +374,14 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) diff --git a/Source/Core/VideoCommon/TextureDecoder_x64.cpp b/Source/Core/VideoCommon/TextureDecoder_x64.cpp index 0e7f284b2b..867566032b 100644 --- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp @@ -287,7 +287,7 @@ static inline void SetOpenMPThreadCount(int width, int height) // TODO: complete SSE2 optimization of less often used texture formats. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) { SetOpenMPThreadCount(width, height); @@ -297,7 +297,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he switch (texformat) { case GX_TF_C4: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 #pragma omp parallel for @@ -306,7 +306,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { #pragma omp parallel for for (int y = 0; y < height; y += 8) @@ -315,7 +315,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { #pragma omp parallel for for (int y = 0; y < height; y += 8) @@ -555,7 +555,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C8: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 #pragma omp parallel for @@ -564,7 +564,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { #pragma omp parallel for for (int y = 0; y < height; y += 4) @@ -573,7 +573,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { #pragma omp parallel for for (int y = 0; y < height; y += 4) @@ -668,7 +668,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C14X2: - if (tlutfmt == 2) + if (tlutfmt == GX_TL_RGB5A3) { // Special decoding is required for TLUT format 5A3 #pragma omp parallel for @@ -677,7 +677,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); } - else if (tlutfmt == 0) + else if (tlutfmt == GX_TL_IA8) { #pragma omp parallel for for (int y = 0; y < height; y += 4) @@ -685,7 +685,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); } - else + else if (tlutfmt == GX_TL_RGB565) { #pragma omp parallel for for (int y = 0; y < height; y += 4)