From 304814e7cca0d9ac1497a955f7a6741a62fe4396 Mon Sep 17 00:00:00 2001
From: NeoBrainX
Date: Wed, 9 Jan 2013 10:19:18 +0100
Subject: [PATCH] Video_Software: Implement proper RGBA8 texture loading from tmem.

For RGBA8 textures, AR and GB tiles are stored in separate tmem banks.
TextureDecoder did not support that previously.
---
 .../Core/VideoCommon/Src/TextureDecoder.cpp  | 22 ++++++++++
 Source/Core/VideoCommon/Src/TextureDecoder.h |  1 +
 .../Plugin_VideoSoftware/Src/BPMemLoader.cpp | 39 +++++++++++++----
 .../Src/TextureSampler.cpp                   | 42 ++++++++++++++-----
 4 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
index 180c901730..362a42e015 100644
--- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
@@ -2515,6 +2515,28 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
 	}
 }

+void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth)
+{
+	u16 sBlk = s >> 2; // block column: blocks are 4 texels wide
+	u16 tBlk = t >> 2; // block row: blocks are 4 texels tall
+	u16 widthBlks = (imageWidth >> 2) + 1; // row stride in blocks; NOTE(review): the +1 suggests imageWidth is width-1 - confirm against callers
+	u32 base_ar = (tBlk * widthBlks + sBlk) << 4; // index of the block's first texel (16 texels per block)
+	u32 base_gb = (tBlk * widthBlks + sBlk) << 4; // same texel index, applied to the GB source
+	u16 blkS = s & 3;
+	u16 blkT = t & 3;
+	u32 blk_off = (blkT << 2) + blkS; // texel index inside the block, row-major
+
+	u32 offset_ar = (base_ar + blk_off) << 1; // byte offset: two bytes per texel in each source
+	u32 offset_gb = (base_gb + blk_off) << 1;
+	const u8* val_addr_ar = src_ar + offset_ar;
+	const u8* val_addr_gb = src_gb + offset_gb;
+
+	dst[3] = val_addr_ar[0]; // A
+	dst[0] = val_addr_ar[1]; // R
+	dst[1] = val_addr_gb[0]; // G
+	dst[2] = val_addr_gb[1]; // B
+}
+

 const char* texfmt[] = {
 	// pixel
diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.h b/Source/Core/VideoCommon/Src/TextureDecoder.h
index 1901c86d27..dd4b17eea3 100644
--- a/Source/Core/VideoCommon/Src/TextureDecoder.h
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.h
@@ -86,6 +86,7 @@ enum PC_TexFormat
 PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly = false);
 PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt);
 void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt);
+void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth);
 void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);

 #endif
diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp
index 3db8c5d493..6a17a40eb1 100644
--- a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp
@@ -115,18 +115,39 @@ void SWBPWritten(int address, int newvalue)
 	case BPMEM_PRELOAD_MODE:
 		if (newvalue != 0)
 		{
-			// NOTE(neobrain): Apparently tmemodd doesn't affect hardware behavior at all (libogc uses it just as a buffe$
+			// TODO: Not quite sure if this is completely correct (likely not)
+			// NOTE: libogc's implementation of GX_PreloadEntireTexture seems flawed, so it's not necessarily a good reference for RE'ing this feature.
+
 			BPS_TmemConfig& tmem_cfg = bpmem.tmem_config;
-			u8* ram_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5);
-			u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;
-			u32 size = tmem_cfg.preload_tile_info.count * 32;
+			u8* src_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5); // TODO: Should we add mask here on GC?
+			u32 size = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE;
+			u32 tmem_addr_even = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;

-			// Check if the game has overflowed TMEM, and copy up to the limit.
-			// Paper Mario does this when entering the Great Boogly Tree (Chap 2)
-			if ((tmem_addr + size) > TMEM_SIZE)
-				size = TMEM_SIZE - tmem_addr;
+			if (tmem_cfg.preload_tile_info.type != 3)
+			{
+				if (tmem_addr_even + size > TMEM_SIZE) // clamp the copy so it stops at TMEM_SIZE
+					size = TMEM_SIZE - tmem_addr_even;

-			memcpy(texMem + tmem_addr, ram_ptr, size);
+				memcpy(texMem + tmem_addr_even, src_ptr, size);
+			}
+			else // RGBA8 tiles (and CI14, but that might just be stupid libogc!)
+			{
+				// AR and GB tiles are stored in separate TMEM banks => can't use a single memcpy for everything
+				u32 tmem_addr_odd = tmem_cfg.preload_tmem_odd * TMEM_LINE_SIZE;
+
+				for (int i = 0; i < tmem_cfg.preload_tile_info.count; ++i)
+				{
+					if (tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE ||
+						tmem_addr_odd + TMEM_LINE_SIZE > TMEM_SIZE)
+						break; // stop before either destination write would run past TMEM_SIZE
+
+					memcpy(texMem + tmem_addr_even, src_ptr, TMEM_LINE_SIZE); // first source line -> even address
+					memcpy(texMem + tmem_addr_odd, src_ptr + TMEM_LINE_SIZE, TMEM_LINE_SIZE); // second source line -> odd address
+					tmem_addr_even += TMEM_LINE_SIZE;
+					tmem_addr_odd += TMEM_LINE_SIZE;
+					src_ptr += TMEM_LINE_SIZE * 2; // each iteration consumes two source lines
+				}
+			}
 		}
 		break;

diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
index 66b6b68e22..56101dc69d 100644
--- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
@@ -120,10 +120,12 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
 	TexImage0& ti0 = texUnit.texImage0[subTexmap];
 	TexTLUT& texTlut = texUnit.texTlut[subTexmap];

-	u8 *imageSrc;
+	u8 *imageSrc, *imageSrcOdd = NULL;
 	if (texUnit.texImage1[subTexmap].image_type)
 	{
 		imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
+		if (ti0.format == GX_TF_RGBA8) // only RGBA8 reads a second source; it is passed on as src_gb
+			imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
 	}
 	else
 	{
@@ -190,17 +192,34 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
 		WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
 		WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);

-		TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-		SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
+		if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
+		{
+			TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+			SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));

-		TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-		AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
+			TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+			AddTexel(sampledTex, texel, (fractS) * (128 - fractT));

-		TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-		AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
+			TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+			AddTexel(sampledTex, texel, (128 - fractS) * (fractT));

-		TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-		AddTexel(sampledTex, texel, (fractS) * (fractT));
+			TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+			AddTexel(sampledTex, texel, (fractS) * (fractT));
+		}
+		else
+		{
+			TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageT, imageWidth);
+			SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
+
+			TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageT, imageWidth);
+			AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
+
+			TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageTPlus1, imageWidth);
+			AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
+
+			TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageTPlus1, imageWidth);
+			AddTexel(sampledTex, texel, (fractS) * (fractT));
+		}

 		sample[0] = (u8)(texel[0] >> 14);
 		sample[1] = (u8)(texel[1] >> 14);
@@ -217,7 +236,10 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
 		WrapCoord(imageS, tm0.wrap_s, imageWidth);
 		WrapCoord(imageT, tm0.wrap_t, imageHeight);

-		TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+		if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
+			TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+		else
+			TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, imageWidth);
 	}
 }
