From c0a4760f0efbb99274c33f30154a7f43aab70494 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 26 Jan 2015 15:33:23 -0800 Subject: [PATCH 1/3] Decode EFB copies used as paletted textures. A number of games make an EFB copy in I4/I8 format, then use it as a texture in C4/C8 format. Detect when this happens, and decode the copy on the GPU using the specified palette. This has a few advantages: it allows using EFB2Tex for a few more games, it preserves the resolution of scaled EFB copies, and it's probably a bit faster. D3D only at the moment, but porting to OpenGL should be straightforward. --- .../Core/VideoBackends/D3D/TextureCache.cpp | 160 ++++++++++++++- Source/Core/VideoBackends/D3D/TextureCache.h | 7 + Source/Core/VideoBackends/D3D/main.cpp | 1 + .../Core/VideoBackends/OGL/TextureCache.cpp | 12 +- Source/Core/VideoBackends/OGL/TextureCache.h | 1 + Source/Core/VideoBackends/OGL/main.cpp | 1 + Source/Core/VideoCommon/TextureCacheBase.cpp | 187 +++++++++++------- Source/Core/VideoCommon/TextureCacheBase.h | 61 +++++- Source/Core/VideoCommon/VertexManagerBase.cpp | 2 + Source/Core/VideoCommon/VideoConfig.h | 1 + 10 files changed, 352 insertions(+), 81 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index 73d45f9ad2..45b2cadee8 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -4,6 +4,7 @@ #include "Core/HW/Memmap.h" #include "VideoBackends/D3D/D3DBase.h" +#include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/D3DState.h" #include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/FramebufferManager.h" @@ -14,6 +15,7 @@ #include "VideoBackends/D3D/TextureEncoder.h" #include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/ImageWrite.h" +#include "VideoCommon/LookUpTables.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/VideoConfig.h" @@ -179,17 +181,167 @@ void 
TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo size_in_bytes = (u32)encoded_size; - TextureCache::MakeRangeDynamic(addr, (u32)encoded_size); + TextureCache::MakeRangeDynamic(dstAddr, (u32)encoded_size); this->hash = hash; } } +const char palette_shader[] = +R"HLSL( +sampler samp0 : register(s0); +Texture2DArray Tex0 : register(t0); +Buffer Tex1 : register(t1); +uniform float Multiply; + +uint Convert3To8(uint v) +{ + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); +} + +uint Convert4To8(uint v) +{ + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; +} + +uint Convert5To8(uint v) +{ + // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); +} + +uint Convert6To8(uint v) +{ + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 4); +} + +float4 DecodePixel_RGB5A3(uint val) +{ + int r,g,b,a; + if ((val&0x8000)) + { + r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return float4(r, g, b, a) / 255; +} + +float4 DecodePixel_RGB565(uint val) +{ + int r, g, b, a; + r = Convert5To8((val >> 11) & 0x1f); + g = Convert6To8((val >> 5) & 0x3f); + b = Convert5To8((val) & 0x1f); + a = 0xFF; + return float4(r, g, b, a) / 255; +} + +float4 DecodePixel_IA8(uint val) +{ + int i = val & 0xFF; + int a = val >> 8; + return float4(i, i, i, a) / 255; +} + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0) +{ + uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; + src = Tex1.Load(src); + src = ((src << 8) & 0xFF00) | (src >> 8); + ocol0 = DECODE(src); +} +)HLSL"; + +void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) +{ + g_renderer->ResetAPIState(); + + // 
stretch picture with increased internal resolution + const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)unconverted->config.width, (float)unconverted->config.height); + D3D::context->RSSetViewports(1, &vp); + + D3D11_BOX box{ 0, 0, 0, 512, 1, 1 }; + D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0); + + D3D::stateman->SetTexture(1, palette_buf_srv); + + // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) + float params[4] = { unconverted->format == 0 ? 15.f : 255.f }; + D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, ¶ms, 0, 0); + D3D::stateman->SetPixelConstants(palette_uniform); + + const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, unconverted->config.width, unconverted->config.height); + + D3D::SetPointCopySampler(); + + // Make sure we don't draw with the texture set as both a source and target. + // (This can happen because we don't unbind textures when we free them.) + D3D::stateman->UnsetTexture(static_cast(entry)->texture->GetSRV()); + + D3D::context->OMSetRenderTargets(1, &static_cast(entry)->texture->GetRTV(), nullptr); + + // Create texture copy + D3D::drawShadedTexQuad( + static_cast(unconverted)->texture->GetSRV(), + &sourcerect, unconverted->config.width, unconverted->config.height, + palette_pixel_shader[format], + VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), + GeometryShaderCache::GetCopyGeometryShader()); + + D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); + + g_renderer->RestoreAPIState(); +} + +ID3D11PixelShader *GetConvertShader(const char* Type) +{ + std::string shader = "#define DECODE DecodePixel_"; + shader.append(Type); + shader.append("\n"); + shader.append(palette_shader); + return D3D::CompileAndCreatePixelShader(shader); +} + TextureCache::TextureCache() { // FIXME: Is it safe here? 
g_encoder = new PSTextureEncoder; g_encoder->Init(); + + palette_buf = nullptr; + palette_buf_srv = nullptr; + palette_uniform = nullptr; + palette_pixel_shader[GX_TL_IA8] = GetConvertShader("IA8"); + palette_pixel_shader[GX_TL_RGB565] = GetConvertShader("RGB565"); + palette_pixel_shader[GX_TL_RGB5A3] = GetConvertShader("RGB5A3"); + auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE); + HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf); + CHECK(SUCCEEDED(hr), "create palette decoder lut buffer"); + D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer"); + // TODO: C14X2 format. + auto outlutUavDesc = CD3D11_SHADER_RESOURCE_VIEW_DESC(palette_buf, DXGI_FORMAT_R16_UINT, 0, 256, 0); + hr = D3D::device->CreateShaderResourceView(palette_buf, &outlutUavDesc, &palette_buf_srv); + CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); + D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); + const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform); + CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer"); + D3D::SetDebugObjectName((ID3D11DeviceChild*)palette_uniform, "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); } TextureCache::~TextureCache() @@ -200,6 +352,12 @@ TextureCache::~TextureCache() g_encoder->Shutdown(); delete g_encoder; g_encoder = nullptr; + + SAFE_RELEASE(palette_buf); + SAFE_RELEASE(palette_buf_srv); + SAFE_RELEASE(palette_uniform); + for (ID3D11PixelShader*& shader : palette_pixel_shader) + SAFE_RELEASE(shader); } } diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 49dfc15340..bbe8523cbf 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -42,8 +42,15 @@ private: u64 EncodeToRamFromTexture(u32 address, void* 
source_texture, u32 SourceW, u32 SourceH, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source) {return 0;}; + void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) override; + void CompileShaders() override { } void DeleteShaders() override { } + + ID3D11Buffer* palette_buf; + ID3D11ShaderResourceView* palette_buf_srv; + ID3D11Buffer* palette_uniform; + ID3D11PixelShader* palette_pixel_shader[3]; }; } diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 57e47fd629..f554544953 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -82,6 +82,7 @@ void InitBackendInfo() g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupportsPostProcessing = false; + g_Config.backend_info.bSupportsPaletteConversion = true; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 83c4af7a98..e579909a9d 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -204,7 +204,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo if (false == g_ActiveConfig.bCopyEFBToTexture) { int encoded_size = TextureConverter::EncodeToRamFromTexture( - addr, + dstAddr, read_texture, srcFormat == PEControl::Z24, isIntensity, @@ -212,12 +212,12 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo scaleByHalf, srcRect); - u8* dst = Memory::GetPointer(addr); + u8* dst = Memory::GetPointer(dstAddr); u64 const new_hash = GetHash64(dst,encoded_size,g_ActiveConfig.iSafeTextureCache_ColorSamples); size_in_bytes = (u32)encoded_size; - TextureCache::MakeRangeDynamic(addr,encoded_size); + TextureCache::MakeRangeDynamic(dstAddr, 
encoded_size); hash = new_hash; } @@ -359,4 +359,10 @@ void TextureCache::DeleteShaders() s_DepthMatrixProgram.Destroy(); } +void TextureCache::ConvertTexture(TextureCache::TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) +{ + // TODO: Implement. + return; +} + } diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index a855b450b6..bdf6fd6309 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -48,6 +48,7 @@ private: ~TextureCache(); TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) override; + void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) override; void CompileShaders() override; void DeleteShaders() override; diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 5bea0c239c..83ab7593e7 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -140,6 +140,7 @@ static void InitBackendInfo() g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupportsPostProcessing = true; + g_Config.backend_info.bSupportsPaletteConversion = false; g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index ac47e1929f..6ab9705c4c 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -32,6 +32,7 @@ size_t TextureCache::temp_size; TextureCache::TexCache TextureCache::textures; TextureCache::TexPool TextureCache::texture_pool; +TextureCache::TCacheEntryBase* TextureCache::bound_textures[8]; TextureCache::BackupConfig TextureCache::backup_config; @@ -74,6 +75,8 @@ void TextureCache::RequestInvalidateTextureCache() void TextureCache::Invalidate() { + 
UnbindTextures(); + for (auto& tex : textures) { delete tex.second; @@ -143,7 +146,7 @@ void TextureCache::Cleanup(int _frameCount) } if (_frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount && // EFB copies living on the host GPU are unrecoverable and thus shouldn't be deleted - !iter->second->IsEfbCopy()) + !iter->second->IsUnrecoverable()) { FreeTexture(iter->second); iter = textures.erase(iter); @@ -174,17 +177,17 @@ void TextureCache::Cleanup(int _frameCount) } } -void TextureCache::InvalidateRange(u32 start_address, u32 size) +void TextureCache::MakeRangeDynamic(u32 start_address, u32 size) { TexCache::iterator - iter = textures.begin(), - tcend = textures.end(); - while (iter != tcend) + iter = textures.begin(); + + while (iter != textures.end()) { if (iter->second->OverlapsMemoryRange(start_address, size)) { FreeTexture(iter->second); - textures.erase(iter++); + iter = textures.erase(iter); } else { @@ -193,55 +196,21 @@ void TextureCache::InvalidateRange(u32 start_address, u32 size) } } -void TextureCache::MakeRangeDynamic(u32 start_address, u32 size) -{ - TexCache::iterator - iter = textures.lower_bound(start_address), - tcend = textures.upper_bound(start_address + size); - - if (iter != textures.begin()) - --iter; - - for (; iter != tcend; ++iter) - { - if (iter->second->OverlapsMemoryRange(start_address, size)) - { - iter->second->SetHashes(TEXHASH_INVALID); - } - } -} - bool TextureCache::TCacheEntryBase::OverlapsMemoryRange(u32 range_address, u32 range_size) const { - if (addr + size_in_bytes <= range_address) + if (!addr.HasMemAddress()) return false; - if (addr >= range_address + range_size) + u32 memaddr = addr.GetMemAddress(); + if (memaddr + size_in_bytes <= range_address) + return false; + + if (memaddr >= range_address + range_size) return false; return true; } -void TextureCache::ClearRenderTargets() -{ - TexCache::iterator - iter = textures.begin(), - tcend = textures.end(); - - while (iter != tcend) - { - if 
(iter->second->IsEfbCopy()) - { - FreeTexture(iter->second); - textures.erase(iter++); - } - else - { - ++iter; - } - } -} - void TextureCache::DumpTexture(TCacheEntryBase* entry, std::string basename, unsigned int level) { std::string szDir = File::GetUserPath(D_DUMPTEXTURES_IDX) + @@ -267,16 +236,30 @@ static u32 CalculateLevelSize(u32 level_0_size, u32 level) } // Used by TextureCache::Load -static TextureCache::TCacheEntryBase* ReturnEntry(unsigned int stage, TextureCache::TCacheEntryBase* entry) +TextureCache::TCacheEntryBase* TextureCache::ReturnEntry(unsigned int stage, TCacheEntryBase* entry) { entry->frameCount = FRAMECOUNT_INVALID; - entry->Bind(stage); + bound_textures[stage] = entry; GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true); return entry; } +void TextureCache::BindTextures() +{ + for (int i = 0; i < 8; ++i) + { + if (bound_textures[i]) + bound_textures[i]->Bind(i); + } +} + +void TextureCache::UnbindTextures() +{ + std::fill(std::begin(bound_textures), std::end(bound_textures), nullptr); +} + TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) { const FourTexUnits &tex = bpmem.tex[stage >> 2]; @@ -309,6 +292,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) u32 full_format = texformat; const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2); + + // Reject invalid tlut format. 
+ if (isPaletteTexture && tlutfmt > GX_TL_RGB5A3) + return nullptr; + if (isPaletteTexture) full_format = texformat | (tlutfmt << 16); @@ -323,19 +311,43 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should) tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); u32 palette_size = 0; + u64 tlut_hash = 0; if (isPaletteTexture) { palette_size = TexDecoder_GetPaletteSize(texformat); - u64 tlut_hash = GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); - - // Mix the tlut hash into the texture hash. So we only have to compare it once. - tex_hash ^= tlut_hash; + tlut_hash = GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); } // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there tex_levels = std::min(IntLog2(std::max(width, height)) + 1, tex_levels); + // Compute a texture ID; this isn't everything about a texture, rather just + // enough to group together textures with related memory addresses. 
+ TextureAddress texID; + TextureAddress paletteDecodedID; + if (from_tmem) + { + u32 tmem_addr = bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE; + if (texformat == GX_TF_RGBA8 && from_tmem) + { + u32 tmem_odd_addr = bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE; + texID = TextureAddress::TMemRGBA8(tmem_addr, tmem_odd_addr); + } + else + { + texID = TextureAddress::TMem(tmem_addr); + if (isPaletteTexture) + paletteDecodedID = TextureAddress::TMemPalette(tmem_addr, tlutaddr); + } + } + else + { + texID = TextureAddress::Mem(address); + if (isPaletteTexture) + paletteDecodedID = TextureAddress::MemPalette(address, tlutaddr); + } + // Find all texture cache entries for the current texture address, and decide whether to use one of // them, or to create a new one // @@ -360,7 +372,13 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // // For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else it was // done in vain. - std::pair iter_range = textures.equal_range(address); + std::pair iter_range = textures.equal_range(texID); + bool palette_decoded_entry = false; + if (isPaletteTexture && iter_range.first == iter_range.second) + { + iter_range = textures.equal_range(paletteDecodedID); + palette_decoded_entry = true; + } TexCache::iterator iter = iter_range.first; TexCache::iterator oldest_entry = iter; int temp_frameCount = 0x7fffffff; @@ -370,14 +388,39 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) TCacheEntryBase* entry = iter->second; if (entry->IsEfbCopy()) { - // For EFB copies, only the hash and the texture address need to match. 
Ignore the hash when - // using EFB to texture, because there's no hash in this case - if (g_ActiveConfig.bCopyEFBToTexture || entry->hash == tex_hash) + // EFB copies have slightly different rules: the hash doesn't need to match + // in EFB2Tex mode, and EFB copy formats have different meanings from texture + // formats. + if (g_ActiveConfig.bCopyEFBToTexture || + (tex_hash == entry->hash && (!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion))) { - // TODO: Print a warning if the format changes! In this case, - // we could reinterpret the internal texture object data to the new pixel format - // (similar to what is already being done in Renderer::ReinterpretPixelFormat()) - // TODO: Convert paletted textures, which are efb copies, using the right palette, so they display correctly + // TODO: We should check format/width/height/levels for EFB copies. Checking + // format is complicated because EFB copy formats don't exactly match + // texture formats. I'm not sure what effect checking width/height/levels + // would have. + if (!palette_decoded_entry && isPaletteTexture && g_Config.backend_info.bSupportsPaletteConversion) + { + // Perform palette decoding. + // TODO: Skip decoding if we find a match. + std::pair decoded_iter_range = textures.equal_range(paletteDecodedID); + while (decoded_iter_range.first != decoded_iter_range.second) + { + // Pool this texture and make a new one later. 
+ FreeTexture(decoded_iter_range.first->second); + decoded_iter_range.first = textures.erase(decoded_iter_range.first); + } + + TCacheEntryBase *decoded_entry = AllocateTexture(entry->config); + + decoded_entry->SetGeneralParameters(paletteDecodedID, texture_size, full_format); + decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1); + decoded_entry->SetHashes(TEXHASH_INVALID); + decoded_entry->frameCount = FRAMECOUNT_INVALID; + + g_texture_cache->ConvertTexture(decoded_entry, entry, &texMem[tlutaddr], (TlutFormat)tlutfmt); + textures.insert(TexCache::value_type(paletteDecodedID, decoded_entry)); + entry = decoded_entry; + } return ReturnEntry(stage, entry); } else @@ -389,12 +432,14 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) continue; } } - - // For normal textures, all texture parameters need to match - if (entry->hash == tex_hash && entry->format == full_format && entry->native_levels >= tex_levels && - entry->native_width == nativeW && entry->native_height == nativeH) + else { - return ReturnEntry(stage, entry); + // For normal textures, all texture parameters need to match + if (entry->hash == (tex_hash ^ tlut_hash) && entry->format == full_format && entry->native_levels >= tex_levels && + entry->native_width == nativeW && entry->native_height == nativeH) + { + return ReturnEntry(stage, entry); + } } // Find the entry which hasn't been used for the longest time @@ -468,11 +513,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) TCacheEntryBase* entry = AllocateTexture(config); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - textures.insert(TexCache::value_type(address, entry)); + textures.insert(TexCache::value_type(isPaletteTexture ? paletteDecodedID : texID, entry)); - entry->SetGeneralParameters(address, texture_size, full_format); + entry->SetGeneralParameters(isPaletteTexture ? 
paletteDecodedID : texID, texture_size, full_format); entry->SetDimensions(nativeW, nativeH, tex_levels); - entry->hash = tex_hash; + entry->hash = tex_hash ^ tlut_hash; // load texture entry->Load(width, height, expandedWidth, 0); @@ -828,7 +873,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h; // remove all texture cache entries at dstAddr - std::pair iter_range = textures.equal_range(dstAddr); + std::pair iter_range = textures.equal_range(TextureAddress::Mem(dstAddr)); TexCache::iterator iter = iter_range.first; while (iter != iter_range.second) { @@ -846,7 +891,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat TCacheEntryBase* entry = AllocateTexture(config); // TODO: Using the wrong dstFormat, dumb... - entry->SetGeneralParameters(dstAddr, 0, dstFormat); + entry->SetGeneralParameters(TextureAddress::Mem(dstAddr), 0, dstFormat); entry->SetDimensions(tex_w, tex_h, 1); entry->SetHashes(TEXHASH_INVALID); @@ -854,7 +899,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); - textures.insert(TexCache::value_type(dstAddr, entry)); + textures.insert(TexCache::value_type(TextureAddress::Mem(dstAddr), entry)); } TextureCache::TCacheEntryBase* TextureCache::AllocateTexture(const TCacheEntryConfig& config) diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 0ef6a1ff54..1cc51e2000 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -43,13 +43,56 @@ public: }; }; - + class TextureAddress + { + u32 address1; + u32 address2; + enum AddressKind + { + // A texture in RAM + RAM, + // A texture loaded into TMEM + TMEM, + // A texture in RAM, fully decoded using a 
palette. + RAM_PALETTE, + // An RGBA8 texture in TMEM. + TMEM_RGBA8, + // A palette texture in TMEM. + TMEM_PALETTE, + // Uninitialized address. + INVALID + }; + AddressKind kind; + TextureAddress(u32 a, u32 b, AddressKind k) : address1(a), address2(b), kind(k) {} + public: + TextureAddress() : kind(INVALID), address1(0), address2(0) {} + static TextureAddress Mem(u32 a) { return TextureAddress(a, 0, RAM); } + static TextureAddress MemPalette(u32 a, u32 b) { return TextureAddress(a, b, RAM_PALETTE); } + static TextureAddress TMem(u32 a) { return TextureAddress(a, 0, TMEM); } + static TextureAddress TMemRGBA8(u32 a, u32 b) { return TextureAddress(a, b, TMEM_RGBA8); } + static TextureAddress TMemPalette(u32 a, u32 b) { return TextureAddress(a, b, TMEM_PALETTE); } + bool operator == (const TextureAddress& b) const + { + return kind == b.kind && address1 == b.address1 && address2 == b.address2; + } + bool operator < (const TextureAddress& b) const + { + if (kind != b.kind) + return kind < b.kind; + if (address1 != b.address1) + return address1 < b.address1; + return address2 < b.address2; + } + bool IsMemOnlyAddress() const { return kind == RAM; } + bool HasMemAddress() const { return kind == RAM || kind == RAM_PALETTE; } + u32 GetMemAddress() const { return address1; } + }; struct TCacheEntryBase { const TCacheEntryConfig config; // common members - u32 addr; + TextureAddress addr; u32 size_in_bytes; u64 hash; u32 format; @@ -61,7 +104,7 @@ public: int frameCount; - void SetGeneralParameters(u32 _addr, u32 _size, u32 _format) + void SetGeneralParameters(TextureAddress _addr, u32 _size, u32 _format) { addr = _addr; size_in_bytes = _size; @@ -96,6 +139,7 @@ public: bool OverlapsMemoryRange(u32 range_address, u32 range_size) const; bool IsEfbCopy() { return config.rendertarget; } + bool IsUnrecoverable() { return IsEfbCopy() && addr.IsMemOnlyAddress(); } }; virtual ~TextureCache(); // needs virtual for DX11 dtor @@ -107,9 +151,7 @@ public: static void Cleanup(int 
frameCount); static void Invalidate(); - static void InvalidateRange(u32 start_address, u32 size); static void MakeRangeDynamic(u32 start_address, u32 size); - static void ClearRenderTargets(); // currently only used by OGL virtual TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) = 0; @@ -117,11 +159,15 @@ public: virtual void DeleteShaders() = 0; // currently only implemented by OGL static TCacheEntryBase* Load(const u32 stage); + static void UnbindTextures(); + static void BindTextures(); static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); static void RequestInvalidateTextureCache(); + virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) = 0; + protected: TextureCache(); @@ -135,11 +181,14 @@ private: static TCacheEntryBase* AllocateTexture(const TCacheEntryConfig& config); static void FreeTexture(TCacheEntryBase* entry); - typedef std::multimap TexCache; + static TCacheEntryBase* ReturnEntry(unsigned int stage, TCacheEntryBase* entry); + + typedef std::multimap TexCache; typedef std::unordered_multimap TexPool; static TexCache textures; static TexPool texture_pool; + static TCacheEntryBase* bound_textures[8]; // Backup configuration values static struct BackupConfig diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 2ebb42122f..bf7800cc9a 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -209,6 +209,7 @@ void VertexManager::Flush() if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; + TextureCache::UnbindTextures(); for (unsigned int i : usedtextures) { g_renderer->SetSamplerState(i & 3, i >> 2); @@ -224,6 +225,7 @@ void VertexManager::Flush() 
ERROR_LOG(VIDEO, "error loading texture"); } } + TextureCache::BindTextures(); } // set global vertex constants diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 38e8047f75..760a068c0f 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -159,6 +159,7 @@ struct VideoConfig final bool bSupportsBBox; bool bSupportsGSInstancing; // Needed by GeometryShaderGen, so must stay in VideoCommon bool bSupportsPostProcessing; + bool bSupportsPaletteConversion; } backend_info; // Utility From ddc815dd7a77d70e42a18d18443732ec2f6f782a Mon Sep 17 00:00:00 2001 From: magumagu Date: Thu, 19 Feb 2015 15:19:31 -0800 Subject: [PATCH 2/3] Remove TextureAddress struct. --- Source/Core/VideoCommon/TextureCacheBase.cpp | 110 +++++++------------ Source/Core/VideoCommon/TextureCacheBase.h | 54 +-------- 2 files changed, 45 insertions(+), 119 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 6ab9705c4c..736ab57428 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -146,7 +146,7 @@ void TextureCache::Cleanup(int _frameCount) } if (_frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount && // EFB copies living on the host GPU are unrecoverable and thus shouldn't be deleted - !iter->second->IsUnrecoverable()) + !iter->second->IsEfbCopy()) { FreeTexture(iter->second); iter = textures.erase(iter); @@ -198,14 +198,10 @@ void TextureCache::MakeRangeDynamic(u32 start_address, u32 size) bool TextureCache::TCacheEntryBase::OverlapsMemoryRange(u32 range_address, u32 range_size) const { - if (!addr.HasMemAddress()) + if (addr + size_in_bytes <= range_address) return false; - u32 memaddr = addr.GetMemAddress(); - if (memaddr + size_in_bytes <= range_address) - return false; - - if (memaddr >= range_address + range_size) + if (addr >= range_address + range_size) return false; 
return true; @@ -322,32 +318,6 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there tex_levels = std::min(IntLog2(std::max(width, height)) + 1, tex_levels); - // Compute a texture ID; this isn't everything about a texture, rather just - // enough to group together textures with related memory addresses. - TextureAddress texID; - TextureAddress paletteDecodedID; - if (from_tmem) - { - u32 tmem_addr = bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE; - if (texformat == GX_TF_RGBA8 && from_tmem) - { - u32 tmem_odd_addr = bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE; - texID = TextureAddress::TMemRGBA8(tmem_addr, tmem_odd_addr); - } - else - { - texID = TextureAddress::TMem(tmem_addr); - if (isPaletteTexture) - paletteDecodedID = TextureAddress::TMemPalette(tmem_addr, tlutaddr); - } - } - else - { - texID = TextureAddress::Mem(address); - if (isPaletteTexture) - paletteDecodedID = TextureAddress::MemPalette(address, tlutaddr); - } - // Find all texture cache entries for the current texture address, and decide whether to use one of // them, or to create a new one // @@ -372,16 +342,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // // For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else it was // done in vain. 
- std::pair iter_range = textures.equal_range(texID); - bool palette_decoded_entry = false; - if (isPaletteTexture && iter_range.first == iter_range.second) - { - iter_range = textures.equal_range(paletteDecodedID); - palette_decoded_entry = true; - } + std::pair iter_range = textures.equal_range(address); TexCache::iterator iter = iter_range.first; TexCache::iterator oldest_entry = iter; int temp_frameCount = 0x7fffffff; + TexCache::iterator unconverted_copy = textures.end(); while (iter != iter_range.second) { @@ -398,35 +363,21 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // format is complicated because EFB copy formats don't exactly match // texture formats. I'm not sure what effect checking width/height/levels // would have. - if (!palette_decoded_entry && isPaletteTexture && g_Config.backend_info.bSupportsPaletteConversion) - { - // Perform palette decoding. - // TODO: Skip decoding if we find a match. - std::pair decoded_iter_range = textures.equal_range(paletteDecodedID); - while (decoded_iter_range.first != decoded_iter_range.second) - { - // Pool this texture and make a new one later. 
- FreeTexture(decoded_iter_range.first->second); - decoded_iter_range.first = textures.erase(decoded_iter_range.first); - } + if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion) + return ReturnEntry(stage, entry); - TCacheEntryBase *decoded_entry = AllocateTexture(entry->config); - - decoded_entry->SetGeneralParameters(paletteDecodedID, texture_size, full_format); - decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1); - decoded_entry->SetHashes(TEXHASH_INVALID); - decoded_entry->frameCount = FRAMECOUNT_INVALID; - - g_texture_cache->ConvertTexture(decoded_entry, entry, &texMem[tlutaddr], (TlutFormat)tlutfmt); - textures.insert(TexCache::value_type(paletteDecodedID, decoded_entry)); - entry = decoded_entry; - } - return ReturnEntry(stage, entry); + // Note that we found an unconverted EFB copy, then continue. We'll + // perform the conversion later. Currently, we only convert EFB copies to + // palette textures; we could do other conversions if it proved to be + // beneficial. + unconverted_copy = iter; } else { - // Keeping an unused entry for an efb copy in the cache is pointless, because a new entry - // will be created in CopyRenderTargetToTexture + // Aggressively prune EFB copies: if it isn't useful here, it will probably + // never be useful again. It's theoretically possible for a game to do + // something weird where the copy could become useful in the future, but in + // practice it doesn't happen. FreeTexture(entry); iter = textures.erase(iter); continue; @@ -451,6 +402,23 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) ++iter; } + if (unconverted_copy != textures.end()) + { + // Perform palette decoding. 
+ TCacheEntryBase *entry = unconverted_copy->second; + TCacheEntryBase *decoded_entry = AllocateTexture(entry->config); + + decoded_entry->SetGeneralParameters(address, texture_size, full_format); + decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1); + decoded_entry->SetHashes(tex_hash ^ tlut_hash); + decoded_entry->frameCount = FRAMECOUNT_INVALID; + decoded_entry->is_efb_copy = false; + + g_texture_cache->ConvertTexture(decoded_entry, entry, &texMem[tlutaddr], (TlutFormat)tlutfmt); + textures.insert(TexCache::value_type(address, decoded_entry)); + return ReturnEntry(stage, decoded_entry); + } + // If at least one entry was not used for the same frame, overwrite the oldest one if (temp_frameCount != 0x7fffffff) { @@ -513,11 +481,12 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) TCacheEntryBase* entry = AllocateTexture(config); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - textures.insert(TexCache::value_type(isPaletteTexture ? paletteDecodedID : texID, entry)); + textures.insert(TexCache::value_type(address, entry)); - entry->SetGeneralParameters(isPaletteTexture ? paletteDecodedID : texID, texture_size, full_format); + entry->SetGeneralParameters(address, texture_size, full_format); entry->SetDimensions(nativeW, nativeH, tex_levels); entry->hash = tex_hash ^ tlut_hash; + entry->is_efb_copy = false; // load texture entry->Load(width, height, expandedWidth, 0); @@ -873,7 +842,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? 
Renderer::EFBToScaledY(tex_h) : tex_h; // remove all texture cache entries at dstAddr - std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures.equal_range(TextureAddress::Mem(dstAddr)); + std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures.equal_range(dstAddr); TexCache::iterator iter = iter_range.first; while (iter != iter_range.second) { @@ -891,15 +860,16 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat TCacheEntryBase* entry = AllocateTexture(config); // TODO: Using the wrong dstFormat, dumb... - entry->SetGeneralParameters(TextureAddress::Mem(dstAddr), 0, dstFormat); + entry->SetGeneralParameters(dstAddr, 0, dstFormat); entry->SetDimensions(tex_w, tex_h, 1); entry->SetHashes(TEXHASH_INVALID); entry->frameCount = FRAMECOUNT_INVALID; + entry->is_efb_copy = true; entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); - textures.insert(TexCache::value_type(TextureAddress::Mem(dstAddr), entry)); + textures.insert(TexCache::value_type(dstAddr, entry)); } TextureCache::TCacheEntryBase* TextureCache::AllocateTexture(const TCacheEntryConfig& config) diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 1cc51e2000..0b92fd31d3 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -43,59 +43,16 @@ public: }; }; - class TextureAddress - { - u32 address1; - u32 address2; - enum AddressKind - { - // A texture in RAM - RAM, - // A texture loaded into TMEM - TMEM, - // A texture in RAM, fully decoded using a palette. - RAM_PALETTE, - // An RGBA8 texture in TMEM. - TMEM_RGBA8, - // A palette texture in TMEM. - TMEM_PALETTE, - // Uninitialized address. 
- INVALID - }; - AddressKind kind; - TextureAddress(u32 a, u32 b, AddressKind k) : address1(a), address2(b), kind(k) {} - public: - TextureAddress() : kind(INVALID), address1(0), address2(0) {} - static TextureAddress Mem(u32 a) { return TextureAddress(a, 0, RAM); } - static TextureAddress MemPalette(u32 a, u32 b) { return TextureAddress(a, b, RAM_PALETTE); } - static TextureAddress TMem(u32 a) { return TextureAddress(a, 0, TMEM); } - static TextureAddress TMemRGBA8(u32 a, u32 b) { return TextureAddress(a, b, TMEM_RGBA8); } - static TextureAddress TMemPalette(u32 a, u32 b) { return TextureAddress(a, b, TMEM_PALETTE); } - bool operator == (const TextureAddress& b) const - { - return kind == b.kind && address1 == b.address1 && address2 == b.address2; - } - bool operator < (const TextureAddress& b) const - { - if (kind != b.kind) - return kind < b.kind; - if (address1 != b.address1) - return address1 < b.address1; - return address2 < b.address2; - } - bool IsMemOnlyAddress() const { return kind == RAM; } - bool HasMemAddress() const { return kind == RAM || kind == RAM_PALETTE; } - u32 GetMemAddress() const { return address1; } - }; struct TCacheEntryBase { const TCacheEntryConfig config; // common members - TextureAddress addr; + u32 addr; u32 size_in_bytes; u64 hash; u32 format; + bool is_efb_copy; unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view unsigned int native_levels; @@ -104,7 +61,7 @@ public: int frameCount; - void SetGeneralParameters(TextureAddress _addr, u32 _size, u32 _format) + void SetGeneralParameters(u32 _addr, u32 _size, u32 _format) { addr = _addr; size_in_bytes = _size; @@ -138,8 +95,7 @@ public: bool OverlapsMemoryRange(u32 range_address, u32 range_size) const; - bool IsEfbCopy() { return config.rendertarget; } - bool IsUnrecoverable() { return IsEfbCopy() && addr.IsMemOnlyAddress(); } + bool IsEfbCopy() { return is_efb_copy; } }; virtual ~TextureCache(); // needs virtual for DX11 dtor @@ -183,7 
+139,7 @@ private: static TCacheEntryBase* ReturnEntry(unsigned int stage, TCacheEntryBase* entry); - typedef std::multimap<TextureAddress, TCacheEntryBase*> TexCache; + typedef std::multimap<u32, TCacheEntryBase*> TexCache; typedef std::unordered_multimap TexPool; static TexCache textures; From 074397c12dd95b6c5119054ed6674043fb4641cc Mon Sep 17 00:00:00 2001 From: magumagu Date: Thu, 19 Feb 2015 15:53:52 -0800 Subject: [PATCH 3/3] Explicitly set up AllocateTexture configuration for palette conversion. No functional change. --- Source/Core/VideoCommon/TextureCacheBase.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 736ab57428..62fdfa80d2 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -406,7 +406,13 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) { // Perform palette decoding. TCacheEntryBase *entry = unconverted_copy->second; - TCacheEntryBase *decoded_entry = AllocateTexture(entry->config); + + TCacheEntryConfig config; + config.rendertarget = true; + config.width = entry->config.width; + config.height = entry->config.height; + config.layers = FramebufferManagerBase::GetEFBLayers(); + TCacheEntryBase *decoded_entry = AllocateTexture(config); decoded_entry->SetGeneralParameters(address, texture_size, full_format); decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1);