From dfae87d69a7c7136b4f7b8af45af62bb45967e4f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 14 Jul 2019 13:36:41 +1000 Subject: [PATCH 1/3] TextureConversionShader: Swap bytes for RG8/GB8 format Matches the software renderer. --- Source/Core/VideoCommon/TextureConversionShader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 69a42e26d5..9348413fd1 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -824,13 +824,13 @@ const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type if (params.depth) WriteZ16Encoder(p, api_type, params); // Z16H else - WriteCC8Encoder(p, "rg", api_type, params); + WriteCC8Encoder(p, "gr", api_type, params); break; case EFBCopyFormat::GB8: if (params.depth) WriteZ16LEncoder(p, api_type, params); // Z16L else - WriteCC8Encoder(p, "gb", api_type, params); + WriteCC8Encoder(p, "bg", api_type, params); break; case EFBCopyFormat::XFB: WriteXFBEncoder(p, api_type, params); From 77f406c8a8dc638bd6404dd71b7537eed9498b69 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 14 Jul 2019 14:07:37 +1000 Subject: [PATCH 2/3] TextureCache: Combine ApplyPaletteToEntry and ConvertTexture --- Source/Core/VideoCommon/TextureCacheBase.cpp | 85 +++++++++----------- Source/Core/VideoCommon/TextureCacheBase.h | 4 - 2 files changed, 38 insertions(+), 51 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 490b51683e..f21e8e0e0e 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -252,6 +252,8 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config) TextureCacheBase::TCacheEntry* TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt) { + 
DEBUG_ASSERT(g_ActiveConfig.backend_info.bSupportsPaletteConversion); + TextureConfig new_config = entry->texture->GetConfig(); new_config.levels = 1; new_config.flags |= AbstractTextureFlag_RenderTarget; @@ -269,8 +271,42 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma decoded_entry->SetNotCopy(); decoded_entry->may_have_overlapping_textures = entry->may_have_overlapping_textures; - ConvertTexture(decoded_entry, entry, palette, tlutfmt); - textures_by_address.emplace(entry->addr, decoded_entry); + g_renderer->BeginUtilityDrawing(); + + const u32 palette_size = entry->format == TextureFormat::I4 ? 32 : 512; + u32 texel_buffer_offset; + if (g_vertex_manager->UploadTexelBuffer(palette, palette_size, + TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT, + &texel_buffer_offset)) + { + struct Uniforms + { + float multiplier; + u32 texel_buffer_offset; + u32 pad[2]; + }; + static_assert(std::is_standard_layout<Uniforms>::value); + Uniforms uniforms = {}; + uniforms.multiplier = entry->format == TextureFormat::I4 ? 
15.0f : 255.0f; + uniforms.texel_buffer_offset = texel_buffer_offset; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + g_renderer->SetAndDiscardFramebuffer(decoded_entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(decoded_entry->texture->GetRect()); + g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(tlutfmt)); + g_renderer->SetTexture(1, entry->texture.get()); + g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + decoded_entry->texture->FinishedRendering(); + } + else + { + ERROR_LOG(VIDEO, "Texel buffer upload of %u bytes failed", palette_size); + g_renderer->EndUtilityDrawing(); + } + + textures_by_address.emplace(decoded_entry->addr, decoded_entry); return decoded_entry; } @@ -2301,51 +2337,6 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& g_vertex_manager->OnEFBCopyToRAM(); } -bool TextureCacheBase::ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, - const void* palette, TLUTFormat format) -{ - DEBUG_ASSERT(entry->texture->GetConfig().IsRenderTarget() && entry->framebuffer); - if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - ERROR_LOG(VIDEO, "Backend does not support palette conversion!"); - return false; - } - - g_renderer->BeginUtilityDrawing(); - - const u32 palette_size = unconverted->format == TextureFormat::I4 ? 32 : 512; - u32 texel_buffer_offset; - if (!g_vertex_manager->UploadTexelBuffer(palette, palette_size, - TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT, - &texel_buffer_offset)) - { - ERROR_LOG(VIDEO, "Texel buffer upload failed"); - return false; - } - - struct Uniforms - { - float multiplier; - u32 texel_buffer_offset; - u32 pad[2]; - }; - static_assert(std::is_standard_layout<Uniforms>::value); - Uniforms uniforms = {}; - uniforms.multiplier = unconverted->format == TextureFormat::I4 ? 
15.0f : 255.0f; - uniforms.texel_buffer_offset = texel_buffer_offset; - g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); - - g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); - g_renderer->SetViewportAndScissor(entry->texture->GetRect()); - g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(format)); - g_renderer->SetTexture(1, unconverted->texture.get()); - g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); - g_renderer->Draw(0, 3); - g_renderer->EndUtilityDrawing(); - entry->texture->FinishedRendering(); - return true; -} - bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, u32 data_size, TextureFormat format, u32 width, u32 height, u32 aligned_width, u32 aligned_height, diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 6c647359dd..51c4f20af4 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -228,10 +228,6 @@ public: static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); protected: - // Applies a palette to an EFB copy/texture. - bool ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format); - // Decodes the specified data to the GPU texture specified by entry. // Returns false if the configuration is not supported. // width, height are the size of the image in pixels. 
From 946571b7595b6b24e02cf8622f2bd5935b5aa360 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 14 Jul 2019 15:24:12 +1000 Subject: [PATCH 3/3] TextureCache: Support reinterpreting formats for VRAM textures --- .../Core/VideoCommon/FramebufferShaderGen.cpp | 149 +++++++++++++++++- .../Core/VideoCommon/FramebufferShaderGen.h | 2 + Source/Core/VideoCommon/ShaderCache.cpp | 39 ++++- Source/Core/VideoCommon/ShaderCache.h | 9 ++ Source/Core/VideoCommon/TextureCacheBase.cpp | 99 +++++++++++- Source/Core/VideoCommon/TextureCacheBase.h | 2 + Source/Core/VideoCommon/TextureDecoder.h | 41 +++++ 7 files changed, 335 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index dfb86d2be2..f944562d9c 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -1,6 +1,7 @@ #include "VideoCommon/FramebufferShaderGen.h" #include <sstream> #include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VertexShaderGen.h" namespace FramebufferShaderGen @@ -68,6 +69,26 @@ static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords) } } +// Emits a texel fetch/load instruction. Assumes that "coords" is a 4-element vector, with z +// containing the layer, and w containing the mipmap level. 
+static void EmitTextureLoad(std::stringstream& ss, u32 n, const char* coords) +{ + switch (GetAPIType()) + { + case APIType::D3D: + ss << "tex" << n << ".Load(" << coords << ")"; + break; + + case APIType::OpenGL: + case APIType::Vulkan: + ss << "texelFetch(samp" << n << ", (" << coords << ").xyz, (" << coords << ").w)"; + break; + + default: + break; + } +} + static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, u32 num_color_inputs, bool position_input, u32 num_tex_outputs, u32 num_color_outputs, @@ -133,7 +154,7 @@ static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, u32 num_color_inputs, const char* output_type = "float4", - const char* extra_vars = "") + const char* extra_vars = "", bool emit_frag_coord = false) { switch (GetAPIType()) { @@ -144,6 +165,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", "; for (u32 i = 0; i < num_color_inputs; i++) ss << "in float4 v_col" << i << " : COLOR" << i << ", "; + if (emit_frag_coord) + ss << "in float4 frag_coord : SV_Position, "; ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n"; } break; @@ -170,6 +193,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n"; ss << extra_vars << "\n"; + if (emit_frag_coord) + ss << "#define frag_coord gl_FragCoord\n"; ss << "void main()\n"; } break; @@ -496,4 +521,126 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp return ss.str(); } +std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format) +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, false); + EmitPixelMainDeclaration(ss, 1, 0, "float4", "", true); + ss << "{\n"; + ss << " int layer = 
int(v_tex0.z);\n"; + ss << " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n"; + + // Convert to a 32-bit value encompassing all channels, filling the most significant bits with + // zeroes. + ss << " uint raw_value;\n"; + switch (from_format) + { + case TextureFormat::I8: + case TextureFormat::C8: + { + ss << " float4 temp_value = "; + EmitTextureLoad(ss, 0, "coords"); + ss << ";\n"; + ss << " raw_value = uint(temp_value.r * 255.0);\n"; + } + break; + + case TextureFormat::IA8: + { + ss << " float4 temp_value = "; + EmitTextureLoad(ss, 0, "coords"); + ss << ";\n"; + ss << " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n"; + } + break; + + case TextureFormat::IA4: + { + ss << " float4 temp_value = "; + EmitTextureLoad(ss, 0, "coords"); + ss << ";\n"; + ss << " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n"; + } + break; + + case TextureFormat::RGB565: + { + ss << " float4 temp_value = "; + EmitTextureLoad(ss, 0, "coords"); + ss << ";\n"; + ss << " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n"; + ss << " (uint(temp_value.r * 31.0) << 11);\n"; + } + break; + + case TextureFormat::RGB5A3: + { + ss << " float4 temp_value = "; + EmitTextureLoad(ss, 0, "coords"); + ss << ";\n"; + + // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits + ss << " if (temp_value.a > 0.878f) {\n"; + ss << " raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n"; + ss << " (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n"; + ss << " } else {\n"; + ss << " raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n"; + ss << " (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n"; + ss << " }\n"; + } + break; + } + + // Now convert it to its new representation. 
+ switch (to_format) + { + case TextureFormat::I8: + case TextureFormat::C8: + { + ss << " ocol0.rgba = (float(raw_value & 0xFFu) / 255.0).rrrr;\n"; + } + break; + + case TextureFormat::IA8: + { + ss << " ocol0.rgb = (float(raw_value & 0xFFu) / 255.0).rrr;\n"; + ss << " ocol0.a = float((raw_value >> 8) & 0xFFu) / 255.0;\n"; + } + break; + + case TextureFormat::IA4: + { + ss << " ocol0.rgb = (float(raw_value & 0xFu) / 15.0).rrr;\n"; + ss << " ocol0.a = float((raw_value >> 4) & 0xFu) / 15.0;\n"; + } + break; + + case TextureFormat::RGB565: + { + ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"; + ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n"; + ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n"; + } + break; + + case TextureFormat::RGB5A3: + { + ss << " if ((raw_value & 0x8000u) != 0u) {\n"; + ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"; + ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n"; + ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n"; + ss << " } else {\n"; + ss << " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n"; + ss << " float((raw_value >> 4) & 0x0Fu) / 15.0,\n"; + ss << " float(raw_value & 0x0Fu) / 15.0,\n"; + ss << " float((raw_value >> 12) & 0x07u) / 7.0);\n"; + ss << " }\n"; + } + break; + } + + ss << "}\n"; + return ss.str(); +} + } // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h index 0e065521cf..b0134b5897 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.h +++ b/Source/Core/VideoCommon/FramebufferShaderGen.h @@ -3,6 +3,7 @@ #include "VideoCommon/VideoCommon.h" enum class EFBReinterpretType; +enum class TextureFormat; namespace FramebufferShaderGen { @@ -28,5 +29,6 @@ std::string GenerateClearVertexShader(); std::string GenerateEFBPokeVertexShader(); std::string GenerateColorPixelShader(); std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples); +std::string 
GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format); } // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 1f578f6bd8..3d4f286b68 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -1255,6 +1255,44 @@ const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat for return m_palette_conversion_pipelines[static_cast<size_t>(format)].get(); } +const AbstractPipeline* ShaderCache::GetTextureReinterpretPipeline(TextureFormat from_format, + TextureFormat to_format) +{ + const auto key = std::make_pair(from_format, to_format); + auto iter = m_texture_reinterpret_pipelines.find(key); + if (iter != m_texture_reinterpret_pipelines.end()) + return iter->second.get(); + + std::string shader_source = + FramebufferShaderGen::GenerateTextureReinterpretShader(from_format, to_format); + if (shader_source.empty()) + { + m_texture_reinterpret_pipelines.emplace(key, nullptr); + return nullptr; + } + + std::unique_ptr<AbstractShader> shader = + g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_source); + if (!shader) + { + m_texture_reinterpret_pipelines.emplace(key, nullptr); + return nullptr; + } + + AbstractPipelineConfig config; + config.vertex_format = nullptr; + config.vertex_shader = m_screen_quad_vertex_shader.get(); + config.geometry_shader = nullptr; + config.pixel_shader = shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetRGBA8FramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + auto iiter = m_texture_reinterpret_pipelines.emplace(key, g_renderer->CreatePipeline(config)); + return iiter.first->second.get(); +} + const AbstractShader* 
ShaderCache::GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format) { @@ -1282,5 +1320,4 @@ const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader)); return iiter.first->second.get(); } - } // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h index 16f6ca6f4a..ffea19ee60 100644 --- a/Source/Core/VideoCommon/ShaderCache.h +++ b/Source/Core/VideoCommon/ShaderCache.h @@ -34,6 +34,7 @@ class NativeVertexFormat; enum class AbstractTextureFormat : u32; +enum class TextureFormat; enum class TLUTFormat; namespace VideoCommon @@ -104,6 +105,10 @@ public: // Palette texture conversion pipelines const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format); + // Texture reinterpret pipelines + const AbstractPipeline* GetTextureReinterpretPipeline(TextureFormat from_format, + TextureFormat to_format); + // Texture decoding compute shaders const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format); @@ -238,6 +243,10 @@ private: std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS> m_palette_conversion_pipelines; + // Texture reinterpreting pipeline + std::map<std::pair<TextureFormat, TextureFormat>, std::unique_ptr<AbstractPipeline>> + m_texture_reinterpret_pipelines; + // Texture decoding shaders std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders; }; diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index f21e8e0e0e..5fcf667514 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -311,6 +311,44 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma return decoded_entry; } +TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEntry* existing_entry, + TextureFormat new_format) +{ + TextureConfig new_config = existing_entry->texture->GetConfig(); + 
new_config.levels = 1; + new_config.flags |= AbstractTextureFlag_RenderTarget; + + TCacheEntry* reinterpreted_entry = AllocateCacheEntry(new_config); + if (!reinterpreted_entry) + return nullptr; + + reinterpreted_entry->SetGeneralParameters(existing_entry->addr, existing_entry->size_in_bytes, + new_format, existing_entry->should_force_safe_hashing); + reinterpreted_entry->SetDimensions(existing_entry->native_width, existing_entry->native_height, + 1); + reinterpreted_entry->SetHashes(existing_entry->base_hash, existing_entry->hash); + reinterpreted_entry->frameCount = existing_entry->frameCount; + reinterpreted_entry->SetNotCopy(); + reinterpreted_entry->is_efb_copy = existing_entry->is_efb_copy; + reinterpreted_entry->may_have_overlapping_textures = + existing_entry->may_have_overlapping_textures; + + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(reinterpreted_entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(reinterpreted_entry->texture->GetRect()); + g_renderer->SetPipeline( + g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format)); + g_renderer->SetTexture(0, existing_entry->texture.get()); + g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + reinterpreted_entry->texture->FinishedRendering(); + + textures_by_address.emplace(reinterpreted_entry->addr, reinterpreted_entry); + + return reinterpreted_entry; +} + void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* entry, u32 new_width, u32 new_height) { @@ -385,6 +423,18 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale { if (entry->hash == entry->CalculateHash()) { + // If the texture formats are not compatible or convertible, skip it. 
+ if (!IsCompatibleTextureFormat(entry_to_update->format.texfmt, entry->format.texfmt)) + { + if (!CanReinterpretTextureOnGPU(entry_to_update->format.texfmt, entry->format.texfmt)) + { + ++iter.first; + continue; + } + + entry = ReinterpretEntry(entry, entry_to_update->format.texfmt); + } + if (isPaletteTexture) { TCacheEntry* decoded_entry = ApplyPaletteToEntry(entry, palette, tlutfmt); @@ -930,6 +980,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo TexAddrCache::iterator oldest_entry = iter; int temp_frameCount = 0x7fffffff; TexAddrCache::iterator unconverted_copy = textures_by_address.end(); + TexAddrCache::iterator unreinterpreted_copy = textures_by_address.end(); while (iter != iter_range.second) { @@ -958,10 +1009,38 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo (!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion)) || IsPlayingBackFifologWithBrokenEFBCopies) { - // TODO: We should check format/width/height/levels for EFB copies. Checking - // format is complicated because EFB copy formats don't exactly match - // texture formats. I'm not sure what effect checking width/height/levels - // would have. + // The texture format in VRAM must match the format that the copy was created with. Some + // formats are inherently compatible, as the channel and bit layout is identical (e.g. + // I8/C8). Others have the same number of bits per texel, and can be reinterpreted on the + // GPU (e.g. IA4 and I8 or RGB565 and RGB5A3). The only known game which reinterprets texels + // in this manner is Spider-Man: Shattered Dimensions, where it creates a copy in B8 format, + // and sets it up as an IA4 texture. + if (!IsCompatibleTextureFormat(entry->format.texfmt, texformat)) + { + // Can we reinterpret this in VRAM? + if (CanReinterpretTextureOnGPU(entry->format.texfmt, texformat)) + { + // Delay the conversion until afterwards, it's possible this texture has already been + // converted. 
+ unreinterpreted_copy = iter++; + continue; + } + else + { + // If the EFB copies are in a different format and are not reinterpretable, use the RAM + // copy. + ++iter; + continue; + } + } + else + { + // Prefer the already-converted copy. + unconverted_copy = textures_by_address.end(); + } + + // TODO: We should check width/height/levels for EFB copies. I'm not sure what effect + // checking width/height/levels would have. if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion) return entry; @@ -1010,6 +1089,18 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo ++iter; } + if (unreinterpreted_copy != textures_by_address.end()) + { + TCacheEntry* decoded_entry = ReinterpretEntry(unreinterpreted_copy->second, texformat); + + // It's possible to combine reinterpreted textures + palettes. + if (unreinterpreted_copy == unconverted_copy && decoded_entry) + decoded_entry = ApplyPaletteToEntry(decoded_entry, &texMem[tlutaddr], tlutfmt); + + if (decoded_entry) + return decoded_entry; + } + if (unconverted_copy != textures_by_address.end()) { TCacheEntry* decoded_entry = diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 51c4f20af4..2f42df9a05 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -277,6 +277,8 @@ private: TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); + TCacheEntry* ReinterpretEntry(const TCacheEntry* existing_entry, TextureFormat new_format); + TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TLUTFormat tlutfmt); void StitchXFBCopy(TCacheEntry* entry_to_update); diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h index a1313afab1..7a08a6514a 100644 --- a/Source/Core/VideoCommon/TextureDecoder.h +++ b/Source/Core/VideoCommon/TextureDecoder.h @@ -99,6 +99,47 @@ static inline bool 
IsValidTLUTFormat(TLUTFormat tlutfmt) tlutfmt == TLUTFormat::RGB5A3; } +static inline bool IsCompatibleTextureFormat(TextureFormat from_format, TextureFormat to_format) +{ + if (from_format == to_format) + return true; + + // Intensity and paletted formats of the same bit depth are "compatible", that is, they do not require conversion. + switch (from_format) + { + case TextureFormat::I4: + case TextureFormat::C4: + return to_format == TextureFormat::I4 || to_format == TextureFormat::C4; + + case TextureFormat::I8: + case TextureFormat::C8: + return to_format == TextureFormat::I8 || to_format == TextureFormat::C8; + + default: + return false; + } +} + +static inline bool CanReinterpretTextureOnGPU(TextureFormat from_format, TextureFormat to_format) +{ + // Currently, we can only reinterpret between formats with the same number of bits per texel. + switch (from_format) + { + case TextureFormat::I8: + case TextureFormat::IA4: + return to_format == TextureFormat::I8 || to_format == TextureFormat::IA4; + + case TextureFormat::IA8: + case TextureFormat::RGB565: + case TextureFormat::RGB5A3: + return to_format == TextureFormat::IA8 || to_format == TextureFormat::RGB565 || + to_format == TextureFormat::RGB5A3; + + default: + return false; + } +} + int TexDecoder_GetTexelSizeInNibbles(TextureFormat format); int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format); int TexDecoder_GetBlockWidthInTexels(TextureFormat format);