From 5fb6ceac452fa7960301d132f2f044073045564a Mon Sep 17 00:00:00 2001 From: Tommaso Checchi Date: Wed, 18 Oct 2017 02:02:56 -0700 Subject: [PATCH] Fixed fog, lava, water in Mario Galaxy (and possibly other games) in higher IRs by detecting & fixing the mipmaps that aren't just used for downscaling, but are handmade to create a gradient. --- Source/Core/VideoBackends/D3D/D3DState.cpp | 2 +- Source/Core/VideoBackends/D3D/D3DState.h | 2 +- .../Core/VideoBackends/OGL/SamplerCache.cpp | 2 +- .../Core/VideoBackends/Vulkan/ObjectCache.cpp | 2 +- Source/Core/VideoCommon/RenderState.cpp | 4 +- Source/Core/VideoCommon/RenderState.h | 16 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 184 ++++++++++++++++-- Source/Core/VideoCommon/TextureCacheBase.h | 6 +- Source/Core/VideoCommon/VertexManagerBase.cpp | 16 +- 9 files changed, 204 insertions(+), 30 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index 56bcb58fa1..5732ffd21b 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -299,7 +299,7 @@ ID3D11SamplerState* StateCache::Get(SamplerState state) sampdc.AddressV = address_modes[static_cast(state.wrap_v.Value())]; sampdc.MaxLOD = state.max_lod / 16.f; sampdc.MinLOD = state.min_lod / 16.f; - sampdc.MipLODBias = (s32)state.lod_bias / 32.0f; + sampdc.MipLODBias = (s32)state.lod_bias / 256.f; if (state.anisotropic_filtering) { diff --git a/Source/Core/VideoBackends/D3D/D3DState.h b/Source/Core/VideoBackends/D3D/D3DState.h index fc7cda88b3..0c083f7c1b 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.h +++ b/Source/Core/VideoBackends/D3D/D3DState.h @@ -40,7 +40,7 @@ private: std::unordered_map m_depth; std::unordered_map m_raster; std::unordered_map m_blend; - std::unordered_map m_sampler; + std::unordered_map m_sampler; }; namespace D3D diff --git a/Source/Core/VideoBackends/OGL/SamplerCache.cpp b/Source/Core/VideoBackends/OGL/SamplerCache.cpp index 
59d3d3a842..32be0ac4d9 100644 --- a/Source/Core/VideoBackends/OGL/SamplerCache.cpp +++ b/Source/Core/VideoBackends/OGL/SamplerCache.cpp @@ -100,7 +100,7 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params) glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f); if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL) - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 32.f); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f); if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso) { diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index c0196cb7d6..1a5cfbf34e 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -329,7 +329,7 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info) address_modes[static_cast(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU address_modes[static_cast(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW - info.lod_bias / 32.0f, // float mipLodBias + info.lod_bias / 256.0f, // float mipLodBias VK_FALSE, // VkBool32 anisotropyEnable 0.0f, // float maxAnisotropy VK_FALSE, // VkBool32 compareEnable diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index 8f8d421730..5cad8c2584 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -180,8 +180,8 @@ void SamplerState::Generate(const BPMemory& bp, u32 index) // If mipmaps are disabled, clamp min/max lod max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod : 0; - min_lod = std::min(max_lod.Value(), tm1.min_lod); - lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? 
tm0.lod_bias : 0; + min_lod = std::min(max_lod.Value(), static_cast(tm1.min_lod)); + lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0; // Address modes static constexpr std::array address_modes = { diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 76482b6bb9..d72c2d3984 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -76,13 +76,15 @@ union BlendingState union SamplerState { - enum class Filter : u32 + using StorageType = u64; + + enum class Filter : StorageType { Point, Linear }; - enum class AddressMode : u32 + enum class AddressMode : StorageType { Clamp, Repeat, @@ -101,12 +103,12 @@ union SamplerState BitField<2, 1, Filter> mipmap_filter; BitField<3, 2, AddressMode> wrap_u; BitField<5, 2, AddressMode> wrap_v; - BitField<7, 8, u32> min_lod; // multiplied by 16 - BitField<15, 8, u32> max_lod; // multiplied by 16 - BitField<23, 8, s32> lod_bias; // multiplied by 32 - BitField<31, 1, u32> anisotropic_filtering; + BitField<7, 16, s64> lod_bias; // multiplied by 256 + BitField<23, 8, u64> min_lod; // multiplied by 16 + BitField<31, 8, u64> max_lod; // multiplied by 16 + BitField<39, 1, u64> anisotropic_filtering; - u32 hex; + StorageType hex; }; namespace RenderState diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index a2be3c64a5..42a3beda5c 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include #include #include #include @@ -431,7 +432,8 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale return entry_to_update; } -void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level) +void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, + bool is_arbitrary) { std::string szDir = File::GetUserPath(D_DUMPTEXTURES_IDX) + SConfig::GetInstance().GetGameID(); @@ -441,8 +443,9 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns if (level > 0) { - basename += StringFromFormat("_mip%i", level); + basename += StringFromFormat(is_arbitrary ? "_arb_mip%i" : "_mip%i", level); } + std::string filename = szDir + "/" + basename + ".png"; if (!File::Exists(filename)) @@ -477,6 +480,124 @@ void TextureCacheBase::BindTextures() } } +class ArbitraryMipmapDetector +{ +private: + using PixelRGBAf = std::array; + +public: + explicit ArbitraryMipmapDetector() = default; + + void AddLevel(u32 width, u32 height, u32 row_length, const u8* buffer) + { + levels.push_back({width, height, row_length, buffer}); + } + + bool HasArbitraryMipmaps(u8* downsample_buffer) const + { + if (levels.size() < 2) + return false; + + // This is the average per-pixel, per-channel difference in percent between what we + // expect a normal blurred mipmap to look like and what we actually received + constexpr auto THRESHOLD_PERCENT = 35.f; + + for (std::size_t i = 0; i < levels.size() - 1; ++i) + { + const auto& level = levels[i]; + const auto& mip = levels[i + 1]; + + // Manually downsample the current layer with a simple box blur + // This is not necessarily close to whatever the original artists used, however + // It should still be closer than a thing that's not a downscale at all + level.Downsample(downsample_buffer, mip); + + // Find the average difference between pixels in this level but downsampled + // and the next level + auto diff = 
mip.AverageDiff(downsample_buffer); + if (diff > THRESHOLD_PERCENT) + return true; + } + return false; + } + +private: + static float SRGBToLinear(u8 srgb_byte) + { + auto srgb_float = static_cast<float>(srgb_byte) / 256.f; + // approximations found on + // http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html + return srgb_float * (srgb_float * (srgb_float * 0.305306011f + 0.682171111f) + 0.012522878f); + } + + static u8 LinearToSRGB(float linear) + { + return static_cast<u8>(std::max(1.055f * std::pow(linear, 0.416666667f) - 0.055f, 0.f) * 256.f); + } + + struct Level + { + u32 width; + u32 height; + u32 row_length; + const u8* buffer; + + PixelRGBAf Sample(u32 x, u32 y) const + { + const auto* p = buffer + (x + y * row_length) * 4; + return {SRGBToLinear(p[0]), SRGBToLinear(p[1]), SRGBToLinear(p[2]), SRGBToLinear(p[3])}; + } + + // Puts a 2x2 box-downsampled copy of this level in dst. dst must hold at least dst_shape.row_length * dst_shape.height * 4 bytes + void Downsample(u8* dst, const Level& dst_shape) const + { + for (u32 i = 0; i < dst_shape.height; ++i) + { + for (u32 j = 0; j < dst_shape.width; ++j) + { + auto x = j * 2; + auto y = i * 2; + const std::array<PixelRGBAf, 4> samples = {Sample(x, y), Sample(x + 1, y), + Sample(x, y + 1), Sample(x + 1, y + 1)}; + auto* dst_pixel = dst + (j + i * dst_shape.row_length) * 4; + dst_pixel[0] = + LinearToSRGB((samples[0][0] + samples[1][0] + samples[2][0] + samples[3][0]) * 0.25f); + dst_pixel[1] = + LinearToSRGB((samples[0][1] + samples[1][1] + samples[2][1] + samples[3][1]) * 0.25f); + dst_pixel[2] = + LinearToSRGB((samples[0][2] + samples[1][2] + samples[2][2] + samples[3][2]) * 0.25f); + dst_pixel[3] = + LinearToSRGB((samples[0][3] + samples[1][3] + samples[2][3] + samples[3][3]) * 0.25f); + } + } + } + + float AverageDiff(const u8* other) const + { + float average_diff = 0.f; + const auto* ptr1 = buffer; + const auto* ptr2 = other; + for (u32 i = 0; i < height; ++i) + { + const auto* row1 = ptr1; + const auto* row2 = ptr2; + for (u32 j = 0; j < width; ++j, row1 += 4, row2 += 
4) + { + average_diff += std::abs(row1[0] - row2[0]); + average_diff += std::abs(row1[1] - row2[1]); + average_diff += std::abs(row1[2] - row2[2]); + average_diff += std::abs(row1[3] - row2[3]); + } + ptr1 += row_length * 4; + ptr2 += row_length * 4; + } + + return average_diff / (width * height * 4) / 2.56f; + } + }; + std::vector<Level> levels; +}; + TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) { // if this stage was not invalidated by changes to texture registers, keep the current texture @@ -774,6 +895,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) config.levels = texLevels; config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8; + ArbitraryMipmapDetector arbitrary_mip_detector; + TCacheEntry* entry = AllocateCacheEntry(config); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); @@ -788,6 +911,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) level.data_size); } + // Initialized to null because only software loading uses this buffer + u8* dst_buffer = nullptr; + if (!hires_tex && decode_on_gpu) { u32 row_stride = bytes_per_block * (expandedWidth / bsw); @@ -797,19 +923,41 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) else if (!hires_tex) { size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight; - CheckTempSize(decoded_texture_size); + + // Allocate memory for all levels at once + size_t total_texture_size = decoded_texture_size; + size_t mip_downsample_buffer_size = decoded_texture_size / 4; + size_t prev_level_size = decoded_texture_size; + for (u32 i = 1; i < tex_levels; ++i) + { + prev_level_size /= 4; + total_texture_size += prev_level_size; + } + + // Add space for the downsampling at the end + total_texture_size += mip_downsample_buffer_size; + + CheckTempSize(total_texture_size); + dst_buffer = temp; + if (!(texformat == TextureFormat::RGBA8 && from_tmem)) { - TexDecoder_Decode(temp, src_data, expandedWidth, 
expandedHeight, texformat, tlut, tlutfmt); + TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut, + tlutfmt); } else { u8* src_data_gb = &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE]; - TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight); + TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth, + expandedHeight); } - entry->texture->Load(0, width, height, expandedWidth, temp, decoded_texture_size); + entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size); + + arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer); + + dst_buffer += decoded_texture_size; } iter = textures_by_address.emplace(address, entry); @@ -832,7 +980,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) { basename = HiresTexture::GenBaseName(src_data, texture_size, &texMem[tlutaddr], palette_size, width, height, texformat, use_mipmaps, true); - DumpTexture(entry, basename, 0); } if (hires_tex) @@ -878,18 +1025,29 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) } else { - // No need to call CheckTempSize here, as mips will always be smaller than the base level. 
+ // No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; - TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, - tlut, tlutfmt); - entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp, + TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height, + texformat, tlut, tlutfmt); + entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer, decoded_mip_size); + + arbitrary_mip_detector.AddLevel(mip_width, mip_height, expanded_mip_width, dst_buffer); + + dst_buffer += decoded_mip_size; } mip_src_data += mip_size; + } + } - if (g_ActiveConfig.bDumpTextures) - DumpTexture(entry, basename, level); + entry->has_arbitrary_mips = arbitrary_mip_detector.HasArbitraryMipmaps(dst_buffer); + + if (g_ActiveConfig.bDumpTextures) + { + for (u32 level = 0; level < texLevels; ++level) + { + DumpTexture(entry, basename, level, entry->has_arbitrary_mips); } } diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index f5b2a1a8a0..6b69caf036 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -81,7 +81,9 @@ public: bool is_efb_copy; bool is_custom_tex; bool may_have_overlapping_textures = true; - bool tmem_only = false; // indicates that this texture only exists in the tmem cache + bool tmem_only = false; // indicates that this texture only exists in the tmem cache + bool has_arbitrary_mips = false; // indicates that the mips in this texture are arbitrary + // content, aren't just downscaled unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view @@ -224,7 +226,7 @@ private: TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TLUTFormat tlutfmt); - void DumpTexture(TCacheEntry* entry, std::string 
basename, unsigned int level); + void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary); void CheckTempSize(size_t required_size); TCacheEntry* AllocateCacheEntry(const TextureConfig& config); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 9138b0db09..870b9ad1e2 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -209,7 +209,7 @@ std::pair VertexManagerBase::ResetFlushAspectRatioCount() return val; } -static void SetSamplerState(u32 index, bool custom_tex) +static void SetSamplerState(u32 index, bool custom_tex, bool has_arbitrary_mips) { const FourTexUnits& tex = bpmem.tex[index / 4]; const TexMode0& tm0 = tex.texMode0[index % 4]; @@ -252,6 +252,18 @@ static void SetSamplerState(u32 index, bool custom_tex) state.anisotropic_filtering = 0; } + if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + { + // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps + // that have arbitrary contents, eg. are used for fog effects where the + // distance they kick in at is important to preserve at any resolution. + state.lod_bias = + state.lod_bias + std::log2(static_cast(g_ActiveConfig.iEFBScale)) * 256.f; + + // Anisotropic also pushes mips farther away so it cannot be used either + state.anisotropic_filtering = 0; + } + g_renderer->SetSamplerState(index, state); } @@ -323,7 +335,7 @@ void VertexManagerBase::Flush() if (tentry) { - SetSamplerState(i, tentry->is_custom_tex); + SetSamplerState(i, tentry->is_custom_tex, tentry->has_arbitrary_mips); PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); } else