Merge pull request #6118 from Tomcc/master

Resolution independent mipmaps (high IR Super Mario Galaxy Fix)
This commit is contained in:
Leo Lam 2017-10-31 21:37:20 +01:00 committed by GitHub
commit e29cd19f73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 204 additions and 30 deletions

View File

@ -299,7 +299,7 @@ ID3D11SamplerState* StateCache::Get(SamplerState state)
sampdc.AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())]; sampdc.AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())];
sampdc.MaxLOD = state.max_lod / 16.f; sampdc.MaxLOD = state.max_lod / 16.f;
sampdc.MinLOD = state.min_lod / 16.f; sampdc.MinLOD = state.min_lod / 16.f;
sampdc.MipLODBias = (s32)state.lod_bias / 32.0f; sampdc.MipLODBias = (s32)state.lod_bias / 256.f;
if (state.anisotropic_filtering) if (state.anisotropic_filtering)
{ {

View File

@ -40,7 +40,7 @@ private:
std::unordered_map<u32, ID3D11DepthStencilState*> m_depth; std::unordered_map<u32, ID3D11DepthStencilState*> m_depth;
std::unordered_map<u32, ID3D11RasterizerState*> m_raster; std::unordered_map<u32, ID3D11RasterizerState*> m_raster;
std::unordered_map<u32, ID3D11BlendState*> m_blend; std::unordered_map<u32, ID3D11BlendState*> m_blend;
std::unordered_map<u32, ID3D11SamplerState*> m_sampler; std::unordered_map<SamplerState::StorageType, ID3D11SamplerState*> m_sampler;
}; };
namespace D3D namespace D3D

View File

@ -100,7 +100,7 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f); glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f);
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL) if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 32.f); glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f);
if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso) if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso)
{ {

View File

@ -329,7 +329,7 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info)
address_modes[static_cast<u32>(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU address_modes[static_cast<u32>(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[static_cast<u32>(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV address_modes[static_cast<u32>(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.lod_bias / 32.0f, // float mipLodBias info.lod_bias / 256.0f, // float mipLodBias
VK_FALSE, // VkBool32 anisotropyEnable VK_FALSE, // VkBool32 anisotropyEnable
0.0f, // float maxAnisotropy 0.0f, // float maxAnisotropy
VK_FALSE, // VkBool32 compareEnable VK_FALSE, // VkBool32 compareEnable

View File

@ -180,8 +180,8 @@ void SamplerState::Generate(const BPMemory& bp, u32 index)
// If mipmaps are disabled, clamp min/max lod // If mipmaps are disabled, clamp min/max lod
max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod : 0; max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod : 0;
min_lod = std::min(max_lod.Value(), tm1.min_lod); min_lod = std::min(max_lod.Value(), static_cast<u64>(tm1.min_lod));
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias : 0; lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0;
// Address modes // Address modes
static constexpr std::array<AddressMode, 4> address_modes = { static constexpr std::array<AddressMode, 4> address_modes = {

View File

@ -76,13 +76,15 @@ union BlendingState
union SamplerState union SamplerState
{ {
enum class Filter : u32 using StorageType = u64;
enum class Filter : StorageType
{ {
Point, Point,
Linear Linear
}; };
enum class AddressMode : u32 enum class AddressMode : StorageType
{ {
Clamp, Clamp,
Repeat, Repeat,
@ -101,12 +103,12 @@ union SamplerState
BitField<2, 1, Filter> mipmap_filter; BitField<2, 1, Filter> mipmap_filter;
BitField<3, 2, AddressMode> wrap_u; BitField<3, 2, AddressMode> wrap_u;
BitField<5, 2, AddressMode> wrap_v; BitField<5, 2, AddressMode> wrap_v;
BitField<7, 8, u32> min_lod; // multiplied by 16 BitField<7, 16, s64> lod_bias; // multiplied by 256
BitField<15, 8, u32> max_lod; // multiplied by 16 BitField<23, 8, u64> min_lod; // multiplied by 16
BitField<23, 8, s32> lod_bias; // multiplied by 32 BitField<31, 8, u64> max_lod; // multiplied by 16
BitField<31, 1, u32> anisotropic_filtering; BitField<39, 1, u64> anisotropic_filtering;
u32 hex; StorageType hex;
}; };
namespace RenderState namespace RenderState

View File

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <string> #include <string>
@ -431,7 +432,8 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
return entry_to_update; return entry_to_update;
} }
void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level) void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level,
bool is_arbitrary)
{ {
std::string szDir = File::GetUserPath(D_DUMPTEXTURES_IDX) + SConfig::GetInstance().GetGameID(); std::string szDir = File::GetUserPath(D_DUMPTEXTURES_IDX) + SConfig::GetInstance().GetGameID();
@ -441,8 +443,9 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns
if (level > 0) if (level > 0)
{ {
basename += StringFromFormat("_mip%i", level); basename += StringFromFormat(is_arbitrary ? "_arb_mip%i" : "_mip%i", level);
} }
std::string filename = szDir + "/" + basename + ".png"; std::string filename = szDir + "/" + basename + ".png";
if (!File::Exists(filename)) if (!File::Exists(filename))
@ -477,6 +480,124 @@ void TextureCacheBase::BindTextures()
} }
} }
// Heuristic that decides whether the mipmap chain of a texture contains "arbitrary" content,
// i.e. mip levels that are not simply downscaled versions of the previous level.
//
// Usage: call AddLevel() once per level, starting with the base level, then call
// HasArbitraryMipmaps(). The detector does not copy or own any pixel data; every buffer passed to
// AddLevel() must stay valid until HasArbitraryMipmaps() has returned.
//
// All buffers are addressed as 4 bytes per pixel, and row_length is a stride in pixels.
class ArbitraryMipmapDetector
{
private:
  using PixelRGBAf = std::array<float, 4>;

public:
  explicit ArbitraryMipmapDetector() = default;

  // Registers the next level of the mip chain.
  //   width, height - dimensions of the level in pixels
  //   row_length    - distance between the start of two consecutive rows, in pixels
  //   buffer        - pixel data of the level, 4 bytes per pixel (not owned)
  void AddLevel(u32 width, u32 height, u32 row_length, const u8* buffer)
  {
    levels.push_back({width, height, row_length, buffer});
  }

  // Returns true if at least one level differs too much from a box-filtered downscale of the
  // level before it. Returns false when fewer than two levels were added.
  //
  // downsample_buffer is scratch space that receives the downscaled image of each level in turn.
  // It has to hold row_length * height * 4 bytes of the largest non-base level. It is not touched
  // when fewer than two levels were added.
  bool HasArbitraryMipmaps(u8* downsample_buffer) const
  {
    if (levels.size() < 2)
      return false;

    // This is the average per-pixel, per-channel difference in percent between what we
    // expect a normal blurred mipmap to look like and what we actually received
    constexpr auto THRESHOLD_PERCENT = 35.f;

    for (std::size_t i = 0; i < levels.size() - 1; ++i)
    {
      const auto& level = levels[i];
      const auto& mip = levels[i + 1];

      // Manually downsample the current layer with a simple box blur
      // This is not necessarily close to whatever the original artists used, however
      // It should still be closer than a thing that's not a downscale at all
      level.Downsample(downsample_buffer, mip);

      // Find the average difference between pixels in this level but downsampled
      // and the next level
      const auto diff = mip.AverageDiff(downsample_buffer);
      if (diff > THRESHOLD_PERCENT)
        return true;
    }

    return false;
  }

private:
  // Converts one stored channel byte to an approximately linear float value.
  static float SRGBToLinear(u8 srgb_byte)
  {
    auto srgb_float = static_cast<float>(srgb_byte) / 256.f;
    // approximations found on
    // http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
    return srgb_float * (srgb_float * (srgb_float * 0.305306011f + 0.682171111f) + 0.012522878f);
  }

  // Approximate inverse of SRGBToLinear().
  static u8 LinearToSRGB(float linear)
  {
    const float srgb = std::max(1.055f * std::pow(linear, 0.416666667f) - 0.055f, 0.f) * 256.f;
    // Keep the value representable so the conversion to u8 is always well defined
    return static_cast<u8>(std::min(srgb, 255.f));
  }

  struct Level
  {
    u32 width;
    u32 height;
    u32 row_length;
    const u8* buffer;

    // Reads the pixel at (x, y) and converts each of its four channels with SRGBToLinear().
    PixelRGBAf Sample(u32 x, u32 y) const
    {
      const auto* p = buffer + (static_cast<std::size_t>(y) * row_length + x) * 4;
      return {{SRGBToLinear(p[0]), SRGBToLinear(p[1]), SRGBToLinear(p[2]), SRGBToLinear(p[3])}};
    }

    // Writes a 2x2 box-filtered version of this level into dst, using the dimensions and row
    // stride of dst_shape. dst must hold at least dst_shape.row_length * dst_shape.height * 4
    // bytes.
    void Downsample(u8* dst, const Level& dst_shape) const
    {
      // Source coordinates are clamped to the last row/column so that levels with an odd (or
      // single pixel) dimension never sample outside of the image.
      const u32 max_x = width > 0 ? width - 1 : 0;
      const u32 max_y = height > 0 ? height - 1 : 0;

      for (u32 i = 0; i < dst_shape.height; ++i)
      {
        const u32 y0 = std::min(i * 2, max_y);
        const u32 y1 = std::min(i * 2 + 1, max_y);

        for (u32 j = 0; j < dst_shape.width; ++j)
        {
          const u32 x0 = std::min(j * 2, max_x);
          const u32 x1 = std::min(j * 2 + 1, max_x);

          const std::array<PixelRGBAf, 4> samples = {
              {Sample(x0, y0), Sample(x1, y0), Sample(x0, y1), Sample(x1, y1)}};

          auto* dst_pixel = dst + (static_cast<std::size_t>(i) * dst_shape.row_length + j) * 4;

          // Average every channel across the four samples. The first index selects the sample,
          // the second one the channel.
          for (std::size_t channel = 0; channel < 4; ++channel)
          {
            const float sum = samples[0][channel] + samples[1][channel] + samples[2][channel] +
                              samples[3][channel];
            dst_pixel[channel] = LinearToSRGB(sum * 0.25f);
          }
        }
      }
    }

    // Returns the average absolute per-channel difference between this level and other, scaled
    // by 1 / 2.56 so that the result can be compared against a percentage.
    // other must use the same dimensions and row stride as this level.
    float AverageDiff(const u8* other) const
    {
      // row_length is expressed in pixels, each of which takes 4 bytes
      const std::size_t row_stride = static_cast<std::size_t>(row_length) * 4;

      float average_diff = 0.f;
      for (u32 i = 0; i < height; ++i)
      {
        const u8* row1 = buffer + i * row_stride;
        const u8* row2 = other + i * row_stride;
        for (u32 j = 0; j < width; ++j, row1 += 4, row2 += 4)
        {
          for (std::size_t channel = 0; channel < 4; ++channel)
          {
            average_diff +=
                std::abs(static_cast<float>(row1[channel]) - static_cast<float>(row2[channel]));
          }
        }
      }

      return average_diff / (width * height * 4) / 2.56f;
    }
  };

  std::vector<Level> levels;
};
TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{ {
// if this stage was not invalidated by changes to texture registers, keep the current texture // if this stage was not invalidated by changes to texture registers, keep the current texture
@ -774,6 +895,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
config.levels = texLevels; config.levels = texLevels;
config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8; config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8;
ArbitraryMipmapDetector arbitrary_mip_detector;
TCacheEntry* entry = AllocateCacheEntry(config); TCacheEntry* entry = AllocateCacheEntry(config);
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
@ -788,6 +911,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
level.data_size); level.data_size);
} }
// Initialized to null because only software loading uses this buffer
u8* dst_buffer = nullptr;
if (!hires_tex && decode_on_gpu) if (!hires_tex && decode_on_gpu)
{ {
u32 row_stride = bytes_per_block * (expandedWidth / bsw); u32 row_stride = bytes_per_block * (expandedWidth / bsw);
@ -797,19 +923,41 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
else if (!hires_tex) else if (!hires_tex)
{ {
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight; size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
CheckTempSize(decoded_texture_size);
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
size_t mip_downsample_buffer_size = decoded_texture_size / 4;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
{
prev_level_size /= 4;
total_texture_size += prev_level_size;
}
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
CheckTempSize(total_texture_size);
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem)) if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{ {
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, tlutfmt); TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
tlutfmt);
} }
else else
{ {
u8* src_data_gb = u8* src_data_gb =
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE]; &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight); TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth,
expandedHeight);
} }
entry->texture->Load(0, width, height, expandedWidth, temp, decoded_texture_size); entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
} }
iter = textures_by_address.emplace(address, entry); iter = textures_by_address.emplace(address, entry);
@ -832,7 +980,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{ {
basename = HiresTexture::GenBaseName(src_data, texture_size, &texMem[tlutaddr], palette_size, basename = HiresTexture::GenBaseName(src_data, texture_size, &texMem[tlutaddr], palette_size,
width, height, texformat, use_mipmaps, true); width, height, texformat, use_mipmaps, true);
DumpTexture(entry, basename, 0);
} }
if (hires_tex) if (hires_tex)
@ -878,18 +1025,29 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
} }
else else
{ {
// No need to call CheckTempSize here, as mips will always be smaller than the base level. // No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height,
tlut, tlutfmt); texformat, tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp, entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer,
decoded_mip_size); decoded_mip_size);
arbitrary_mip_detector.AddLevel(mip_width, mip_height, expanded_mip_width, dst_buffer);
dst_buffer += decoded_mip_size;
} }
mip_src_data += mip_size; mip_src_data += mip_size;
}
}
if (g_ActiveConfig.bDumpTextures) entry->has_arbitrary_mips = arbitrary_mip_detector.HasArbitraryMipmaps(dst_buffer);
DumpTexture(entry, basename, level);
if (g_ActiveConfig.bDumpTextures)
{
for (u32 level = 0; level < texLevels; ++level)
{
DumpTexture(entry, basename, level, entry->has_arbitrary_mips);
} }
} }

View File

@ -81,7 +81,9 @@ public:
bool is_efb_copy; bool is_efb_copy;
bool is_custom_tex; bool is_custom_tex;
bool may_have_overlapping_textures = true; bool may_have_overlapping_textures = true;
bool tmem_only = false; // indicates that this texture only exists in the tmem cache bool tmem_only = false; // indicates that this texture only exists in the tmem cache
bool has_arbitrary_mips = false; // indicates that the mips in this texture are arbitrary
// content, aren't just downscaled
unsigned int native_width, unsigned int native_width,
native_height; // Texture dimensions from the GameCube's point of view native_height; // Texture dimensions from the GameCube's point of view
@ -224,7 +226,7 @@ private:
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt); TLUTFormat tlutfmt);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level); void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary);
void CheckTempSize(size_t required_size); void CheckTempSize(size_t required_size);
TCacheEntry* AllocateCacheEntry(const TextureConfig& config); TCacheEntry* AllocateCacheEntry(const TextureConfig& config);

View File

@ -209,7 +209,7 @@ std::pair<size_t, size_t> VertexManagerBase::ResetFlushAspectRatioCount()
return val; return val;
} }
static void SetSamplerState(u32 index, bool custom_tex) static void SetSamplerState(u32 index, bool custom_tex, bool has_arbitrary_mips)
{ {
const FourTexUnits& tex = bpmem.tex[index / 4]; const FourTexUnits& tex = bpmem.tex[index / 4];
const TexMode0& tm0 = tex.texMode0[index % 4]; const TexMode0& tm0 = tex.texMode0[index % 4];
@ -252,6 +252,18 @@ static void SetSamplerState(u32 index, bool custom_tex)
state.anisotropic_filtering = 0; state.anisotropic_filtering = 0;
} }
if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
{
// Apply a secondary bias calculated from the IR scale to pull inwards mipmaps
// that have arbitrary contents, eg. are used for fog effects where the
// distance they kick in at is important to preserve at any resolution.
state.lod_bias =
state.lod_bias + std::log2(static_cast<float>(g_ActiveConfig.iEFBScale)) * 256.f;
// Anisotropic also pushes mips farther away so it cannot be used either
state.anisotropic_filtering = 0;
}
g_renderer->SetSamplerState(index, state); g_renderer->SetSamplerState(index, state);
} }
@ -323,7 +335,7 @@ void VertexManagerBase::Flush()
if (tentry) if (tentry)
{ {
SetSamplerState(i, tentry->is_custom_tex); SetSamplerState(i, tentry->is_custom_tex, tentry->has_arbitrary_mips);
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
} }
else else