From 228f516bb4426a41a4d1c1756751557f7a0eecda Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 15:34:31 -0400 Subject: [PATCH] texture_cache uncompress-compress is untopological. This makes conflicts between non-compressed and compressed textures get recycled automatically. It also limits the number of mipmaps a texture can have if it goes above its limit. --- .../renderer_opengl/gl_texture_cache.cpp | 10 +++---- src/video_core/texture_cache/surface_base.h | 18 +++++++++++-- .../texture_cache/surface_params.cpp | 4 +++ src/video_core/texture_cache/surface_params.h | 14 ++++++++++ src/video_core/texture_cache/texture_cache.h | 26 ++++++++++--------- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9e9734f9ee..e6f08a7640 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -195,17 +195,17 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); + glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, params.height, params.depth); break; default: @@ -245,7 +245,7 @@ void 
CachedSurface::DownloadTexture(std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); @@ -264,7 +264,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { UploadTextureMipmap(level, staging_buffer); } } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 77c2d67582..70b5258c99 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,6 +32,12 @@ enum class MatchStructureResult : u32 { None = 2, }; +enum class MatchTopologyResult : u32 { + FullMatch = 0, + CompressUnmatch = 1, + None = 2, +}; + class StagingCache { public: StagingCache() {} @@ -136,12 +142,20 @@ public: params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - bool MatchesTopology(const SurfaceParams& rhs) const { + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; const bool ib1 = params.IsBuffer(); const bool ib2 = rhs.IsBuffer(); - return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + 
const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9d157d023..77c09264a4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -85,6 +85,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, } params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); return params; } @@ -109,6 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.depth = 1; params.pitch = 0; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -139,6 +141,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.depth = 1; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -163,6 +166,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = params.IsLayered(); return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c3affd6218..5fde695b68 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -160,6 +160,19 @@ public: return std::min(t_src_height, t_dst_height); } + u32 MaxPossibleMipmap() const { + const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; + const u32 
max_mipmap_h = Common::Log2Ceil32(height) + 1U; + const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); + if (target != VideoCore::Surface::SurfaceTarget::Texture3D) + return max_mipmap; + return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); + } + + bool IsCompressed() const { + return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); @@ -205,6 +218,7 @@ public: u32 depth; u32 pitch; u32 num_levels; + u32 emulated_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; VideoCore::Surface::SurfaceType type; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2093e5815..69ef7a2bd1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -305,7 +305,7 @@ private: * due to topological reasons. 
**/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool untopological) { + const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::values.use_accurate_gpu_emulation) { return RecycleStrategy::Flush; } @@ -320,8 +320,8 @@ private: } } // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; + if (untopological == MatchTopologyResult::CompressUnmatch) { + return RecycleStrategy::Flush; } return RecycleStrategy::Ignore; } @@ -341,7 +341,7 @@ private: std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, - const bool untopological) { + const MatchTopologyResult untopological) { const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); @@ -502,9 +502,10 @@ private: // matches at certain level we are pretty much done. if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; - if (!current_surface->MatchesTopology(params)) { + auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -534,8 +535,9 @@ private: // we do a topological test to ensure we can find some relationship. 
If it fails // inmediatly recycle the texture for (auto surface : overlaps) { - if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + auto topological_result = surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } } @@ -553,7 +555,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -576,13 +578,13 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -599,7 +601,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,