From 60bf761afbb125abd324e4b798d18a1611b5777b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 19:12:00 -0400 Subject: [PATCH] texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 + .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_resource_manager.cpp | 8 ++ .../renderer_opengl/gl_resource_manager.h | 3 + .../renderer_opengl/gl_texture_cache.cpp | 92 ++++++++++++++++++- .../renderer_opengl/gl_texture_cache.h | 9 +- src/video_core/texture_cache/texture_cache.h | 40 ++++++-- 8 files changed, 148 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 65a88b06c1..ad15ea54e7 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -28,6 +28,7 @@ Device::Device() { max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); + is_turing_plus = GLAD_GL_NV_mesh_shader; } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c937600..1afe16779b 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_component_indexing_bug; } + bool IsTuringGPU() const { + return is_turing_plus; + } + private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -43,6 +47,7 @@ private: u32 max_varyings{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; + bool is_turing_plus{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8fe115aec1..97c55f2ecb 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -82,7 +82,7 @@ struct DrawParameters { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, + : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 9f81c15cb7..a1f91d6775 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -148,6 +148,14 @@ void OGLBuffer::Release() { handle = 0; } +void OGLBuffer::MakePersistant(std::size_t buffer_size) { + if (handle == 0 || buffer_size == 0) + return; + + const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT; + glNamedBufferStorage(handle, static_cast<GLsizeiptr>(buffer_size), nullptr, flags); +} + void OGLSync::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 310ee2bf34..f2873ef96a 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -186,6 +186,9 @@ public: /// Deletes the internal OpenGL resource void Release(); + // Converts the buffer into a persistent storage buffer + void MakePersistant(std::size_t buffer_size); + GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e6f08a7640..bddb15cb11 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/microprofile.h" #include "common/scope_exit.h" @@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) + VideoCore::RasterizerInterface& rasterizer, + const Device& device) : TextureCacheBase{system, rasterizer} { + support_info.depth_color_image_copies = !device.IsTuringGPU(); src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) { + if (!support_info.depth_color_image_copies) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + if (src_params.type != dst_params.type) { + // A fallback is needed + return; + } + } const auto src_handle = src_surface->GetTexture(); const auto src_target = src_surface->GetTarget(); const auto dst_handle = dst_surface->GetTexture(); @@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, is_linear ? 
GL_LINEAR : GL_NEAREST); } +void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + + const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + + const std::size_t source_size = src_surface->GetHostSizeInBytes(); + const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); + + const std::size_t buffer_size = std::max(source_size, dest_size); + + GLuint copy_pbo_handle = FetchPBO(buffer_size); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); + + if (source_format.compressed) { + glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), + nullptr); + } else { + glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, + static_cast<GLsizei>(source_size), nullptr); + } + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + + const GLsizei width = static_cast<GLsizei>(dst_params.width); + const GLsizei height = static_cast<GLsizei>(dst_params.height); + const GLsizei depth = static_cast<GLsizei>(dst_params.depth); + if (dest_format.compressed) { + LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); + UNREACHABLE(); + } else { + switch (dst_params.target) { + case SurfaceTarget::Texture1D: + glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, + dest_format.type, nullptr); + break; + case SurfaceTarget::Texture2D: + glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, + dest_format.format, dest_format.type, nullptr); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, 
nullptr); + break; + case SurfaceTarget::TextureCubemap: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, nullptr); + break; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", + static_cast<u32>(dst_params.target)); + UNREACHABLE(); + } + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glTextureBarrier(); +} + +GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { + if (buffer_size == 0) { /* size is unsigned, so a "< 0" test can never be true; reject the empty request instead */ + UNREACHABLE(); + return 0; + } + const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); + OGLBuffer& cp = copy_pbo_cache[l2]; + if (cp.handle == 0) { + const std::size_t ceil_size = 1ULL << l2; + cp.Create(); + cp.MakePersistant(ceil_size); + } + return cp.handle; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0b333e9e3c..f514f137c0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/texture_cache/texture_cache.h" @@ -129,7 +130,8 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const Device& device); ~TextureCacheOpenGL(); protected: @@ -141,9 +143,14 @@ protected: void ImageBlit(View src_view, View dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) override; + void BufferCopy(Surface src_surface, Surface dst_surface) override; + private: + GLuint FetchPBO(std::size_t buffer_size); + OGLFramebuffer src_framebuffer; OGLFramebuffer dst_framebuffer; 
+ std::unordered_map<u32, OGLBuffer> copy_pbo_cache; }; } // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ef7a2bd1..e0d0e1f700 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -214,6 +214,13 @@ public: } protected: + // This structure is used for communicating with the backend about which behaviors + // it supports, to avoid assuming certain things about hardware. + // The backend is RESPONSIBLE for filling in these settings on creation. + struct Support { + bool depth_color_image_copies; + } support_info; + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +240,10 @@ protected: virtual void ImageBlit(TView src_view, TView dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture + // and reading it from a separate buffer. 
+ virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -377,9 +388,14 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(params); - for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + const auto& cr_params = current_surface->GetSurfaceParams(); + if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + BufferCopy(current_surface, new_surface); + } else { + std::vector bricks = current_surface->BreakDown(params); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } } Unregister(current_surface); Register(new_surface); @@ -505,7 +521,8 @@ private: auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -537,7 +554,8 @@ private: for (auto surface : overlaps) { auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } @@ -555,7 +573,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we 
check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -578,13 +597,15 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -601,7 +622,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,