From da8f17715dbdc7eec92f5f0c11c968a51b86cab4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 16 Feb 2020 09:51:37 -0400 Subject: [PATCH] GPU: Refactor synchronization on Async GPU --- src/video_core/dma_pusher.cpp | 1 + src/video_core/engines/maxwell_3d.cpp | 8 ++++++-- src/video_core/gpu.cpp | 3 +++ src/video_core/gpu.h | 1 + src/video_core/gpu_thread.cpp | 6 +++--- src/video_core/rasterizer_interface.h | 6 ++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 ++++++++++++++++ src/video_core/renderer_opengl/gl_rasterizer.h | 2 ++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 14 ++++++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 ++ src/video_core/texture_cache/texture_cache.h | 4 ++-- 11 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 0b77afc717..2516ea9935 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -32,6 +32,7 @@ void DmaPusher::DispatchCalls() { } } gpu.FlushCommands(); + gpu.SyncGuestHost(); } bool DmaPusher::Step() { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index baa74ad4ca..2298a62738 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -403,9 +403,13 @@ void Maxwell3D::ProcessQueryGet() { "Units other than CROP are unimplemented"); switch (regs.query.query_get.operation) { - case Regs::QueryOperation::Release: - StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); + case Regs::QueryOperation::Release: { + rasterizer.FlushCommands(); + rasterizer.SyncGuestHost(); + const u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); break; + } case Regs::QueryOperation::Acquire: // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that // matches the current payload. diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a606f4abdb..13bca5a78d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -142,6 +142,9 @@ void GPU::FlushCommands() { renderer->Rasterizer().FlushCommands(); } +void GPU::SyncGuestHost() { + renderer->Rasterizer().SyncGuestHost(); +} // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. // So the values you see in docs might be multiplied by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 1a2d747bef..99ed190bcd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -156,6 +156,7 @@ public: void CallMethod(const MethodCall& method_call); void FlushCommands(); + void SyncGuestHost(); /// Returns a reference to the Maxwell3D GPU engine. Engines::Maxwell3D& Maxwell3D(); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 10cda686b9..1994d3bb45 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,7 +40,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic } else if (const auto data = std::get_if(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); } else if (const auto data = std::get_if(&next.data)) { - renderer.Rasterizer().InvalidateRegion(data->addr, data->size); + renderer.Rasterizer().OnCPUWrite(data->addr, data->size); } else if (std::holds_alternative(next.data)) { return; } else { @@ -82,12 +82,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { - system.Renderer().Rasterizer().InvalidateRegion(addr, size); + system.Renderer().Rasterizer().OnCPUWrite(addr, size); } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important - InvalidateRegion(addr, size); + system.Renderer().Rasterizer().OnCPUWrite(addr, size); } void ThreadManager::WaitIdle() const { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ae5b9c4e9..0d05a3fc71 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -58,6 +58,12 @@ public: /// Notify rasterizer that any caches of the specified region should be invalidated virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region are desync with guest + virtual void OnCPUWrite(VAddr addr, u64 size) = 0; + + /// Sync memory between guest and host. + virtual void SyncGuestHost() = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ac4485a18a..5379127454 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -660,6 +660,22 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { query_cache.InvalidateRegion(addr, size); } +void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } + texture_cache.OnCPUWrite(addr, size); + shader_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + +void RasterizerOpenGL::SyncGuestHost() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + texture_cache.SyncGuestHost(); + // buffer_cache.SyncGuestHost(); +} + void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { if (Settings::IsGPULevelExtreme()) { FlushRegion(addr, size); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index caea174d2c..a870024c63 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -67,6 +67,8 @@ public: void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; + void OnCPUWrite(VAddr addr, u64 size) override; + void SyncGuestHost() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 71007bbe85..ad59f558de 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -524,6 +524,20 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { query_cache.InvalidateRegion(addr, size); } +void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { + if (!addr || !size) { + return; + } + texture_cache.OnCPUWrite(addr, size); + pipeline_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::SyncGuestHost() { + texture_cache.SyncGuestHost(); + // buffer_cache.SyncGuestHost(); +} + void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index d9108f862a..285f731bce 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -119,6 +119,8 @@ public: void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; + void OnCPUWrite(VAddr addr, u64 size) override; + void SyncGuestHost() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 441fda53d8..c23b9f9b9f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -63,7 +63,7 @@ public: } } - void OnCPUWrite(CacheAddr addr, std::size_t size) { + void OnCPUWrite(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; for (const auto& surface : GetSurfacesInRegion(addr, size)) { @@ -549,7 +549,7 @@ private: } const auto& final_params = new_surface->GetSurfaceParams(); if (cr_params.type != final_params.type) { - if (Settings::values.use_accurate_gpu_emulation) { + if (Settings::IsGPULevelExtreme()) { BufferCopy(current_surface, new_surface); } } else {