video_core: gpu: Refactor out synchronous/asynchronous GPU implementations.

- We must always use a GPU thread now, even with synchronous GPU.
2020-12-11 22:26:14 -08:00 · 2020-12-11 22:26:14 -08:00 · 14c825bd1c
parent 5d4715cc6a
commit 14c825bd1c
10 changed files with 130 additions and 289 deletions
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@ -47,6 +47,7 @@ add_library(video_core STATIC
    engines/shader_bytecode.h
    engines/shader_header.h
    engines/shader_type.h
    framebuffer_config.h
    macro/macro.cpp
    macro/macro.h
    macro/macro_hle.cpp
@ -58,10 +59,6 @@ add_library(video_core STATIC
    fence_manager.h
    gpu.cpp
    gpu.h
    gpu_asynch.cpp
    gpu_asynch.h
    gpu_synch.cpp
    gpu_synch.h
    gpu_thread.cpp
    gpu_thread.h
    guest_driver.cpp
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@ -0,0 +1,31 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 namespace Tegra {
 /**
 * Struct describing framebuffer configuration
 *
 * Every member except crop_rect carries a `{}` default member initializer, so a
 * default-constructed FramebufferConfig has those members value-initialized.
 *
 * NOTE(review): no #include is visible in this header, yet it names u32, VAddr,
 * Common::Rectangle and Service::NVFlinger::BufferQueue - presumably it relies on the
 * including file to declare them first; confirm.
 */
 struct FramebufferConfig {
    /// Pixel formats with explicitly assigned numeric values, stored as u32.
    enum class PixelFormat : u32 {
        A8B8G8R8_UNORM = 1,
        RGB565_UNORM = 4,
        B8G8R8A8_UNORM = 5,
    };
    // presumably the address of the framebuffer memory - TODO confirm address space
    VAddr address{};
    u32 offset{};
    u32 width{};
    u32 height{};
    u32 stride{};
    PixelFormat pixel_format{};
    /// Alias for the buffer queue's transform flag type.
    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
    TransformFlags transform_flags{};
    // Unlike the members above, this one has no `{}` initializer and is left to
    // Common::Rectangle's own default construction.
    Common::Rectangle<int> crop_rect;
 };
 } // namespace Tegra
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@ -10,6 +10,7 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/hardware_interrupt_manager.h"
 #include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
      maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
      kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
-      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
+      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
      gpu_thread{system_} {}
 GPU::~GPU() = default;
@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
    renderer->Rasterizer().SyncGuestHost();
 }
 // Signals the end of a command list by asking the renderer's rasterizer to release its
 // fences.
 void GPU::OnCommandListEnd() {
    renderer->Rasterizer().ReleaseFences();
 }
 enum class GpuSemaphoreOperation {
    AcquireEqual = 0x1,
    WriteLong = 0x2,
@ -461,4 +459,72 @@ void GPU::ProcessSemaphoreAcquire() {
    }
 }
 // Starts the GPU thread and sets up the CPU-side graphics context.
 // NOTE(review): cdma_pusher is dereferenced unconditionally here, while PushCommandBuffer
 // treats it as possibly null and may reset it - confirm it is always constructed before
 // Start() is called.
 void GPU::Start() {
    // Hand the renderer, its context and both pushers to the thread manager.
    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
    // Obtain a shared context from the render window and make it current.
    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
    cpu_context->MakeCurrent();
 }
 // Makes cpu_context (the shared context assigned in Start) current.
 void GPU::ObtainContext() {
    cpu_context->MakeCurrent();
 }
 // Counterpart of ObtainContext: calls DoneCurrent() on cpu_context.
 void GPU::ReleaseContext() {
    cpu_context->DoneCurrent();
 }
 // Moves the command list into the GPU thread manager via SubmitList(); nothing is
 // dispatched directly from this function.
 void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
    gpu_thread.SubmitList(std::move(entries));
 }
 // Feeds a channel command buffer to the CDMA pusher and dispatches it immediately.
 // Does nothing unless nvdec emulation is enabled.
 // NOTE(review): entries[0] is read without a size check - presumably callers never pass
 // an empty list; confirm.
 void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
    if (!use_nvdec) {
        return;
    }
    // A first word equal to this marker means the video stream has ended; all
    // intermediary data is dropped by destroying the pusher.
    constexpr auto stream_end_marker = 0xDEADB33F;
    const bool stream_ended = entries[0].raw == stream_end_marker;
    if (stream_ended) {
        cdma_pusher.reset();
        return;
    }
    // The pusher is recreated on demand after a previous stream end.
    if (cdma_pusher == nullptr) {
        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
    }
    // Going through gpu_thread.SubmitCommandBuffer(std::move(entries)) would make the nvdec
    // operations asynchronous, but that path is not currently working.
    // TODO(ameerj): RE proper async nvdec operation
    auto& pusher = *cdma_pusher;
    pusher.Push(std::move(entries));
    pusher.DispatchCalls();
 }
 // Forwards the framebuffer pointer to the GPU thread manager's SwapBuffers().
 void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    gpu_thread.SwapBuffers(framebuffer);
 }
 // Forwards a flush request for [addr, addr + size) to the GPU thread manager.
 void GPU::FlushRegion(VAddr addr, u64 size) {
    gpu_thread.FlushRegion(addr, size);
 }
 // Forwards an invalidation request for [addr, addr + size) to the GPU thread manager.
 void GPU::InvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.InvalidateRegion(addr, size);
 }
 // Forwards a combined flush-and-invalidate request for [addr, addr + size) to the GPU
 // thread manager.
 void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 // Raises the syncpoint interrupt by passing the id/value pair to the system's interrupt
 // manager.
 void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
    system.InterruptManager().GPUInterruptSyncpt(syncpoint_id, value);
 }
 // Waits for the GPU to finish working by delegating to the GPU thread manager's
 // WaitIdle().
 void GPU::WaitIdle() const {
    gpu_thread.WaitIdle();
 }
 // Signals the end of a command list by notifying the GPU thread manager rather than
 // touching the rasterizer directly.
 void GPU::OnCommandListEnd() {
    gpu_thread.OnCommandListEnd();
 }
 } // namespace Tegra
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@ -15,6 +15,8 @@
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/cdma_pusher.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/framebuffer_config.h"
 #include "video_core/gpu_thread.h"
 using CacheAddr = std::uintptr_t;
 [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
 struct CommandListHeader;
 class DebugContext;
 /**
 * Struct describing framebuffer configuration
 *
 * None of the members below has a default member initializer, so their values after
 * default construction depend on how the object is created.
 */
 struct FramebufferConfig {
    /// Pixel formats with explicitly assigned numeric values, stored as u32.
    enum class PixelFormat : u32 {
        A8B8G8R8_UNORM = 1,
        RGB565_UNORM = 4,
        B8G8R8A8_UNORM = 5,
    };
    // presumably the address of the framebuffer memory - TODO confirm address space
    VAddr address;
    u32 offset;
    u32 width;
    u32 height;
    u32 stride;
    PixelFormat pixel_format;
    /// Alias for the buffer queue's transform flag type.
    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
    TransformFlags transform_flags;
    Common::Rectangle<int> crop_rect;
 };
 namespace Engines {
 class Fermi2D;
 class Maxwell3D;
@ -141,7 +121,7 @@ enum class EngineID {
 class MemoryManager;
-class GPU {
+class GPU final {
 public:
    struct MethodCall {
        u32 method{};
@ -159,7 +139,7 @@ public:
    };
    explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
-    virtual ~GPU();
+    ~GPU();
    /// Binds a renderer to the GPU.
    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@ -176,7 +156,7 @@ public:
    /// Synchronizes CPU writes with Host GPU memory.
    void SyncGuestHost();
    /// Signal the ending of command list.
-    virtual void OnCommandListEnd();
+    void OnCommandListEnd();
    /// Request a host GPU memory flush from the CPU.
    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@ -240,7 +220,7 @@ public:
    }
    // Waits for the GPU to finish working
-    virtual void WaitIdle() const = 0;
+    void WaitIdle() const;
    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
    void WaitFence(u32 syncpoint_id, u32 value);
@ -330,34 +310,34 @@ public:
    /// Performs any additional setup necessary in order to begin GPU emulation.
    /// This can be used to launch any necessary threads and register any necessary
    /// core timing events.
-    virtual void Start() = 0;
+    void Start();
    /// Obtain the CPU Context
-    virtual void ObtainContext() = 0;
+    void ObtainContext();
    /// Release the CPU Context
-    virtual void ReleaseContext() = 0;
+    void ReleaseContext();
    /// Push GPU command entries to be processed
-    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+    void PushGPUEntries(Tegra::CommandList&& entries);
    /// Push GPU command buffer entries to be processed
-    virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
    /// Swap buffers (render frame)
-    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    void FlushRegion(VAddr addr, u64 size);
    /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    void InvalidateRegion(VAddr addr, u64 size);
    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 protected:
-    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
+    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
 private:
    void ProcessBindMethod(const MethodCall& method_call);
@ -426,6 +406,9 @@ private:
    u64 last_flush_fence{};
    std::mutex flush_request_mutex;
    VideoCommon::GPUThread::ThreadManager gpu_thread;
    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
    const bool is_async;
 };
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@ -1,86 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "core/core.h"
 #include "core/hardware_interrupt_manager.h"
 #include "video_core/gpu_asynch.h"
 #include "video_core/gpu_thread.h"
 #include "video_core/renderer_base.h"
 namespace VideoCommon {
 // Constructs the base GPU with its is_async argument fixed to true and builds the thread
 // manager from the same system reference.
 GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
    : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}
 GPUAsynch::~GPUAsynch() = default;
 // Starts the GPU thread, then obtains a shared context from the render window and makes
 // it current.
 void GPUAsynch::Start() {
    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
    cpu_context->MakeCurrent();
 }
 // Makes cpu_context (assigned in Start) current.
 void GPUAsynch::ObtainContext() {
    cpu_context->MakeCurrent();
 }
 // Calls DoneCurrent() on cpu_context.
 void GPUAsynch::ReleaseContext() {
    cpu_context->DoneCurrent();
 }
 // Moves the command list into the GPU thread manager via SubmitList().
 void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
    gpu_thread.SubmitList(std::move(entries));
 }
 // Pushes a channel command buffer to the CDMA pusher and dispatches it immediately.
 // Does nothing unless nvdec emulation is enabled.
 // NOTE(review): entries[0] is read without a size check - confirm callers never pass an
 // empty list.
 void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
    if (!use_nvdec) {
        return;
    }
    // This condition fires when a video stream ends; clear all intermediary data.
    if (entries[0].raw == 0xDEADB33F) {
        cdma_pusher.reset();
        return;
    }
    // Recreate the pusher if an earlier stream end destroyed it.
    if (!cdma_pusher) {
        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
    }
    // SubmitCommandBuffer would make the nvdec operations async, but this is not currently
    // working.
    // TODO(ameerj): RE proper async nvdec operation
    // gpu_thread.SubmitCommandBuffer(std::move(entries));
    cdma_pusher->Push(std::move(entries));
    cdma_pusher->DispatchCalls();
 }
 // Forwards the framebuffer pointer to the GPU thread manager's SwapBuffers().
 void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    gpu_thread.SwapBuffers(framebuffer);
 }
 // Forwards a flush request for the region to the GPU thread manager.
 void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
    gpu_thread.FlushRegion(addr, size);
 }
 // Forwards an invalidation request for the region to the GPU thread manager.
 void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.InvalidateRegion(addr, size);
 }
 // Forwards a combined flush-and-invalidate request to the GPU thread manager.
 void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 // Passes the syncpoint id and value to the system's interrupt manager.
 void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
    auto& interrupt_manager = system.InterruptManager();
    interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
 }
 // Delegates to the GPU thread manager's WaitIdle().
 void GPUAsynch::WaitIdle() const {
    gpu_thread.WaitIdle();
 }
 // Notifies the GPU thread manager that a command list has ended.
 void GPUAsynch::OnCommandListEnd() {
    gpu_thread.OnCommandListEnd();
 }
 } // namespace VideoCommon
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@ -1,47 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
 namespace Core::Frontend {
 class GraphicsContext;
 }
 namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch final : public Tegra::GPU {
 public:
    explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
    ~GPUAsynch() override;
    // Overrides of the Tegra::GPU virtual interface.
    void Start() override;
    void ObtainContext() override;
    void ReleaseContext() override;
    void PushGPUEntries(Tegra::CommandList&& entries) override;
    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    void WaitIdle() const override;
    void OnCommandListEnd() override;
 protected:
    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
 private:
    /// Thread manager that the overrides above are expected to delegate to.
    GPUThread::ThreadManager gpu_thread;
    /// Graphics context owned by this object for ObtainContext/ReleaseContext.
    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 };
 } // namespace VideoCommon
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@ -1,61 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 namespace VideoCommon {
 // Constructs the base GPU with its is_async argument fixed to false.
 GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {}
 GPUSynch::~GPUSynch() = default;
 // Nothing to launch: this implementation has an empty Start().
 void GPUSynch::Start() {}
 // Makes the renderer's own context current.
 void GPUSynch::ObtainContext() {
    renderer->Context().MakeCurrent();
 }
 // Calls DoneCurrent() on the renderer's own context.
 void GPUSynch::ReleaseContext() {
    renderer->Context().DoneCurrent();
 }
 // Pushes the command list into the DMA pusher and dispatches it before returning.
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
    dma_pusher->Push(std::move(entries));
    dma_pusher->DispatchCalls();
 }
 // Pushes a channel command buffer to the CDMA pusher and dispatches it immediately.
 // Does nothing unless nvdec emulation is enabled.
 // NOTE(review): entries[0] is read without a size check - confirm callers never pass an
 // empty list.
 void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
    if (!use_nvdec) {
        return;
    }
    // This condition fires when a video stream ends; it clears all intermediary data.
    if (entries[0].raw == 0xDEADB33F) {
        cdma_pusher.reset();
        return;
    }
    // Recreate the pusher if an earlier stream end destroyed it.
    if (!cdma_pusher) {
        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
    }
    cdma_pusher->Push(std::move(entries));
    cdma_pusher->DispatchCalls();
 }
 // Calls the renderer's SwapBuffers() directly with the given framebuffer.
 void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    renderer->SwapBuffers(framebuffer);
 }
 // Calls the rasterizer's FlushRegion() directly.
 void GPUSynch::FlushRegion(VAddr addr, u64 size) {
    renderer->Rasterizer().FlushRegion(addr, size);
 }
 // Calls the rasterizer's InvalidateRegion() directly.
 void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
    renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 // Calls the rasterizer's FlushAndInvalidateRegion() directly.
 void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 } // namespace VideoCommon
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@ -1,41 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include "video_core/gpu.h"
 namespace Core::Frontend {
 class GraphicsContext;
 }
 namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU synchronously
 class GPUSynch final : public Tegra::GPU {
 public:
    explicit GPUSynch(Core::System& system_, bool use_nvdec_);
    ~GPUSynch() override;
    // Overrides of the Tegra::GPU virtual interface.
    void Start() override;
    void ObtainContext() override;
    void ReleaseContext() override;
    void PushGPUEntries(Tegra::CommandList&& entries) override;
    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    /// Intentionally empty: this implementation has nothing to wait for here.
    void WaitIdle() const override {}
 protected:
    /// Intentionally empty: both arguments are ignored and no interrupt is raised.
    void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
                             [[maybe_unused]] u32 value) const override {}
 };
 } // namespace VideoCommon
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@ -10,8 +10,9 @@
 #include <optional>
 #include <thread>
 #include <variant>
 #include "common/threadsafe_queue.h"
-#include "video_core/gpu.h"
+#include "video_core/framebuffer_config.h"
 namespace Tegra {
 struct FramebufferConfig;
@ -25,6 +26,10 @@ class GraphicsContext;
 class System;
 } // namespace Core
 namespace VideoCore {
    class RendererBase;
 } // namespace VideoCore
 namespace VideoCommon::GPUThread {
 /// Command to signal to the GPU thread that processing has ended
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@ -7,8 +7,6 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/gpu_asynch.h"
 #include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
 namespace VideoCore {
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
    std::unique_ptr<Tegra::GPU> gpu;
    const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
-    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+    std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
-        gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec);
+        system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
    } else {
        gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
    }
    auto context = emu_window.CreateSharedContext();
    const auto scope = context->Acquire();