forked from ShuriZma/suyu
1
0
Fork 0

video_core: gpu: Refactor out synchronous/asynchronous GPU implementations.

- We must always use a GPU thread now, even with synchronous GPU.
This commit is contained in:
bunnei 2020-12-11 22:26:14 -08:00
parent 5d4715cc6a
commit 14c825bd1c
10 changed files with 130 additions and 289 deletions

View File

@ -47,6 +47,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h engines/shader_bytecode.h
engines/shader_header.h engines/shader_header.h
engines/shader_type.h engines/shader_type.h
framebuffer_config.h
macro/macro.cpp macro/macro.cpp
macro/macro.h macro/macro.h
macro/macro_hle.cpp macro/macro_hle.cpp
@ -58,10 +59,6 @@ add_library(video_core STATIC
fence_manager.h fence_manager.h
gpu.cpp gpu.cpp
gpu.h gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp gpu_thread.cpp
gpu_thread.h gpu_thread.h
guest_driver.cpp guest_driver.cpp

View File

@ -0,0 +1,31 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Tegra {
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};
VAddr address{};
u32 offset{};
u32 width{};
u32 height{};
u32 stride{};
PixelFormat pixel_format{};
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags{};
Common::Rectangle<int> crop_rect;
};
} // namespace Tegra

View File

@ -10,6 +10,7 @@
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/core_timing_util.h" #include "core/core_timing_util.h"
#include "core/frontend/emu_window.h" #include "core/frontend/emu_window.h"
#include "core/hardware_interrupt_manager.h"
#include "core/memory.h" #include "core/memory.h"
#include "core/settings.h" #include "core/settings.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
gpu_thread{system_} {}
GPU::~GPU() = default; GPU::~GPU() = default;
@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost(); renderer->Rasterizer().SyncGuestHost();
} }
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
enum class GpuSemaphoreOperation { enum class GpuSemaphoreOperation {
AcquireEqual = 0x1, AcquireEqual = 0x1,
WriteLong = 0x2, WriteLong = 0x2,
@ -461,4 +459,72 @@ void GPU::ProcessSemaphoreAcquire() {
} }
} }
void GPU::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
void GPU::ObtainContext() {
cpu_context->MakeCurrent();
}
void GPU::ReleaseContext() {
cpu_context->DoneCurrent();
}
void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clear all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
void GPU::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPU::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
void GPU::WaitIdle() const {
gpu_thread.WaitIdle();
}
void GPU::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace Tegra } // namespace Tegra

View File

@ -15,6 +15,8 @@
#include "core/hle/service/nvflinger/buffer_queue.h" #include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/cdma_pusher.h" #include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/framebuffer_config.h"
#include "video_core/gpu_thread.h"
using CacheAddr = std::uintptr_t; using CacheAddr = std::uintptr_t;
[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
struct CommandListHeader; struct CommandListHeader;
class DebugContext; class DebugContext;
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};
VAddr address;
u32 offset;
u32 width;
u32 height;
u32 stride;
PixelFormat pixel_format;
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
Common::Rectangle<int> crop_rect;
};
namespace Engines { namespace Engines {
class Fermi2D; class Fermi2D;
class Maxwell3D; class Maxwell3D;
@ -141,7 +121,7 @@ enum class EngineID {
class MemoryManager; class MemoryManager;
class GPU { class GPU final {
public: public:
struct MethodCall { struct MethodCall {
u32 method{}; u32 method{};
@ -159,7 +139,7 @@ public:
}; };
explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
virtual ~GPU(); ~GPU();
/// Binds a renderer to the GPU. /// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@ -176,7 +156,7 @@ public:
/// Synchronizes CPU writes with Host GPU memory. /// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost(); void SyncGuestHost();
/// Signal the ending of command list. /// Signal the ending of command list.
virtual void OnCommandListEnd(); void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU. /// Request a host GPU memory flush from the CPU.
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@ -240,7 +220,7 @@ public:
} }
// Waits for the GPU to finish working // Waits for the GPU to finish working
virtual void WaitIdle() const = 0; void WaitIdle() const;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value); void WaitFence(u32 syncpoint_id, u32 value);
@ -330,34 +310,34 @@ public:
/// Performs any additional setup necessary in order to begin GPU emulation. /// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary /// This can be used to launch any necessary threads and register any necessary
/// core timing events. /// core timing events.
virtual void Start() = 0; void Start();
/// Obtain the CPU Context /// Obtain the CPU Context
virtual void ObtainContext() = 0; void ObtainContext();
/// Release the CPU Context /// Release the CPU Context
virtual void ReleaseContext() = 0; void ReleaseContext();
/// Push GPU command entries to be processed /// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; void PushGPUEntries(Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed /// Push GPU command buffer entries to be processed
virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
/// Swap buffers (render frame) /// Swap buffers (render frame)
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0; void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0; void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; void FlushAndInvalidateRegion(VAddr addr, u64 size);
protected: protected:
virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
private: private:
void ProcessBindMethod(const MethodCall& method_call); void ProcessBindMethod(const MethodCall& method_call);
@ -426,6 +406,9 @@ private:
u64 last_flush_fence{}; u64 last_flush_fence{};
std::mutex flush_request_mutex; std::mutex flush_request_mutex;
VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
const bool is_async; const bool is_async;
}; };

View File

@ -1,86 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hardware_interrupt_manager.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
: GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
void GPUAsynch::ObtainContext() {
cpu_context->MakeCurrent();
}
void GPUAsynch::ReleaseContext() {
cpu_context->DoneCurrent();
}
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clear all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle();
}
void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace VideoCommon

View File

@ -1,47 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
namespace Core::Frontend {
class GraphicsContext;
}
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
~GPUAsynch() override;
void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;
void OnCommandListEnd() override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};
} // namespace VideoCommon

View File

@ -1,61 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::Start() {}
void GPUSynch::ObtainContext() {
renderer->Context().MakeCurrent();
}
void GPUSynch::ReleaseContext() {
renderer->Context().DoneCurrent();
}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
}
void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clears all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
renderer->SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View File

@ -1,41 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
namespace Core::Frontend {
class GraphicsContext;
}
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system_, bool use_nvdec_);
~GPUSynch() override;
void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {}
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
};
} // namespace VideoCommon

View File

@ -10,8 +10,9 @@
#include <optional> #include <optional>
#include <thread> #include <thread>
#include <variant> #include <variant>
#include "common/threadsafe_queue.h" #include "common/threadsafe_queue.h"
#include "video_core/gpu.h" #include "video_core/framebuffer_config.h"
namespace Tegra { namespace Tegra {
struct FramebufferConfig; struct FramebufferConfig;
@ -25,6 +26,10 @@ class GraphicsContext;
class System; class System;
} // namespace Core } // namespace Core
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that processing has ended /// Command to signal to the GPU thread that processing has ended

View File

@ -7,8 +7,6 @@
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
#include "core/settings.h" #include "core/settings.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore { namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
std::unique_ptr<Tegra::GPU> gpu;
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
} else {
gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
}
auto context = emu_window.CreateSharedContext(); auto context = emu_window.CreateSharedContext();
const auto scope = context->Acquire(); const auto scope = context->Acquire();