From d52bf795e4442ae9dd9722e79ef050ed5dda720c Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 17 Jan 2025 19:22:11 +1000 Subject: [PATCH] GPU: Split backend into Backend+Presenter --- src/core/CMakeLists.txt | 2 + src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 2 + src/core/fullscreen_ui.cpp | 4 +- src/core/gpu.cpp | 2 +- src/core/gpu_backend.cpp | 1265 ++++------------------ src/core/gpu_backend.h | 111 +- src/core/gpu_hw.cpp | 76 +- src/core/gpu_hw.h | 2 +- src/core/gpu_presenter.cpp | 943 ++++++++++++++++ src/core/gpu_presenter.h | 124 +++ src/core/gpu_sw.cpp | 27 +- src/core/gpu_sw.h | 2 +- src/core/gpu_thread.cpp | 248 ++--- src/core/gpu_thread.h | 3 - src/core/system.cpp | 10 +- src/core/system.h | 2 +- src/duckstation-regtest/regtest_host.cpp | 96 +- 18 files changed, 1532 insertions(+), 1389 deletions(-) create mode 100644 src/core/gpu_presenter.cpp create mode 100644 src/core/gpu_presenter.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 98ebc33e4..09b276e3c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -57,6 +57,8 @@ add_library(core gpu_hw_shadergen.h gpu_hw_texture_cache.cpp gpu_hw_texture_cache.h + gpu_presenter.cpp + gpu_presenter.h gpu_shadergen.cpp gpu_shadergen.h gpu_sw.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 5ea3ff393..773874fc1 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -38,6 +38,7 @@ + @@ -116,6 +117,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index b279d9b70..f935c9701 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -66,6 +66,7 @@ + @@ -142,6 +143,7 @@ + diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index c1718cb78..e61b824d6 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -8329,8 +8329,8 @@ LoadingScreenProgressCallback::~LoadingScreenProgressCallback() } else { - // since this was pushing frames, we need to 
restore the context - GPUThread::Internal::RestoreContextAfterPresent(); + // since this was pushing frames, we need to restore the context. do that by pushing a frame ourselves + GPUThread::Internal::DoRunIdle(); } } diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 7a61ef828..f4d5d076d 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -2062,7 +2062,7 @@ bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */) // save screenshot to same location to identify it GPUBackend::RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), DisplayScreenshotMode::ScreenResolution, 85, - true, false); + false); return true; } diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index 458b867ef..a1bbe8e04 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -1,9 +1,9 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_backend.h" #include "gpu.h" -#include "gpu_shadergen.h" +#include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" #include "gpu_thread.h" #include "host.h" @@ -14,29 +14,19 @@ #include "system_private.h" #include "util/gpu_device.h" -#include "util/image.h" #include "util/imgui_manager.h" -#include "util/media_capture.h" -#include "util/postprocessing.h" #include "util/state_wrapper.h" -#include "common/align.h" #include "common/error.h" #include "common/file_system.h" -#include "common/gsvector_formatter.h" #include "common/log.h" #include "common/path.h" -#include "common/small_string.h" -#include "common/string_util.h" -#include "common/timer.h" +#include "common/threading.h" #include "IconsEmoji.h" #include "IconsFontAwesome5.h" #include "fmt/format.h" -#include -#include - LOG_CHANNEL(GPU); namespace { @@ -70,77 +60,75 @@ struct ALIGN_TO_CACHE_LINE CPUThreadState } // namespace -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, 
FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, Image image, std::string osd_key); - -static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; - static Counters s_counters = {}; static Stats s_stats = {}; + static CPUThreadState s_cpu_thread_state = {}; -GPUBackend::GPUBackend() +GPUBackend::GPUBackend(GPUPresenter& presenter) : m_presenter(presenter) { GPU_SW_Rasterizer::SelectImplementation(); ResetStatistics(); - - // Should be zero. - Assert(s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) == 0); - Assert(!s_cpu_thread_state.waiting_for_gpu_thread.load(std::memory_order_acquire)); } GPUBackend::~GPUBackend() { - DestroyDeinterlaceTextures(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + m_presenter.ClearDisplayTexture(); +} + +void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config) +{ + static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, + GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, + GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)), + }; + + // common state + config.input_layout.vertex_attributes = screen_vertex_attributes; + config.input_layout.vertex_stride = sizeof(ScreenVertex); + config.primitive = GPUPipeline::Primitive::TriangleStrips; +} + +GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size) +{ + const GSVector4 fboundsxxyy = GSVector4(bounds.xzyw()); + const GSVector2 fsize = GSVector2(rt_size); + const GSVector2 x = ((fboundsxxyy.xy() * GSVector2::cxpr(2.0f)) / fsize.xx()) - GSVector2::cxpr(1.0f); + const GSVector2 y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (fboundsxxyy.zw() / fsize.yy())); + 
return GSVector4::xyxy(x, y).xzyw(); +} + +void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size, + const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */) +{ + const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size); + + ScreenVertex* vertices; + u32 space; + u32 base_vertex; + g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast(&vertices), &space, &base_vertex); + + vertices[0].Set(xy.xy(), uv_bounds.xy()); + vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy()); + vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy()); + vertices[3].Set(xy.zw(), uv_bounds.zw()); + + g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4); + g_gpu_device->Draw(4, base_vertex); } bool GPUBackend::Initialize(bool clear_vram, Error* error) { m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); - - if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error)) - return false; - return true; } void GPUBackend::UpdateSettings(const GPUSettings& old_settings) { - FlushRender(); - if (g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) GPUBackend::ResetStatistics(); - - if (g_gpu_settings.display_scaling != old_settings.display_scaling || - g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || - g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) - { - // Toss buffers on mode change. 
- if (g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) - DestroyDeinterlaceTextures(); - - if (!CompileDisplayPipelines( - g_gpu_settings.display_scaling != old_settings.display_scaling, - g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, - g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr)) - { - Panic("Failed to compile display pipeline on settings change."); - } - } -} - -void GPUBackend::UpdateResolutionScale() -{ -} - -u32 GPUBackend::GetResolutionScale() const -{ - return 1u; -} - -void GPUBackend::RestoreDeviceContext() -{ } GPUThreadCommand* GPUBackend::NewClearVRAMCommand() @@ -325,6 +313,21 @@ u32 GPUBackend::GetQueuedFrameCount() return s_cpu_thread_state.queued_frames.load(std::memory_order_acquire); } +void GPUBackend::ReleaseQueuedFrame() +{ + s_cpu_thread_state.queued_frames.fetch_sub(1, std::memory_order_acq_rel); + + bool expected = true; + if (s_cpu_thread_state.waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel, + std::memory_order_relaxed)) + { + if (g_settings.gpu_max_queued_frames > 0) + DEV_LOG("--> Unblocking CPU thread"); + + s_cpu_thread_state.gpu_thread_wait.Post(); + } +} + bool GPUBackend::AllocateMemorySaveStates(std::span states, Error* error) { bool result; @@ -401,7 +404,7 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) case GPUBackendCommandType::ClearDisplay: { - ClearDisplay(); + m_presenter.ClearDisplay(); } break; @@ -529,194 +532,14 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) } } -bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error) -{ - const GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, - g_gpu_device->GetFeatures().framebuffer_fetch); - - GPUPipeline::GraphicsConfig plconfig; - plconfig.primitive = 
GPUPipeline::Primitive::Triangles; - plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); - plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.geometry_shader = nullptr; - plconfig.depth_format = GPUTexture::Format::Unknown; - plconfig.samples = 1; - plconfig.per_sample_shading = false; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; - - if (display) - { - SetScreenQuadInputLayout(plconfig); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : - GPUTexture::Format::RGBA8); - - std::string vs = shadergen.GenerateDisplayVertexShader(); - std::string fs; - switch (g_gpu_settings.display_scaling) - { - case DisplayScalingMode::BilinearSharp: - fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - fs = shadergen.GenerateDisplayFragmentShader(true, false); - break; - - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - default: - fs = shadergen.GenerateDisplayFragmentShader(false, true); - break; - } - - std::unique_ptr vso = - g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error); - std::unique_ptr fso = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Display Vertex Shader"); - GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", - Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", - 
Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); - } - - plconfig.input_layout = {}; - plconfig.primitive = GPUPipeline::Primitive::Triangles; - - if (deinterlace) - { - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - if (!vso) - return false; - GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - switch (g_gpu_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - case DisplayDeinterlacingMode::Progressive: - break; - - case DisplayDeinterlacingMode::Weave: - { - std::unique_ptr fso = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateDeinterlaceWeaveFragmentShader(), error); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Blend: - { - std::unique_ptr fso = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateDeinterlaceBlendFragmentShader(), error); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace 
Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Adaptive: - { - std::unique_ptr fso = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateFastMADReconstructFragmentShader(), error); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); - } - break; - - default: - UnreachableCode(); - } - } - - if (chroma_smoothing) - { - m_chroma_smoothing_pipeline.reset(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); - - if (g_gpu_settings.display_24bit_chroma_smoothing) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - std::unique_ptr fso = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); - GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); - - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); - } - } - - return true; -} - void GPUBackend::HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd) { // Height has to be doubled because we halved it on the GPU side. 
- const GPUBackendUpdateDisplayCommand* ccmd = static_cast(cmd); - m_display_width = ccmd->display_width; - m_display_height = ccmd->display_height; - m_display_origin_left = ccmd->display_origin_left; - m_display_origin_top = ccmd->display_origin_top; - m_display_vram_width = ccmd->display_vram_width; - m_display_vram_height = (ccmd->display_vram_height << BoolToUInt32(ccmd->interlaced_display_enabled)); - m_display_pixel_aspect_ratio = ccmd->display_pixel_aspect_ratio; - - UpdateDisplay(ccmd); + m_presenter.SetDisplayParameters( + cmd->display_width, cmd->display_height, cmd->display_origin_left, cmd->display_origin_top, cmd->display_vram_width, + cmd->display_vram_height << BoolToUInt32(cmd->interlaced_display_enabled), cmd->display_pixel_aspect_ratio); + UpdateDisplay(cmd); if (cmd->submit_frame) HandleSubmitFrameCommand(&cmd->frame); } @@ -727,23 +550,15 @@ void GPUBackend::HandleSubmitFrameCommand(const GPUBackendFramePresentationParam Host::FrameDoneOnGPUThread(this, cmd->frame_number); if (cmd->media_capture) - SendDisplayToMediaCapture(cmd->media_capture); + m_presenter.SendDisplayToMediaCapture(cmd->media_capture); + // If this returns false, our backend object is deleted and replaced with null, so bail out. 
if (cmd->present_frame) { - GPUThread::Internal::PresentFrame(cmd->allow_present_skip, cmd->present_time); - - s_cpu_thread_state.queued_frames.fetch_sub(1, std::memory_order_acq_rel); - - bool expected = true; - if (s_cpu_thread_state.waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel, - std::memory_order_relaxed)) - { - if (g_settings.gpu_max_queued_frames > 0) - DEV_LOG("--> Unblocking CPU thread"); - - s_cpu_thread_state.gpu_thread_wait.Post(); - } + const bool result = m_presenter.PresentFrame(&m_presenter, this, cmd->allow_present_skip, cmd->present_time); + ReleaseQueuedFrame(); + if (!result) + return; } // Update perf counters *after* throttling, we want to measure from start-of-frame @@ -751,817 +566,8 @@ void GPUBackend::HandleSubmitFrameCommand(const GPUBackendFramePresentationParam // amounts of computation happening in each frame). if (cmd->update_performance_counters) PerformanceCounters::Update(this, cmd->frame_number, cmd->internal_frame_number); -} - -void GPUBackend::ClearDisplay() -{ - ClearDisplayTexture(); - - // Just recycle the textures, it'll get re-fetched. 
- DestroyDeinterlaceTextures(); -} - -void GPUBackend::ClearDisplayTexture() -{ - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; -} - -void GPUBackend::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, - s32 view_width, s32 view_height) -{ - DebugAssert(texture); - - if (g_gpu_settings.display_auto_resize_window && - (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height)) - { - Host::RunOnCPUThread([]() { System::RequestDisplaySize(); }); - } - - m_display_texture = texture; - m_display_depth_buffer = depth_buffer; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; -} - -GPUDevice::PresentResult GPUBackend::PresentDisplay() -{ - FlushRender(); - - if (!g_gpu_device->HasMainSwapChain()) - return GPUDevice::PresentResult::SkipPresent; - - GSVector4i display_rect; - GSVector4i draw_rect; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), - !g_gpu_settings.gpu_show_vram, true, &display_rect, &draw_rect); - return RenderDisplay(nullptr, display_rect, draw_rect, !g_gpu_settings.gpu_show_vram); -} - -GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, - const GSVector4i draw_rect, bool postfx) -{ - GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); - - if (m_display_texture) - m_display_texture->MakeReadyForSampling(); - - // Internal post-processing. 
- GPUTexture* display_texture = m_display_texture; - s32 display_texture_view_x = m_display_texture_view_x; - s32 display_texture_view_y = m_display_texture_view_y; - s32 display_texture_view_width = m_display_texture_view_width; - s32 display_texture_view_height = m_display_texture_view_height; - if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && - PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, - display_texture_view_height)) - { - DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && - static_cast(display_texture->GetWidth()) == display_texture_view_width && - static_cast(display_texture->GetHeight()) == display_texture_view_height); - - // Now we can apply the post chain. - GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, - GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), - display_texture_view_width, display_texture_view_height, m_display_width, - m_display_height) == GPUDevice::PresentResult::OK) - { - display_texture_view_x = 0; - display_texture_view_y = 0; - display_texture = post_output_texture; - display_texture->MakeReadyForSampling(); - } - } - - const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat(); - const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 target_height = target ? 
target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight(); - const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() && - hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && - PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); - const u32 real_target_width = - (target || really_postfx) ? target_width : g_gpu_device->GetMainSwapChain()->GetPostRotatedWidth(); - const u32 real_target_height = - (target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight(); - GSVector4i real_draw_rect = - (target || really_postfx) ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect); - if (really_postfx) - { - g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); - g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); - } - else - { - if (target) - { - g_gpu_device->SetRenderTarget(target); - } - else - { - const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain()); - if (pres != GPUDevice::PresentResult::OK) - return pres; - } - } - - if (display_texture) - { - bool texture_filter_linear = false; - - struct alignas(16) Uniforms - { - float src_size[4]; - float clamp_rect[4]; - float params[4]; - } uniforms; - std::memset(uniforms.params, 0, sizeof(uniforms.params)); - - switch (g_gpu_settings.display_scaling) - { - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - texture_filter_linear = true; - break; - - case DisplayScalingMode::BilinearSharp: - { - texture_filter_linear = true; - uniforms.params[0] = std::max( - std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); - uniforms.params[1] = std::max( - 
std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); - uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; - uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; - } - break; - - default: - UnreachableCode(); - break; - } - - g_gpu_device->SetPipeline(m_display_pipeline.get()); - g_gpu_device->SetTextureSampler( - 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); - - // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because - // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. - const GSVector2 display_texture_size = GSVector2(display_texture->GetSizeVec()); - const GSVector4 display_texture_size4 = GSVector4::xyxy(display_texture_size); - const GSVector4 uv_rect = GSVector4(GSVector4i(display_texture_view_x, display_texture_view_y, - display_texture_view_x + display_texture_view_width, - display_texture_view_y + display_texture_view_height)) / - display_texture_size4; - GSVector4::store(uniforms.clamp_rect, - GSVector4(static_cast(display_texture_view_x) + 0.5f, - static_cast(display_texture_view_y) + 0.5f, - static_cast(display_texture_view_x + display_texture_view_width) - 0.5f, - static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) / - display_texture_size4); - GSVector4::store(uniforms.src_size, - GSVector4::xyxy(display_texture_size, GSVector2::cxpr(1.0f) / display_texture_size)); - - g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewport(0, 0, real_target_width, real_target_height); - g_gpu_device->SetScissor(g_gpu_device->UsesLowerLeftOrigin() ? 
- GPUDevice::FlipToLowerLeft(real_draw_rect, real_target_height) : - real_draw_rect); - - ScreenVertex* vertices; - u32 space; - u32 base_vertex; - g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast(&vertices), &space, &base_vertex); - - const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ? - WindowInfo::PreRotation::Identity : - g_gpu_device->GetMainSwapChain()->GetPreRotation(); - - const DisplayRotation uv_rotation = static_cast( - (static_cast(g_gpu_settings.display_rotation) + static_cast(surface_prerotation)) % - static_cast(DisplayRotation::Count)); - - const GSVector4 xy = - GetScreenQuadClipSpaceCoordinates(real_draw_rect, GSVector2i(real_target_width, real_target_height)); - switch (uv_rotation) - { - case DisplayRotation::Normal: - vertices[0].Set(xy.xy(), uv_rect.xy()); - vertices[1].Set(xy.zyzw().xy(), uv_rect.zyzw().xy()); - vertices[2].Set(xy.xwzw().xy(), uv_rect.xwzw().xy()); - vertices[3].Set(xy.zw(), uv_rect.zw()); - break; - case DisplayRotation::Rotate90: - vertices[0].Set(xy.xy(), uv_rect.xwzw().xy()); - vertices[1].Set(xy.zyzw().xy(), uv_rect.xy()); - vertices[2].Set(xy.xwzw().xy(), uv_rect.zw()); - vertices[3].Set(xy.zw(), uv_rect.zyzw().xy()); - break; - case DisplayRotation::Rotate180: - vertices[0].Set(xy.xy(), uv_rect.xwzw().xy()); - vertices[1].Set(xy.zyzw().xy(), uv_rect.zw()); - vertices[2].Set(xy.xwzw().xy(), uv_rect.xy()); - vertices[3].Set(xy.zw(), uv_rect.zyzw().xy()); - break; - case DisplayRotation::Rotate270: - vertices[0].Set(xy.xy(), uv_rect.zyzw().xy()); - vertices[1].Set(xy.zyzw().xy(), uv_rect.zw()); - vertices[2].Set(xy.xwzw().xy(), uv_rect.xy()); - vertices[3].Set(xy.zw(), uv_rect.xwzw().xy()); - break; - - DefaultCaseIsUnreachable(); - } - - g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4); - g_gpu_device->Draw(4, base_vertex); - } - - if (really_postfx) - { - DebugAssert(!g_gpu_settings.gpu_show_vram); - - // "original size" in postfx includes padding. 
- const float upscale_x = - m_display_texture ? static_cast(m_display_texture_view_width) / static_cast(m_display_vram_width) : - 1.0f; - const float upscale_y = m_display_texture ? static_cast(m_display_texture_view_height) / - static_cast(m_display_vram_height) : - 1.0f; - const s32 orig_width = static_cast(std::ceil(static_cast(m_display_width) * upscale_x)); - const s32 orig_height = static_cast(std::ceil(static_cast(m_display_height) * upscale_y)); - - return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, - display_rect, orig_width, orig_height, m_display_width, m_display_height); - } - else - { - return GPUDevice::PresentResult::OK; - } -} - -void GPUBackend::SendDisplayToMediaCapture(MediaCapture* cap) -{ - GPUTexture* target = cap->GetRenderTexture(); - if (!target) [[unlikely]] - { - WARNING_LOG("Failed to get video capture render texture."); - Host::RunOnCPUThread(&System::StopMediaCapture); - return; - } - - const bool apply_aspect_ratio = - (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); - const bool postfx = (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); - GSVector4i display_rect, draw_rect; - CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_gpu_settings.gpu_show_vram, apply_aspect_ratio, - &display_rect, &draw_rect); - - // Not cleared by RenderDisplay(). 
- g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR); - - if (RenderDisplay(target, display_rect, draw_rect, postfx) != GPUDevice::PresentResult::OK || - !cap->DeliverVideoFrame(target)) [[unlikely]] - { - WARNING_LOG("Failed to render/deliver video capture frame."); - Host::RunOnCPUThread(&System::StopMediaCapture); - return; - } -} - -void GPUBackend::DestroyDeinterlaceTextures() -{ - for (std::unique_ptr& tex : m_deinterlace_buffers) - g_gpu_device->RecycleTexture(std::move(tex)); - g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); - m_current_deinterlace_buffer = 0; -} - -bool GPUBackend::Deinterlace(u32 field) -{ - GPUTexture* src = m_display_texture; - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - - const auto copy_to_field_buffer = [&](u32 buffer) { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture, - src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer); - - GL_INS_FMT("Copy {}x{} from {},{} to field buffer {}", width, height, x, y, buffer); - g_gpu_device->CopyTextureRegion(m_deinterlace_buffers[buffer].get(), 0, 0, 0, 0, m_display_texture, x, y, 0, 0, - width, height); - return true; - }; - - src->MakeReadyForSampling(); - - switch (g_gpu_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - { - GL_INS("Deinterlacing disabled, displaying field texture"); - return true; - } - - case DisplayDeinterlacingMode::Weave: - { - GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={})", x, y, width, height, field); - - const u32 full_height = height * 2; - if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - 
src->MakeReadyForSampling(); - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[4] = {x, y, field, 0}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - case DisplayDeinterlacingMode::Blend: - { - constexpr u32 NUM_BLEND_BUFFERS = 2; - - GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={})", x, y, width, height, field); - - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceSetTargetSize(width, height, false) || !copy_to_field_buffer(this_buffer)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - copy_to_field_buffer(this_buffer); - - // TODO: could be implemented with alpha blending instead.. 
- g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Adaptive: - { - GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={})", x, y, width, height, field); - - const u32 this_buffer = m_current_deinterlace_buffer; - const u32 full_height = height * 2; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceSetTargetSize(width, full_height, false) || !copy_to_field_buffer(this_buffer)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {field, full_height}; - 
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - default: - UnreachableCode(); - } -} - -bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) -{ - if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); - return true; -} - -bool GPUBackend::ApplyChromaSmoothing() -{ - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false)) - { - ClearDisplayTexture(); - return false; - } - - GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); - - GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); - - m_display_texture->MakeReadyForSampling(); - g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, width - 1, height - 1}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_chroma_smoothing_texture->MakeReadyForSampling(); - 
SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; -} - -void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config) -{ - static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = { - GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, - GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)), - GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, - GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)), - }; - - // common state - config.input_layout.vertex_attributes = screen_vertex_attributes; - config.input_layout.vertex_stride = sizeof(ScreenVertex); - config.primitive = GPUPipeline::Primitive::TriangleStrips; -} - -GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size) -{ - const GSVector4 fboundsxxyy = GSVector4(bounds.xzyw()); - const GSVector2 fsize = GSVector2(rt_size); - const GSVector2 x = ((fboundsxxyy.xy() * GSVector2::cxpr(2.0f)) / fsize.xx()) - GSVector2::cxpr(1.0f); - const GSVector2 y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (fboundsxxyy.zw() / fsize.yy())); - return GSVector4::xyxy(x, y).xzyw(); -} - -void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size, - const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */) -{ - const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size); - - ScreenVertex* vertices; - u32 space; - u32 base_vertex; - g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast(&vertices), &space, &base_vertex); - - vertices[0].Set(xy.xy(), uv_bounds.xy()); - vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy()); - vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy()); - vertices[3].Set(xy.zw(), uv_bounds.zw()); - - g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4); - 
g_gpu_device->Draw(4, base_vertex); -} - -void GPUBackend::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const -{ - const bool integer_scale = (g_gpu_settings.display_scaling == DisplayScalingMode::NearestInteger || - g_gpu_settings.display_scaling == DisplayScalingMode::BilinearInteger); - const bool show_vram = g_gpu_settings.gpu_show_vram; - const u32 display_width = show_vram ? VRAM_WIDTH : m_display_width; - const u32 display_height = show_vram ? VRAM_HEIGHT : m_display_height; - const s32 display_origin_left = show_vram ? 0 : m_display_origin_left; - const s32 display_origin_top = show_vram ? 0 : m_display_origin_top; - const u32 display_vram_width = show_vram ? VRAM_WIDTH : m_display_vram_width; - const u32 display_vram_height = show_vram ? VRAM_HEIGHT : m_display_vram_height; - const float display_pixel_aspect_ratio = show_vram ? 1.0f : m_display_pixel_aspect_ratio; - GPU::CalculateDrawRect(window_width, window_height, display_width, display_height, display_origin_left, - display_origin_top, display_vram_width, display_vram_height, g_gpu_settings.display_rotation, - g_gpu_settings.display_alignment, display_pixel_aspect_ratio, - g_gpu_settings.display_stretch_vertically, integer_scale, display_rect, draw_rect); -} - -bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, u8 quality, - bool clear_alpha, bool flip_y, Image image, std::string osd_key) -{ - - Error error; - - if (flip_y) - image.FlipY(); - - if (image.GetFormat() != ImageFormat::RGBA8) - { - std::optional convert_image = image.ConvertToRGBA8(&error); - if (!convert_image.has_value()) - { - ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), - error.GetDescription()); - image.Invalidate(); - } - else - { - image = std::move(convert_image.value()); - } - } - - bool result = false; - if 
(image.IsValid()) - { - if (clear_alpha) - image.SetAllPixelsOpaque(); - - result = image.SaveToFile(path.c_str(), fp.get(), quality, &error); - if (!result) - ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); - } - - if (!osd_key.empty()) - { - Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, - fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : - TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), - Path::GetFileName(path), - result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); - } - - return result; -} - -bool GPUBackend::WriteDisplayTextureToFile(std::string filename) -{ - if (!m_display_texture) - return false; - - const u32 read_x = static_cast(m_display_texture_view_x); - const u32 read_y = static_cast(m_display_texture_view_y); - const u32 read_width = static_cast(m_display_texture_view_width); - const u32 read_height = static_cast(m_display_texture_view_height); - const ImageFormat read_format = GPUTexture::GetImageFormatForTextureFormat(m_display_texture->GetFormat()); - if (read_format == ImageFormat::None) - return false; - - Image image(read_width, read_height, read_format); - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), - image.GetPixels(), image.GetStorageSize(), image.GetPitch()); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) - { - ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, - GPUTexture::GetFormatName(m_display_texture->GetFormat())); - return false; - } - } - - dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); - if (!dltex->ReadTexels(0, 0, read_width, read_height, image.GetPixels(), image.GetPitch())) - { - 
RestoreDeviceContext(); - return false; - } RestoreDeviceContext(); - - Error error; - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); - return false; - } - - constexpr bool clear_alpha = true; - const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); - - return CompressAndWriteTextureToFile(read_width, read_height, std::move(filename), std::move(fp), - g_gpu_settings.display_screenshot_quality, clear_alpha, flip_y, std::move(image), - std::string()); -} - -bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, Image* out_image) -{ - bool result; - GPUThread::RunOnBackend( - [width, height, postfx, out_image, &result](GPUBackend* backend) { - if (!backend) - return; - - GSVector4i draw_rect, display_rect; - backend->CalculateDrawRect(static_cast(width), static_cast(height), true, true, &display_rect, - &draw_rect); - - // Crop it. - const u32 cropped_width = static_cast(display_rect.width()); - const u32 cropped_height = static_cast(display_rect.height()); - draw_rect = draw_rect.sub32(display_rect.xyxy()); - display_rect = display_rect.sub32(display_rect.xyxy()); - result = - backend->RenderScreenshotToBuffer(cropped_width, cropped_height, display_rect, draw_rect, postfx, out_image); - }, - true, false); - - return result; -} - -bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, - const GSVector4i draw_rect, bool postfx, Image* out_image) -{ - const GPUTexture::Format hdformat = - g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; - const ImageFormat image_format = GPUTexture::GetImageFormatForTextureFormat(hdformat); - if (image_format == ImageFormat::None) - return false; - - auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, - hdformat, GPUTexture::Flags::None); - if (!render_texture) - return false; - - g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); - - // TODO: this should use copy shader instead. - RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); - - Image image(width, height, image_format); - - Error error; - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, image.GetPixels(), image.GetStorageSize(), - image.GetPitch(), &error); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, &error))) - { - ERROR_LOG("Failed to create {}x{} download texture: {}", width, height, error.GetDescription()); - return false; - } - } - - dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); - if (!dltex->ReadTexels(0, 0, width, height, image.GetPixels(), image.GetPitch())) - { - RestoreDeviceContext(); - return false; - } - - RestoreDeviceContext(); - *out_image = std::move(image); - return true; -} - -void GPUBackend::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, - GSVector4i* draw_rect) const -{ - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_gpu_settings.gpu_show_vram); - if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) - { - if (mode == DisplayScreenshotMode::InternalResolution) - { - float f_width = static_cast(m_display_texture_view_width); - float f_height = 
static_cast(m_display_texture_view_height); - if (!g_gpu_settings.gpu_show_vram) - GPU::ApplyPixelAspectRatioToSize(m_display_pixel_aspect_ratio, &f_width, &f_height); - - // DX11 won't go past 16K texture size. - const float max_texture_size = static_cast(g_gpu_device->GetMaxTextureSize()); - if (f_width > max_texture_size) - { - f_height = f_height / (f_width / max_texture_size); - f_width = max_texture_size; - } - if (f_height > max_texture_size) - { - f_height = max_texture_size; - f_width = f_width / (f_height / max_texture_size); - } - - *width = static_cast(std::ceil(f_width)); - *height = static_cast(std::ceil(f_height)); - } - else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) - { - *width = m_display_texture_view_width; - *height = m_display_texture_view_height; - } - - // Remove padding, it's not part of the framebuffer. - *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); - *display_rect = *draw_rect; - } - else - { - *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; - *height = g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetHeight() : 1; - CalculateDrawRect(*width, *height, true, !g_settings.gpu_show_vram, display_rect, draw_rect); - } -} - -void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, - bool compress_on_thread, bool show_osd_message) -{ - GPUThread::RunOnBackend( - [path = std::string(path), mode, quality, compress_on_thread, show_osd_message](GPUBackend* backend) mutable { - if (!backend) - return; - - u32 width, height; - GSVector4i display_rect, draw_rect; - backend->CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); - - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); - if (width == 0 || height == 0) - return; - - Image image; - if (!backend->RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &image)) - { - ERROR_LOG("Failed to render {}x{} screenshot", width, height); - return; - } - - Error error; - auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); - return; - } - - std::string osd_key; - if (show_osd_message) - { - // Use a 60 second timeout to give it plenty of time to actually save. 
- osd_key = fmt::format("ScreenshotSaver_{}", path); - Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, - fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), - 60.0f); - } - - if (compress_on_thread) - { - System::QueueAsyncTask([width, height, path = std::move(path), fp = fp.release(), quality, - flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), - osd_key = std::move(osd_key)]() mutable { - CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, - flip_y, std::move(image), std::move(osd_key)); - }); - } - else - { - CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), quality, true, - g_gpu_device->UsesLowerLeftOrigin(), std::move(image), std::move(osd_key)); - } - }, - false, false); } void GPUBackend::GetStatsString(SmallStringBase& str) const @@ -1626,3 +632,124 @@ void GPUBackend::UpdateStatistics(u32 frame_count) ResetStatistics(); } + +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, Image* out_image) +{ + bool result = false; // stays false when the callback bails out without a backend + GPUThread::RunOnBackend( + [width, height, postfx, out_image, &result](GPUBackend* backend) { + if (!backend) + return; + + GSVector4i draw_rect, display_rect; + backend->m_presenter.CalculateDrawRect(static_cast<s32>(width), static_cast<s32>(height), true, true, + &display_rect, &draw_rect); + + // Crop it. 
+ const u32 cropped_width = static_cast(display_rect.width()); + const u32 cropped_height = static_cast(display_rect.height()); + draw_rect = draw_rect.sub32(display_rect.xyxy()); + display_rect = display_rect.sub32(display_rect.xyxy()); + result = backend->m_presenter.RenderScreenshotToBuffer(cropped_width, cropped_height, display_rect, draw_rect, + postfx, out_image); + backend->RestoreDeviceContext(); + }, + true, false); + + return result; +} + +void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, + bool show_osd_message) +{ + GPUThread::RunOnBackend( + [path = std::string(path), mode, quality, show_osd_message](GPUBackend* backend) mutable { + if (!backend) + return; + + u32 width, height; + GSVector4i display_rect, draw_rect; + backend->m_presenter.CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); + + const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); + if (width == 0 || height == 0) + return; + + Image image; + if (!backend->m_presenter.RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, + &image)) + { + ERROR_LOG("Failed to render {}x{} screenshot", width, height); + backend->RestoreDeviceContext(); + return; + } + + // no more GPU calls + backend->RestoreDeviceContext(); + + Error error; + auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); + return; + } + + std::string osd_key; + if (show_osd_message) + { + // Use a 60 second timeout to give it plenty of time to actually save. 
+ osd_key = fmt::format("ScreenshotSaver_{}", path); + Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, + fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), + 60.0f); + } + + System::QueueAsyncTask([path = std::move(path), fp = fp.release(), quality, + flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), + osd_key = std::move(osd_key)]() mutable { + Error error; + + if (flip_y) + image.FlipY(); + + if (image.GetFormat() != ImageFormat::RGBA8) + { + std::optional convert_image = image.ConvertToRGBA8(&error); + if (!convert_image.has_value()) + { + ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), + error.GetDescription()); + image.Invalidate(); + } + else + { + image = std::move(convert_image.value()); + } + } + + bool result = false; + if (image.IsValid()) + { + image.SetAllPixelsOpaque(); + + result = image.SaveToFile(path.c_str(), fp, quality, &error); + if (!result) + ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); + } + + if (!osd_key.empty()) + { + Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, + fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : + TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), + Path::GetFileName(path), + result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); + } + + std::fclose(fp); + return result; + }); + }, + false, false); +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index 9ed39bdd3..a3fee89c3 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -1,20 +1,13 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #pragma once -#include "gpu_thread_commands.h" - #include "util/gpu_device.h" -#include "common/heap_array.h" -#include "common/threading.h" +#include "gpu_thread_commands.h" -#include -#include #include -#include -#include class Error; class SmallStringBase; @@ -25,6 +18,8 @@ class GPUPipeline; struct GPUSettings; class StateWrapper; +class GPUPresenter; + namespace System { struct MemorySaveState; } @@ -59,12 +54,12 @@ public: static bool IsUsingHardwareBackend(); - static std::unique_ptr CreateHardwareBackend(); - static std::unique_ptr CreateSoftwareBackend(); + static std::unique_ptr CreateHardwareBackend(GPUPresenter& presenter); + static std::unique_ptr CreateSoftwareBackend(GPUPresenter& presenter); static bool RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, Image* out_image); static void RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, - bool compress_on_thread, bool show_osd_message); + bool show_osd_message); static bool BeginQueueFrame(); static void WaitForOneQueuedFrame(); @@ -73,9 +68,11 @@ public: static bool AllocateMemorySaveStates(std::span states, Error* error); public: - GPUBackend(); + GPUBackend(GPUPresenter& presenter); virtual ~GPUBackend(); + ALWAYS_INLINE const GPUPresenter& GetPresenter() const { return m_presenter; } + virtual bool Initialize(bool upload_vram, Error* error); virtual void UpdateSettings(const GPUSettings& old_settings); @@ -96,28 +93,13 @@ public: /// Main command handler for GPU thread. 
void HandleCommand(const GPUThreadCommand* cmd); - /// Draws the current display texture, with any post-processing. - GPUDevice::PresentResult PresentDisplay(); - - /// Helper function to save current display texture to PNG. Used for regtest. - bool WriteDisplayTextureToFile(std::string filename); - - /// Helper function for computing screenshot bounds. - void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, - GSVector4i* draw_rect) const; - void GetStatsString(SmallStringBase& str) const; void GetMemoryStatsString(SmallStringBase& str) const; void ResetStatistics(); void UpdateStatistics(u32 frame_count); -protected: - enum : u32 - { - DEINTERLACE_BUFFER_COUNT = 4, - }; - + /// Screen-aligned vertex type for various draw types. struct ScreenVertex { float x; @@ -131,6 +113,18 @@ protected: } }; + static void SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config); + static GSVector4 GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size); + + static void DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size, + const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f)); + +protected: + enum : u32 + { + DEINTERLACE_BUFFER_COUNT = 4, + }; + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 interlaced_display_field) = 0; @@ -156,67 +150,14 @@ protected: virtual bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) = 0; virtual void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) = 0; - static void SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config); - static GSVector4 GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size); - - void DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size, - const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f)); - - /// 
Helper function for computing the draw rectangle in a larger window. - void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const; - - /// Renders the display, optionally with postprocessing to the specified image. - bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, Image* out_image); - - bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); - void HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd); void HandleSubmitFrameCommand(const GPUBackendFramePresentationParameters* cmd); - void ClearDisplay(); - void ClearDisplayTexture(); - void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, - s32 view_height); - - GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx); - - /// Sends the current frame to media capture. 
- void SendDisplayToMediaCapture(MediaCapture* cap); - - bool Deinterlace(u32 field); - bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); - void DestroyDeinterlaceTextures(); - bool ApplyChromaSmoothing(); - - s32 m_display_width = 0; - s32 m_display_height = 0; - + GPUPresenter& m_presenter; GSVector4i m_clamped_drawing_area = {}; - s32 m_display_origin_left = 0; - s32 m_display_origin_top = 0; - s32 m_display_vram_width = 0; - s32 m_display_vram_height = 0; - float m_display_pixel_aspect_ratio = 1.0f; - - u32 m_current_deinterlace_buffer = 0; - std::unique_ptr m_deinterlace_pipeline; - std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; - std::unique_ptr m_deinterlace_texture; - - std::unique_ptr m_chroma_smoothing_pipeline; - std::unique_ptr m_chroma_smoothing_texture; - - std::unique_ptr m_display_pipeline; - GPUTexture* m_display_texture = nullptr; - GPUTexture* m_display_depth_buffer = nullptr; - s32 m_display_texture_view_x = 0; - s32 m_display_texture_view_y = 0; - s32 m_display_texture_view_width = 0; - s32 m_display_texture_view_height = 0; +private: + static void ReleaseQueuedFrame(); }; namespace Host { diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 51b20c8a0..78d84c900 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -6,6 +6,7 @@ #include "cpu_pgxp.h" #include "gpu.h" #include "gpu_hw_shadergen.h" +#include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" #include "host.h" #include "imgui_overlays.h" @@ -201,7 +202,7 @@ private: }; } // namespace -GPU_HW::GPU_HW() : GPUBackend() +GPU_HW::GPU_HW(GPUPresenter& presenter) : GPUBackend(presenter) { #if defined(_DEBUG) || defined(_DEVEL) s_draw_number = 0; @@ -438,6 +439,8 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings) { GPUBackend::UpdateSettings(old_settings); + FlushRender(); + const GPUDevice::Features features = g_gpu_device->GetFeatures(); const u8 resolution_scale = Truncate8(CalculateResolutionScale()); @@ -480,8 +483,8 @@ void 
GPU_HW::UpdateSettings(const GPUSettings& old_settings) { Host::AddIconOSDMessage("ResolutionScaleChanged", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS("GPU_HW", "Internal resolution set to {0}x ({1}x{2})."), - resolution_scale, m_display_width * resolution_scale, - resolution_scale * m_display_height), + resolution_scale, m_presenter.GetDisplayWidth() * resolution_scale, + m_presenter.GetDisplayHeight() * resolution_scale), Host::OSD_INFO_DURATION); } @@ -732,8 +735,9 @@ u32 GPU_HW::CalculateResolutionScale() const else { // Auto scaling. - if (m_display_width == 0 || m_display_height == 0 || m_display_vram_width == 0 || m_display_vram_height == 0 || - !m_display_texture || !g_gpu_device->HasMainSwapChain()) + if (m_presenter.GetDisplayWidth() == 0 || m_presenter.GetDisplayHeight() == 0 || + m_presenter.GetDisplayVRAMWidth() == 0 || m_presenter.GetDisplayVRAMHeight() == 0 || + !m_presenter.HasDisplayTexture() || !g_gpu_device->HasMainSwapChain()) { // When the system is starting and all borders crop is enabled, the registers are zero, and // display_height therefore is also zero. Keep the existing resolution until it updates. @@ -742,18 +746,19 @@ u32 GPU_HW::CalculateResolutionScale() const else { GSVector4i display_rect, draw_rect; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), - true, true, &display_rect, &draw_rect); + m_presenter.CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), + g_gpu_device->GetMainSwapChain()->GetHeight(), true, true, &display_rect, + &draw_rect); // We use the draw rect to determine scaling. This way we match the resolution as best we can, regardless of the // anamorphic aspect ratio. 
const s32 draw_width = draw_rect.width(); const s32 draw_height = draw_rect.height(); scale = static_cast( - std::ceil(std::max(static_cast(draw_width) / static_cast(m_display_vram_width), - static_cast(draw_height) / static_cast(m_display_vram_height)))); + std::ceil(std::max(static_cast(draw_width) / static_cast(m_presenter.GetDisplayVRAMWidth()), + static_cast(draw_height) / static_cast(m_presenter.GetDisplayVRAMHeight())))); VERBOSE_LOG("Draw Size = {}x{}, VRAM Size = {}x{}, Preferred Scale = {}", draw_width, draw_height, - m_display_vram_width, m_display_vram_height, scale); + m_presenter.GetDisplayVRAMWidth(), m_presenter.GetDisplayVRAMHeight(), scale); } } @@ -1028,7 +1033,7 @@ void GPU_HW::DeactivateROV() void GPU_HW::DestroyBuffers() { - ClearDisplayTexture(); + m_presenter.ClearDisplayTexture(); DebugAssert((m_batch_vertex_ptr != nullptr) == (m_batch_index_ptr != nullptr)); if (m_batch_vertex_ptr) @@ -3846,12 +3851,13 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) if (IsUsingMultisampling()) { UpdateVRAMReadTexture(!m_vram_dirty_draw_rect.eq(INVALID_RECT), !m_vram_dirty_write_rect.eq(INVALID_RECT)); - SetDisplayTexture(m_vram_read_texture.get(), nullptr, 0, 0, m_vram_read_texture->GetWidth(), - m_vram_read_texture->GetHeight()); + m_presenter.SetDisplayTexture(m_vram_read_texture.get(), nullptr, 0, 0, m_vram_read_texture->GetWidth(), + m_vram_read_texture->GetHeight()); } else { - SetDisplayTexture(m_vram_texture.get(), nullptr, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + m_presenter.SetDisplayTexture(m_vram_texture.get(), nullptr, 0, 0, m_vram_texture->GetWidth(), + m_vram_texture->GetHeight()); } return; @@ -3875,7 +3881,7 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) if (cmd->display_disabled) { - ClearDisplayTexture(); + m_presenter.ClearDisplayTexture(); return; } else if (!cmd->display_24bit && line_skip == 0 && !IsUsingMultisampling() && @@ -3883,15 +3889,15 @@ void 
GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && !PostProcessing::InternalChain.IsActive()) { - SetDisplayTexture(m_vram_texture.get(), depth_source, scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); + m_presenter.SetDisplayTexture(m_vram_texture.get(), depth_source, scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height); // Fast path if no copies are needed. if (interlaced) { GL_INS("Deinterlace fast path"); drew_anything = true; - Deinterlace(interlaced_field); + m_presenter.Deinterlace(interlaced_field); } else { @@ -3904,7 +3910,7 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, GPUTexture::Flags::None)) [[unlikely]] { - ClearDisplayTexture(); + m_presenter.ClearDisplayTexture(); return; } @@ -3964,26 +3970,27 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) drew_anything = true; - SetDisplayTexture(m_vram_extract_texture.get(), depth_source ? m_vram_extract_depth_texture.get() : nullptr, 0, 0, - scaled_display_width, scaled_display_height); + m_presenter.SetDisplayTexture(m_vram_extract_texture.get(), + depth_source ? 
m_vram_extract_depth_texture.get() : nullptr, 0, 0, + scaled_display_width, scaled_display_height); if (g_settings.display_24bit_chroma_smoothing) { - if (ApplyChromaSmoothing()) + if (m_presenter.ApplyChromaSmoothing()) { if (interlaced) - Deinterlace(interlaced_field); + m_presenter.Deinterlace(interlaced_field); } } else { if (interlaced) - Deinterlace(interlaced_field); + m_presenter.Deinterlace(interlaced_field); } } if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit) { - DebugAssert(m_display_texture); + DebugAssert(m_presenter.HasDisplayTexture()); DownsampleFramebuffer(); } @@ -4024,11 +4031,11 @@ void GPU_HW::OnBufferSwapped() void GPU_HW::DownsampleFramebuffer() { - GPUTexture* source = m_display_texture; - const u32 left = m_display_texture_view_x; - const u32 top = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; + GPUTexture* source = m_presenter.GetDisplayTexture(); + const u32 left = m_presenter.GetDisplayTextureViewX(); + const u32 top = m_presenter.GetDisplayTextureViewY(); + const u32 width = m_presenter.GetDisplayTextureViewWidth(); + const u32 height = m_presenter.GetDisplayTextureViewHeight(); if (m_downsample_mode == GPUDownsampleMode::Adaptive) DownsampleFramebufferAdaptive(source, left, top, width, height); @@ -4153,7 +4160,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top RestoreDeviceContext(); - SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height); + m_presenter.SetDisplayTexture(m_downsample_texture.get(), m_presenter.GetDisplayDepthBuffer(), 0, 0, width, height); } void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) @@ -4185,10 +4192,11 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to RestoreDeviceContext(); - SetDisplayTexture(m_downsample_texture.get(), 
m_display_depth_buffer, 0, 0, ds_width, ds_height); + m_presenter.SetDisplayTexture(m_downsample_texture.get(), m_presenter.GetDisplayDepthBuffer(), 0, 0, ds_width, + ds_height); } -std::unique_ptr GPUBackend::CreateHardwareBackend() +std::unique_ptr GPUBackend::CreateHardwareBackend(GPUPresenter& presenter) { - return std::make_unique(); + return std::make_unique(presenter); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 1e998d56d..302bcf8e4 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -57,7 +57,7 @@ public: GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), std::numeric_limits::min()); - GPU_HW(); + GPU_HW(GPUPresenter& presenter); ~GPU_HW() override; bool Initialize(bool upload_vram, Error* error) override; diff --git a/src/core/gpu_presenter.cpp b/src/core/gpu_presenter.cpp new file mode 100644 index 000000000..2fea1c54d --- /dev/null +++ b/src/core/gpu_presenter.cpp @@ -0,0 +1,943 @@ +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#include "gpu_presenter.h" +#include "fullscreen_ui.h" +#include "gpu.h" +#include "gpu_backend.h" +#include "gpu_shadergen.h" +#include "gpu_thread.h" +#include "gpu_thread_commands.h" +#include "host.h" +#include "imgui_overlays.h" +#include "performance_counters.h" +#include "save_state_version.h" +#include "settings.h" +#include "system.h" + +#include "util/gpu_device.h" +#include "util/image.h" +#include "util/imgui_fullscreen.h" +#include "util/imgui_manager.h" +#include "util/media_capture.h" +#include "util/postprocessing.h" +#include "util/state_wrapper.h" + +#include "common/align.h" +#include "common/error.h" +#include "common/file_system.h" +#include "common/gsvector_formatter.h" +#include "common/log.h" +#include "common/path.h" +#include "common/small_string.h" +#include "common/string_util.h" +#include "common/threading.h" +#include "common/timer.h" + +#include + +LOG_CHANNEL(GPU); + 
+// Texture format passed to PostProcessing::InternalChain.CheckTargets() by RenderDisplay().
+static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8;
+
+GPUPresenter::GPUPresenter() = default;
+
+// Hands the deinterlace textures and the chroma smoothing texture back to the device via RecycleTexture().
+GPUPresenter::~GPUPresenter()
+{
+  DestroyDeinterlaceTextures();
+  g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture));
+}
+
+// Compiles the display and deinterlace pipelines unconditionally; the chroma smoothing stage is only
+// requested when display_24bit_chroma_smoothing is enabled. Returns false (with `error` filled in by
+// the device calls) if any shader or pipeline could not be created.
+bool GPUPresenter::Initialize(Error* error)
+{
+  if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error))
+    return false;
+
+  return true;
+}
+
+// Reacts to a settings change by rebuilding only the pipeline stages whose setting differs from
+// `old_settings` (scaling, deinterlacing mode, chroma smoothing). A compile failure here is fatal (Panic).
+void GPUPresenter::UpdateSettings(const GPUSettings& old_settings)
+{
+  if (g_gpu_settings.display_scaling != old_settings.display_scaling ||
+      g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
+      g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing)
+  {
+    // Toss buffers on mode change.
+    if (g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode)
+      DestroyDeinterlaceTextures();
+
+    if (!CompileDisplayPipelines(
+          g_gpu_settings.display_scaling != old_settings.display_scaling,
+          g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode,
+          g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr))
+    {
+      Panic("Failed to compile display pipeline on settings change.");
+    }
+  }
+}
+
+// (Re)builds the presentation pipelines selected by the three flags, using the current g_gpu_settings:
+//   display          - final scaling pipeline (m_display_pipeline), chosen by display_scaling.
+//   deinterlace      - m_deinterlace_pipeline, chosen by display_deinterlacing_mode.
+//   chroma_smoothing - m_chroma_smoothing_pipeline; always dropped first, rebuilt only if the setting is on.
+// Returns false as soon as any shader/pipeline creation fails; `error` is forwarded to the device calls.
+bool GPUPresenter::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error)
+{
+  const GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend,
+                               g_gpu_device->GetFeatures().framebuffer_fetch);
+
+  // Shared baseline state; each stage below overrides layout, shaders and target format as needed.
+  GPUPipeline::GraphicsConfig plconfig;
+  plconfig.primitive = GPUPipeline::Primitive::Triangles;
+  plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
+  plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
+  plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
+  plconfig.geometry_shader = nullptr;
+  plconfig.depth_format = GPUTexture::Format::Unknown;
+  plconfig.samples = 1;
+  plconfig.per_sample_shading = false;
+  plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
+
+  if (display)
+  {
+    GPUBackend::SetScreenQuadInputLayout(plconfig);
+
+    // Targets the swap chain format when one exists, otherwise RGBA8.
+    plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
+    plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() :
+                                                                 GPUTexture::Format::RGBA8);
+
+    std::string vs = shadergen.GenerateDisplayVertexShader();
+    std::string fs;
+    switch (g_gpu_settings.display_scaling)
+    {
+      case DisplayScalingMode::BilinearSharp:
+        fs = shadergen.GenerateDisplaySharpBilinearFragmentShader();
+        break;
+
+      case DisplayScalingMode::BilinearSmooth:
+      case DisplayScalingMode::BilinearInteger:
+        fs = shadergen.GenerateDisplayFragmentShader(true, false);
+        break;
+
+      case DisplayScalingMode::Nearest:
+      case DisplayScalingMode::NearestInteger:
+      default:
+        fs = shadergen.GenerateDisplayFragmentShader(false, true);
+        break;
+    }
+
+    std::unique_ptr vso =
+      g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error);
+    std::unique_ptr fso =
+      g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error);
+    if (!vso || !fso)
+      return false;
+    GL_OBJECT_NAME(vso, "Display Vertex Shader");
+    GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]",
+                       Settings::GetDisplayScalingName(g_gpu_settings.display_scaling));
+    plconfig.vertex_shader = vso.get();
+    plconfig.fragment_shader = fso.get();
+    if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
+      return false;
+    GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]",
+                       Settings::GetDisplayScalingName(g_gpu_settings.display_scaling));
+  }
+
+  // The remaining stages use no vertex input layout (reset here after the display stage may have set one).
+  plconfig.input_layout = {};
+  plconfig.primitive = GPUPipeline::Primitive::Triangles;
+
+  if (deinterlace)
+  {
+    std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
+                                                     shadergen.GenerateScreenQuadVertexShader(), error);
+    if (!vso)
+      return false;
+    GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader");
+
+    plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
+    plconfig.vertex_shader = vso.get();
+    plconfig.SetTargetFormats(GPUTexture::Format::RGBA8);
+
+    switch (g_gpu_settings.display_deinterlacing_mode)
+    {
+      // No pipeline is built for these modes; any previously created m_deinterlace_pipeline is left as-is.
+      case DisplayDeinterlacingMode::Disabled:
+      case DisplayDeinterlacingMode::Progressive:
+        break;
+
+      case DisplayDeinterlacingMode::Weave:
+      {
+        std::unique_ptr fso = g_gpu_device->CreateShader(
+          GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateDeinterlaceWeaveFragmentShader(), error);
+        if (!fso)
+          return false;
+
+        GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader");
+
+        plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
+        plconfig.vertex_shader = vso.get();
+        plconfig.fragment_shader = fso.get();
+        if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
+          return false;
+
+        GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline");
+      }
+      break;
+
+      case DisplayDeinterlacingMode::Blend:
+      {
+        std::unique_ptr fso = g_gpu_device->CreateShader(
+          GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateDeinterlaceBlendFragmentShader(), error);
+        if (!fso)
+          return false;
+
+        GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader");
+
+        // Blend samples more than one field buffer, hence the multi-texture layout.
+        plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
+        plconfig.vertex_shader = vso.get();
+        plconfig.fragment_shader = fso.get();
+        if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
+          return false;
+
+        GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline");
+      }
+      break;
+
+      case DisplayDeinterlacingMode::Adaptive:
+      {
+        std::unique_ptr fso =
+          g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
+                                     shadergen.GenerateFastMADReconstructFragmentShader(), error);
+        if (!fso)
+          return false;
+
+        GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader");
+
+        // vertex_shader was already assigned before the switch, so only layout/fragment change here.
+        plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
+        plconfig.fragment_shader = fso.get();
+        if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
+          return false;
+
+        GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline");
+      }
+      break;
+
+      default:
+        UnreachableCode();
+    }
+  }
+
+  if (chroma_smoothing)
+  {
+    // Always release the old pipeline/texture; they are only recreated when the setting is enabled.
+    m_chroma_smoothing_pipeline.reset();
+    g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture));
+
+    if (g_gpu_settings.display_24bit_chroma_smoothing)
+    {
+      plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
+      plconfig.SetTargetFormats(GPUTexture::Format::RGBA8);
+
+      std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
+                                                       shadergen.GenerateScreenQuadVertexShader(), error);
+      std::unique_ptr fso = g_gpu_device->CreateShader(
+        GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error);
+      if (!vso || !fso)
+        return false;
+      GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader");
+      GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader");
+
+      plconfig.vertex_shader = vso.get();
+      plconfig.fragment_shader = fso.get();
+      if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
+        return false;
+      GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline");
+    }
+  }
+
+  return true;
+}
+
+void GPUPresenter::ClearDisplay()
+{
+  ClearDisplayTexture();
+
+  // Just recycle the textures, it'll get re-fetched.
+  DestroyDeinterlaceTextures();
+}
+
+// Forgets the current display texture and zeroes its view rectangle. Does not free any texture.
+// NOTE(review): m_display_depth_buffer is not reset here - confirm that keeping the stale pointer is intended.
+void GPUPresenter::ClearDisplayTexture()
+{
+  m_display_texture = nullptr;
+  m_display_texture_view_x = 0;
+  m_display_texture_view_y = 0;
+  m_display_texture_view_width = 0;
+  m_display_texture_view_height = 0;
+}
+
+// Stores the display geometry later consumed by CalculateDrawRect() and the post-processing calls
+// in RenderDisplay(). Values are copied verbatim into the corresponding members.
+void GPUPresenter::SetDisplayParameters(u16 display_width, u16 display_height, u16 display_origin_left,
+                                        u16 display_origin_top, u16 display_vram_width, u16 display_vram_height,
+                                        float display_pixel_aspect_ratio)
+{
+  m_display_width = display_width;
+  m_display_height = display_height;
+  m_display_origin_left = display_origin_left;
+  m_display_origin_top = display_origin_top;
+  m_display_vram_width = display_vram_width;
+  m_display_vram_height = display_vram_height;
+  m_display_pixel_aspect_ratio = display_pixel_aspect_ratio;
+}
+
+// Selects the texture (and optional depth buffer) to present, plus the sub-rectangle of it to show.
+// `texture` must be non-null (asserted); the presenter stores the raw pointers and does not take ownership.
+void GPUPresenter::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y,
+                                     s32 view_width, s32 view_height)
+{
+  DebugAssert(texture);
+
+  // When auto-resize is enabled and the view size changed, ask the CPU thread to request a new display size.
+  if (g_gpu_settings.display_auto_resize_window &&
+      (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height))
+  {
+    Host::RunOnCPUThread([]() { System::RequestDisplaySize(); });
+  }
+
+  m_display_texture = texture;
+  m_display_depth_buffer = depth_buffer;
+  m_display_texture_view_x = view_x;
+  m_display_texture_view_y = view_y;
+  m_display_texture_view_width = view_width;
+  m_display_texture_view_height = view_height;
+}
+
+// Renders the current display to the main swap chain. Returns SkipPresent when there is no swap chain,
+// otherwise whatever RenderDisplay() returns. Post-processing is disabled while gpu_show_vram is set.
+GPUDevice::PresentResult GPUPresenter::PresentDisplay()
+{
+  if (!g_gpu_device->HasMainSwapChain())
+    return GPUDevice::PresentResult::SkipPresent;
+
+  GSVector4i display_rect;
+  GSVector4i draw_rect;
+  CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(),
+                    !g_gpu_settings.gpu_show_vram, true, &display_rect, &draw_rect);
+  return RenderDisplay(nullptr, display_rect, draw_rect, !g_gpu_settings.gpu_show_vram);
+}
+
+// Draws the display texture into `target`, or into the main swap chain when `target` is null.
+//   display_rect - forwarded to the display post-processing chain.
+//   draw_rect    - destination rectangle for the display quad (also used as the scissor).
+//   postfx       - allows the internal and display post-processing chains to run if they are active.
+// Returns the BeginPresent() result if acquiring the swap chain fails, the DisplayChain.Apply() result
+// when display post-processing ran, and OK otherwise. With a null `target` the main swap chain is
+// dereferenced unconditionally, so callers must have checked HasMainSwapChain() (PresentDisplay() does).
+GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const GSVector4i display_rect,
+                                                     const GSVector4i draw_rect, bool postfx)
+{
+  GL_SCOPE_FMT("RenderDisplay: {}", draw_rect);
+
+  if (m_display_texture)
+    m_display_texture->MakeReadyForSampling();
+
+  // Internal post-processing.
+  GPUTexture* display_texture = m_display_texture;
+  s32 display_texture_view_x = m_display_texture_view_x;
+  s32 display_texture_view_y = m_display_texture_view_y;
+  s32 display_texture_view_width = m_display_texture_view_width;
+  s32 display_texture_view_height = m_display_texture_view_height;
+  if (postfx && display_texture && PostProcessing::InternalChain.IsActive() &&
+      PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width,
+                                                 display_texture_view_height))
+  {
+    DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 &&
+                static_cast(display_texture->GetWidth()) == display_texture_view_width &&
+                static_cast(display_texture->GetHeight()) == display_texture_view_height);
+
+    // Now we can apply the post chain.
+    GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture();
+    if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture,
+                                            GSVector4i(0, 0, display_texture_view_width, display_texture_view_height),
+                                            display_texture_view_width, display_texture_view_height, m_display_width,
+                                            m_display_height) == GPUDevice::PresentResult::OK)
+    {
+      // On success the chain output replaces the display texture for the rest of this call only.
+      display_texture_view_x = 0;
+      display_texture_view_y = 0;
+      display_texture = post_output_texture;
+      display_texture->MakeReadyForSampling();
+    }
+  }
+
+  const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat();
+  const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth();
+  const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight();
+  const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() &&
+                              hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 &&
+                              PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height));
+  // Swap chain pre-rotation only applies when drawing straight to the swap chain.
+  const u32 real_target_width =
+    (target || really_postfx) ? target_width : g_gpu_device->GetMainSwapChain()->GetPostRotatedWidth();
+  const u32 real_target_height =
+    (target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight();
+  GSVector4i real_draw_rect =
+    (target || really_postfx) ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect);
+  if (really_postfx)
+  {
+    // Draw into the display chain's input texture; the chain writes the final target at the end.
+    g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR);
+    g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture());
+  }
+  else
+  {
+    if (target)
+    {
+      g_gpu_device->SetRenderTarget(target);
+    }
+    else
+    {
+      const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain());
+      if (pres != GPUDevice::PresentResult::OK)
+        return pres;
+    }
+  }
+
+  if (display_texture)
+  {
+    bool texture_filter_linear = false;
+
+    struct alignas(16) Uniforms
+    {
+      float src_size[4];
+      float clamp_rect[4];
+      float params[4];
+    } uniforms;
+    std::memset(uniforms.params, 0, sizeof(uniforms.params));
+
+    switch (g_gpu_settings.display_scaling)
+    {
+      case DisplayScalingMode::Nearest:
+      case DisplayScalingMode::NearestInteger:
+        break;
+
+      case DisplayScalingMode::BilinearSmooth:
+      case DisplayScalingMode::BilinearInteger:
+        texture_filter_linear = true;
+        break;
+
+      case DisplayScalingMode::BilinearSharp:
+      {
+        // params[0..1]: integer upscale factor per axis (at least 1); params[2..3] are derived from it.
+        texture_filter_linear = true;
+        uniforms.params[0] = std::max(
+          std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f);
+        uniforms.params[1] = std::max(
+          std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f);
+        uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0];
+        uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1];
+      }
+      break;
+
+      default:
+        UnreachableCode();
+        break;
+    }
+
+    g_gpu_device->SetPipeline(m_display_pipeline.get());
+    g_gpu_device->SetTextureSampler(
+      0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler());
+
+    // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because
+    // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel.
+    const GSVector2 display_texture_size = GSVector2(display_texture->GetSizeVec());
+    const GSVector4 display_texture_size4 = GSVector4::xyxy(display_texture_size);
+    const GSVector4 uv_rect = GSVector4(GSVector4i(display_texture_view_x, display_texture_view_y,
+                                                   display_texture_view_x + display_texture_view_width,
+                                                   display_texture_view_y + display_texture_view_height)) /
+                              display_texture_size4;
+    GSVector4::store(uniforms.clamp_rect,
+                     GSVector4(static_cast(display_texture_view_x) + 0.5f,
+                               static_cast(display_texture_view_y) + 0.5f,
+                               static_cast(display_texture_view_x + display_texture_view_width) - 0.5f,
+                               static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) /
+                       display_texture_size4);
+    GSVector4::store(uniforms.src_size,
+                     GSVector4::xyxy(display_texture_size, GSVector2::cxpr(1.0f) / display_texture_size));
+
+    g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
+
+    g_gpu_device->SetViewport(0, 0, real_target_width, real_target_height);
+    g_gpu_device->SetScissor(g_gpu_device->UsesLowerLeftOrigin() ?
+                               GPUDevice::FlipToLowerLeft(real_draw_rect, real_target_height) :
+                               real_draw_rect);
+
+    GPUBackend::ScreenVertex* vertices;
+    u32 space;
+    u32 base_vertex;
+    g_gpu_device->MapVertexBuffer(sizeof(GPUBackend::ScreenVertex), 4, reinterpret_cast(&vertices), &space,
+                                  &base_vertex);
+
+    const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ?
+                                                          WindowInfo::PreRotation::Identity :
+                                                          g_gpu_device->GetMainSwapChain()->GetPreRotation();
+
+    // User rotation and surface pre-rotation are summed modulo DisplayRotation::Count.
+    const DisplayRotation uv_rotation = static_cast(
+      (static_cast(g_gpu_settings.display_rotation) + static_cast(surface_prerotation)) %
+      static_cast(DisplayRotation::Count));
+
+    // Vertex positions are identical for every rotation; only the UV assigned to each corner changes.
+    // NOTE(review): the Rotate180 table assigns Normal's UVs with the two rows swapped (x unchanged),
+    // which is a vertical mirror rather than a 180 degree rotation, and its vertices 0 and 3 match
+    // Rotate90. Confirm this is intended.
+    const GSVector4 xy =
+      GPUBackend::GetScreenQuadClipSpaceCoordinates(real_draw_rect, GSVector2i(real_target_width, real_target_height));
+    switch (uv_rotation)
+    {
+      case DisplayRotation::Normal:
+        vertices[0].Set(xy.xy(), uv_rect.xy());
+        vertices[1].Set(xy.zyzw().xy(), uv_rect.zyzw().xy());
+        vertices[2].Set(xy.xwzw().xy(), uv_rect.xwzw().xy());
+        vertices[3].Set(xy.zw(), uv_rect.zw());
+        break;
+      case DisplayRotation::Rotate90:
+        vertices[0].Set(xy.xy(), uv_rect.xwzw().xy());
+        vertices[1].Set(xy.zyzw().xy(), uv_rect.xy());
+        vertices[2].Set(xy.xwzw().xy(), uv_rect.zw());
+        vertices[3].Set(xy.zw(), uv_rect.zyzw().xy());
+        break;
+      case DisplayRotation::Rotate180:
+        vertices[0].Set(xy.xy(), uv_rect.xwzw().xy());
+        vertices[1].Set(xy.zyzw().xy(), uv_rect.zw());
+        vertices[2].Set(xy.xwzw().xy(), uv_rect.xy());
+        vertices[3].Set(xy.zw(), uv_rect.zyzw().xy());
+        break;
+      case DisplayRotation::Rotate270:
+        vertices[0].Set(xy.xy(), uv_rect.zyzw().xy());
+        vertices[1].Set(xy.zyzw().xy(), uv_rect.zw());
+        vertices[2].Set(xy.xwzw().xy(), uv_rect.xy());
+        vertices[3].Set(xy.zw(), uv_rect.xwzw().xy());
+        break;
+
+        DefaultCaseIsUnreachable();
+    }
+
+    g_gpu_device->UnmapVertexBuffer(sizeof(GPUBackend::ScreenVertex), 4);
+    g_gpu_device->Draw(4, base_vertex);
+  }
+
+  if (really_postfx)
+  {
+    DebugAssert(!g_gpu_settings.gpu_show_vram);
+
+    // "original size" in postfx includes padding.
+    const float upscale_x =
+      m_display_texture ? static_cast(m_display_texture_view_width) / static_cast(m_display_vram_width) :
+                          1.0f;
+    const float upscale_y = m_display_texture ? static_cast(m_display_texture_view_height) /
+                                                  static_cast(m_display_vram_height) :
+                                                1.0f;
+    const s32 orig_width = static_cast(std::ceil(static_cast(m_display_width) * upscale_x));
+    const s32 orig_height = static_cast(std::ceil(static_cast(m_display_height) * upscale_y));
+
+    return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target,
+                                              display_rect, orig_width, orig_height, m_display_width, m_display_height);
+  }
+  else
+  {
+    return GPUDevice::PresentResult::OK;
+  }
+}
+
+// Renders the current display into the capture's render texture and delivers it as a video frame.
+// If the texture cannot be obtained, a warning is logged and media capture is asked to stop on the CPU thread.
+void GPUPresenter::SendDisplayToMediaCapture(MediaCapture* cap)
+{
+  GPUTexture* target = cap->GetRenderTexture();
+  if (!target) [[unlikely]]
+  {
+    WARNING_LOG("Failed to get video capture render texture.");
+    Host::RunOnCPUThread(&System::StopMediaCapture);
+    return;
+  }
+
+  // Both flags are derived from the screenshot mode setting.
+  const bool apply_aspect_ratio =
+    (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution);
+  const bool postfx = (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution);
+  GSVector4i display_rect, draw_rect;
+  CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_gpu_settings.gpu_show_vram, apply_aspect_ratio,
+                    &display_rect, &draw_rect);
+
+  // Not cleared by RenderDisplay().
+  g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR);
+
+  if (RenderDisplay(target, display_rect, draw_rect, postfx) != GPUDevice::PresentResult::OK ||
+      !cap->DeliverVideoFrame(target)) [[unlikely]]
+  {
+    WARNING_LOG("Failed to render/deliver video capture frame.");
+    Host::RunOnCPUThread(&System::StopMediaCapture);
+    return;
+  }
+}
+
+// Returns every field buffer and the deinterlace target to the device via RecycleTexture(), and
+// restarts the field-buffer rotation at index 0.
+void GPUPresenter::DestroyDeinterlaceTextures()
+{
+  for (std::unique_ptr& tex : m_deinterlace_buffers)
+    g_gpu_device->RecycleTexture(std::move(tex));
+  g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture));
+  m_current_deinterlace_buffer = 0;
+}
+
+// Deinterlaces the current display texture view according to display_deinterlacing_mode and, except
+// in Disabled mode, replaces the display texture with the result (m_deinterlace_texture).
+//   field - index of the field being shown; forwarded to the Weave and Adaptive shaders.
+// Returns true on success. If a texture cannot be (re)allocated, the display texture is cleared and
+// false is returned.
+// NOTE(review): there is no case for DisplayDeinterlacingMode::Progressive, so that mode reaches
+// UnreachableCode(); presumably callers never call this in Progressive mode - confirm.
+bool GPUPresenter::Deinterlace(u32 field)
+{
+  GPUTexture* src = m_display_texture;
+  const u32 x = m_display_texture_view_x;
+  const u32 y = m_display_texture_view_y;
+  const u32 width = m_display_texture_view_width;
+  const u32 height = m_display_texture_view_height;
+
+  // Sizes field buffer `buffer` to the current view and copies the view into it.
+  // Returns false if the buffer could not be resized.
+  const auto copy_to_field_buffer = [&](u32 buffer) {
+    if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture,
+                                     src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]]
+    {
+      return false;
+    }
+
+    GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer);
+
+    GL_INS_FMT("Copy {}x{} from {},{} to field buffer {}", width, height, x, y, buffer);
+    g_gpu_device->CopyTextureRegion(m_deinterlace_buffers[buffer].get(), 0, 0, 0, 0, m_display_texture, x, y, 0, 0,
+                                    width, height);
+    return true;
+  };
+
+  src->MakeReadyForSampling();
+
+  switch (g_gpu_settings.display_deinterlacing_mode)
+  {
+    case DisplayDeinterlacingMode::Disabled:
+    {
+      GL_INS("Deinterlacing disabled, displaying field texture");
+      return true;
+    }
+
+    case DisplayDeinterlacingMode::Weave:
+    {
+      GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={})", x, y, width, height, field);
+
+      // The target is resized with preserve=true, so lines written for the previous field are kept.
+      const u32 full_height = height * 2;
+      if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]]
+      {
+        ClearDisplayTexture();
+        return false;
+      }
+
+      src->MakeReadyForSampling();
+
+      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
+      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
+      g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler());
+      const u32 uniforms[4] = {x, y, field, 0};
+      g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
+      g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
+      g_gpu_device->Draw(3, 0);
+
+      m_deinterlace_texture->MakeReadyForSampling();
+      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
+      return true;
+    }
+
+    case DisplayDeinterlacingMode::Blend:
+    {
+      constexpr u32 NUM_BLEND_BUFFERS = 2;
+
+      GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={})", x, y, width, height, field);
+
+      const u32 this_buffer = m_current_deinterlace_buffer;
+      m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS;
+      GL_INS_FMT("Current buffer: {}", this_buffer);
+      // The guard below is the only place the field is copied into its buffer; one copy per call is enough.
+      if (!DeinterlaceSetTargetSize(width, height, false) || !copy_to_field_buffer(this_buffer)) [[unlikely]]
+      {
+        ClearDisplayTexture();
+        return false;
+      }
+
+      // TODO: could be implemented with alpha blending instead..
+      // (this_buffer - 1) relies on u32 wraparound; the modulo is only correct because the buffer
+      // count is a power of two.
+      g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get());
+      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
+      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
+      g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
+      g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(),
+                                      g_gpu_device->GetNearestSampler());
+      g_gpu_device->SetViewportAndScissor(0, 0, width, height);
+      g_gpu_device->Draw(3, 0);
+
+      m_deinterlace_texture->MakeReadyForSampling();
+      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height);
+      return true;
+    }
+
+    case DisplayDeinterlacingMode::Adaptive:
+    {
+      GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={})", x, y, width, height, field);
+
+      const u32 this_buffer = m_current_deinterlace_buffer;
+      const u32 full_height = height * 2;
+      m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT;
+      GL_INS_FMT("Current buffer: {}", this_buffer);
+      if (!DeinterlaceSetTargetSize(width, full_height, false) || !copy_to_field_buffer(this_buffer)) [[unlikely]]
+      {
+        ClearDisplayTexture();
+        return false;
+      }
+
+      // Binds the current field plus the three before it. (this_buffer - k) wraps as u32, so the
+      // modulo only yields the intended slot if DEINTERLACE_BUFFER_COUNT is a power of two -
+      // NOTE(review): its value is not visible here, confirm.
+      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
+      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
+      g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
+      g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(),
+                                      g_gpu_device->GetNearestSampler());
+      g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(),
+                                      g_gpu_device->GetNearestSampler());
+      g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(),
+                                      g_gpu_device->GetNearestSampler());
+      const u32 uniforms[] = {field, full_height};
+      g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
+      g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
+      g_gpu_device->Draw(3, 0);
+
+      m_deinterlace_texture->MakeReadyForSampling();
+      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
+      return true;
+    }
+
+    default:
+      UnreachableCode();
+  }
+}
+
+// Ensures m_deinterlace_texture is an RGBA8 render target of the given size. `preserve` is forwarded
+// to GPUDevice::ResizeTexture(). Returns false if the resize fails.
+bool GPUPresenter::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve)
+{
+  if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget,
+                                   GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]]
+  {
+    return false;
+  }
+
+  GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture");
+  return true;
+}
+
+// Runs the chroma smoothing pipeline over the current display texture view into
+// m_chroma_smoothing_texture. If that texture cannot be allocated, the display texture is cleared
+// and false is returned.
+bool GPUPresenter::ApplyChromaSmoothing()
+{
+  const u32 x = m_display_texture_view_x;
+  const u32 y = m_display_texture_view_y;
+  const u32 width = m_display_texture_view_width;
+  const u32 height = m_display_texture_view_height;
+  if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
+                                   GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false))
+  {
+    ClearDisplayTexture();
+    return false;
+  }
+
+  GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
+
+  GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height);
+
+  m_display_texture->MakeReadyForSampling();
+  g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get());
+  g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get());
+  g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get());
+  g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler());
+  const u32 uniforms[] = {x, y, width - 1, height - 1};
+  g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
+  g_gpu_device->SetViewportAndScissor(0, 0, width, height);
+  g_gpu_device->Draw(3, 0);
+
+  m_chroma_smoothing_texture->MakeReadyForSampling();
+
SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); + return true; +} + +void GPUPresenter::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, + GSVector4i* display_rect, GSVector4i* draw_rect) const +{ + const bool integer_scale = (g_gpu_settings.display_scaling == DisplayScalingMode::NearestInteger || + g_gpu_settings.display_scaling == DisplayScalingMode::BilinearInteger); + const bool show_vram = g_gpu_settings.gpu_show_vram; + const u32 display_width = show_vram ? VRAM_WIDTH : m_display_width; + const u32 display_height = show_vram ? VRAM_HEIGHT : m_display_height; + const s32 display_origin_left = show_vram ? 0 : m_display_origin_left; + const s32 display_origin_top = show_vram ? 0 : m_display_origin_top; + const u32 display_vram_width = show_vram ? VRAM_WIDTH : m_display_vram_width; + const u32 display_vram_height = show_vram ? VRAM_HEIGHT : m_display_vram_height; + const float display_pixel_aspect_ratio = show_vram ? 1.0f : m_display_pixel_aspect_ratio; + GPU::CalculateDrawRect(window_width, window_height, display_width, display_height, display_origin_left, + display_origin_top, display_vram_width, display_vram_height, g_gpu_settings.display_rotation, + g_gpu_settings.display_alignment, display_pixel_aspect_ratio, + g_gpu_settings.display_stretch_vertically, integer_scale, display_rect, draw_rect); +} + +bool GPUPresenter::PresentFrame(GPUPresenter* presenter, GPUBackend* backend, bool allow_skip_present, u64 present_time) +{ + const bool skip_present = (!g_gpu_device->HasMainSwapChain() || + (allow_skip_present && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame() && + presenter && presenter->m_skipped_present_count < MAX_SKIPPED_PRESENT_COUNT)); + + if (!skip_present) + { + // acquire for IO.MousePos and system state. 
+ std::atomic_thread_fence(std::memory_order_acquire); + + FullscreenUI::Render(); + + if (backend && System::IsValid()) + ImGuiManager::RenderTextOverlays(backend); + + ImGuiManager::RenderOverlayWindows(); + + ImGuiManager::RenderOSDMessages(); + + ImGuiFullscreen::RenderOverlays(); + + if (backend && System::GetState() == System::State::Running) + ImGuiManager::RenderSoftwareCursors(); + + ImGuiManager::RenderDebugWindows(); + } + + const GPUDevice::PresentResult pres = + skip_present ? + GPUDevice::PresentResult::SkipPresent : + (presenter ? presenter->PresentDisplay() : g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())); + if (pres == GPUDevice::PresentResult::OK) + { + if (presenter) + presenter->m_skipped_present_count = 0; + + g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); + + const GPUDevice::Features features = g_gpu_device->GetFeatures(); + const bool scheduled_present = (present_time != 0); + const bool explicit_present = (scheduled_present && (features.explicit_present && !features.timed_present)); + const bool timed_present = (scheduled_present && features.timed_present); + + if (scheduled_present && !explicit_present) + { + // No explicit present support, simulate it with Flush. + g_gpu_device->FlushCommands(); + SleepUntilPresentTime(present_time); + } + + g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, timed_present ? 
present_time : 0); + + if (g_gpu_device->IsGPUTimingEnabled()) + PerformanceCounters::AccumulateGPUTime(); + + if (explicit_present) + { + SleepUntilPresentTime(present_time); + g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); + } + } + else + { + if (presenter) + presenter->m_skipped_present_count++; + + if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] + { + ERROR_LOG("GPU device lost during present."); + return false; + } + + if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) [[unlikely]] + { + WARNING_LOG("Lost exclusive fullscreen."); + Host::SetFullscreen(false); + } + + if (!skip_present) + g_gpu_device->FlushCommands(); + + // Still need to kick ImGui or it gets cranky. + ImGui::EndFrame(); + } + + ImGuiManager::NewFrame(); + return true; +} + +void GPUPresenter::SleepUntilPresentTime(u64 present_time) +{ + // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. + // Linux also seems to do a much better job of waking up at the requested time. + +#if !defined(__linux__) && !defined(__ANDROID__) + Timer::SleepUntil(present_time, true); +#else + Timer::SleepUntil(present_time, false); +#endif +} + +bool GPUPresenter::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, + const GSVector4i draw_rect, bool postfx, Image* out_image) +{ + const GPUTexture::Format hdformat = + g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + const ImageFormat image_format = GPUTexture::GetImageFormatForTextureFormat(hdformat); + if (image_format == ImageFormat::None) + return false; + + auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, + hdformat, GPUTexture::Flags::None); + if (!render_texture) + return false; + + g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); + + // TODO: this should use copy shader instead. 
+ RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); + + Image image(width, height, image_format); + + Error error; + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, image.GetPixels(), image.GetStorageSize(), + image.GetPitch(), &error); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, &error))) + { + ERROR_LOG("Failed to create {}x{} download texture: {}", width, height, error.GetDescription()); + return false; + } + } + + dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); + if (!dltex->ReadTexels(0, 0, width, height, image.GetPixels(), image.GetPitch())) + return false; + + *out_image = std::move(image); + return true; +} + +void GPUPresenter::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, + GSVector4i* display_rect, GSVector4i* draw_rect) const +{ + const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_gpu_settings.gpu_show_vram); + if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + { + if (mode == DisplayScreenshotMode::InternalResolution) + { + float f_width = static_cast(m_display_texture_view_width); + float f_height = static_cast(m_display_texture_view_height); + if (!g_gpu_settings.gpu_show_vram) + GPU::ApplyPixelAspectRatioToSize(m_display_pixel_aspect_ratio, &f_width, &f_height); + + // DX11 won't go past 16K texture size. 
+ const float max_texture_size = static_cast(g_gpu_device->GetMaxTextureSize()); + if (f_width > max_texture_size) + { + f_height = f_height / (f_width / max_texture_size); + f_width = max_texture_size; + } + if (f_height > max_texture_size) + { + f_height = max_texture_size; + f_width = f_width / (f_height / max_texture_size); + } + + *width = static_cast(std::ceil(f_width)); + *height = static_cast(std::ceil(f_height)); + } + else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) + { + *width = m_display_texture_view_width; + *height = m_display_texture_view_height; + } + + // Remove padding, it's not part of the framebuffer. + *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); + *display_rect = *draw_rect; + } + else + { + *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; + *height = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetHeight() : 1; + CalculateDrawRect(*width, *height, true, !g_settings.gpu_show_vram, display_rect, draw_rect); + } +} diff --git a/src/core/gpu_presenter.h b/src/core/gpu_presenter.h new file mode 100644 index 000000000..47033928a --- /dev/null +++ b/src/core/gpu_presenter.h @@ -0,0 +1,124 @@ +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#pragma once + +#include "util/gpu_device.h" + +#include + +class Error; +class Image; +class MediaCapture; + +enum class DisplayScreenshotMode : u8; + +class GPUBackend; + +struct GPUSettings; +struct GPUBackendUpdateDisplayCommand; +struct GPUBackendFramePresentationParameters; + +class GPUPresenter final +{ +public: + GPUPresenter(); + virtual ~GPUPresenter(); + + /// Main frame presenter - used both when a game is and is not running. 
+ static bool PresentFrame(GPUPresenter* presenter, GPUBackend* backend, bool allow_skip_present, u64 present_time); + + ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } + ALWAYS_INLINE s32 GetDisplayHeight() const { return m_display_height; } + ALWAYS_INLINE s32 GetDisplayVRAMWidth() const { return m_display_vram_width; } + ALWAYS_INLINE s32 GetDisplayVRAMHeight() const { return m_display_vram_height; } + ALWAYS_INLINE s32 GetDisplayTextureViewX() const { return m_display_texture_view_x; } + ALWAYS_INLINE s32 GetDisplayTextureViewY() const { return m_display_texture_view_y; } + ALWAYS_INLINE s32 GetDisplayTextureViewWidth() const { return m_display_texture_view_width; } + ALWAYS_INLINE s32 GetDisplayTextureViewHeight() const { return m_display_texture_view_height; } + ALWAYS_INLINE GPUTexture* GetDisplayTexture() const { return m_display_texture; } + ALWAYS_INLINE GPUTexture* GetDisplayDepthBuffer() const { return m_display_depth_buffer; } + ALWAYS_INLINE bool HasDisplayTexture() const { return m_display_texture; } + + bool Initialize(Error* error); + + void UpdateSettings(const GPUSettings& old_settings); + + void ClearDisplay(); + void ClearDisplayTexture(); + void SetDisplayParameters(u16 display_width, u16 display_height, u16 display_origin_left, u16 display_origin_top, + u16 display_vram_width, u16 display_vram_height, float display_pixel_aspect_ratio); + void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, + s32 view_height); + bool Deinterlace(u32 field); + bool ApplyChromaSmoothing(); + + /// Helper function for computing the draw rectangle in a larger window. + void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, + GSVector4i* display_rect, GSVector4i* draw_rect) const; + + /// Helper function for computing screenshot bounds. 
+ void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, + GSVector4i* draw_rect) const; + + /// Renders the display, optionally with postprocessing to the specified image. + bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx, Image* out_image); + + /// Sends the current frame to media capture. + void SendDisplayToMediaCapture(MediaCapture* cap); + +private: + enum : u32 + { + DEINTERLACE_BUFFER_COUNT = 4, + MAX_SKIPPED_PRESENT_COUNT = 50, + }; + + static void SleepUntilPresentTime(u64 present_time); + + /// Draws the current display texture, with any post-processing. + GPUDevice::PresentResult PresentDisplay(); + + bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); + + GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx); + + bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); + void DestroyDeinterlaceTextures(); + + s32 m_display_width = 0; + s32 m_display_height = 0; + + s32 m_display_origin_left = 0; + s32 m_display_origin_top = 0; + s32 m_display_vram_width = 0; + s32 m_display_vram_height = 0; + float m_display_pixel_aspect_ratio = 1.0f; + + u32 m_current_deinterlace_buffer = 0; + std::unique_ptr m_deinterlace_pipeline; + std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; + std::unique_ptr m_deinterlace_texture; + + std::unique_ptr m_chroma_smoothing_pipeline; + std::unique_ptr m_chroma_smoothing_texture; + + std::unique_ptr m_display_pipeline; + GPUTexture* m_display_texture = nullptr; + GPUTexture* m_display_depth_buffer = nullptr; + s32 m_display_texture_view_x = 0; + s32 m_display_texture_view_y = 0; + s32 m_display_texture_view_width = 0; + s32 m_display_texture_view_height = 0; + + u32 m_skipped_present_count = 0; +}; + +namespace Host { + +/// Called at the end of the frame, 
before presentation. +void FrameDoneOnGPUThread(GPUPresenter* gpu_presenter, u32 frame_number); + +} // namespace Host diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 86e7840eb..f85aa321c 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -3,6 +3,7 @@ #include "gpu_sw.h" #include "gpu.h" +#include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" #include "settings.h" #include "system_private.h" @@ -20,7 +21,9 @@ LOG_CHANNEL(GPU); -GPU_SW::GPU_SW() = default; +GPU_SW::GPU_SW(GPUPresenter& presenter) : GPUBackend(presenter) +{ +} GPU_SW::~GPU_SW() = default; @@ -209,7 +212,7 @@ GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format if (!m_upload_texture || m_upload_texture->GetWidth() != width || m_upload_texture->GetHeight() != height || m_upload_texture->GetFormat() != format) { - ClearDisplayTexture(); + m_presenter.ClearDisplayTexture(); g_gpu_device->RecycleTexture(std::move(m_upload_texture)); m_upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, format, GPUTexture::Flags::AllowMap, nullptr, 0); @@ -388,7 +391,7 @@ void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { if (cmd->display_disabled) { - ClearDisplayTexture(); + m_presenter.ClearDisplayTexture(); return; } @@ -407,15 +410,15 @@ void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { if (CopyOut(src_x, src_y, skip_x, width, height, line_skip, is_24bit)) { - SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, width, height); + m_presenter.SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, width, height); if (is_24bit && g_settings.display_24bit_chroma_smoothing) { - if (ApplyChromaSmoothing()) - Deinterlace(field); + if (m_presenter.ApplyChromaSmoothing()) + m_presenter.Deinterlace(field); } else { - Deinterlace(field); + m_presenter.Deinterlace(field); } } } @@ -423,20 +426,20 @@ void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { if 
(CopyOut(src_x, src_y, skip_x, width, height, 0, is_24bit)) { - SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, width, height); + m_presenter.SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, width, height); if (is_24bit && g_settings.display_24bit_chroma_smoothing) - ApplyChromaSmoothing(); + m_presenter.ApplyChromaSmoothing(); } } } else { if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false)) - SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT); + m_presenter.SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } } -std::unique_ptr GPUBackend::CreateSoftwareBackend() +std::unique_ptr GPUBackend::CreateSoftwareBackend(GPUPresenter& presenter) { - return std::make_unique(); + return std::make_unique(presenter); } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 9eb9a89a8..8c2c6ee9d 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -17,7 +17,7 @@ class GPU_SW final : public GPUBackend { public: - GPU_SW(); + GPU_SW(GPUPresenter& presenter); ~GPU_SW() override; bool Initialize(bool upload_vram, Error* error) override; diff --git a/src/core/gpu_thread.cpp b/src/core/gpu_thread.cpp index 5f2f8edbc..06f4534ad 100644 --- a/src/core/gpu_thread.cpp +++ b/src/core/gpu_thread.cpp @@ -5,6 +5,7 @@ #include "fullscreen_ui.h" #include "gpu_backend.h" #include "gpu_hw_texture_cache.h" +#include "gpu_presenter.h" #include "gpu_thread_commands.h" #include "gpu_types.h" #include "host.h" @@ -42,7 +43,6 @@ enum : u32 { COMMAND_QUEUE_SIZE = 16 * 1024 * 1024, THRESHOLD_TO_WAKE_GPU = 65536, - MAX_SKIPPED_PRESENT_COUNT = 50 }; static constexpr s32 THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING = 0x40000000; // CPU thread needs waking @@ -76,19 +76,17 @@ static void DestroyDeviceOnThread(bool clear_fsui_state); static void ResizeDisplayWindowOnThread(u32 width, u32 height, float scale); static void UpdateDisplayWindowOnThread(bool fullscreen); static void DisplayWindowResizedOnThread(); -static void 
HandleGPUDeviceLost(); -static void HandleExclusiveFullscreenLost(); static void ReconfigureOnThread(GPUThreadReconfigureCommand* cmd); static bool CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error); static void DestroyGPUBackendOnThread(); +static void DestroyGPUPresenterOnThread(); +static bool PresentFrameAndRestoreContext(); static void UpdateSettingsOnThread(const GPUSettings& old_settings); static void UpdateRunIdle(); -static void SleepUntilPresentTime(Timer::Value present_time); - namespace { struct ALIGN_TO_CACHE_LINE State @@ -109,8 +107,8 @@ struct ALIGN_TO_CACHE_LINE State // Owned by GPU thread. ALIGN_TO_CACHE_LINE std::unique_ptr gpu_backend; + ALIGN_TO_CACHE_LINE std::unique_ptr gpu_presenter; std::atomic command_fifo_read_ptr{0}; - u32 skipped_present_count = 0; u8 run_idle_reasons = 0; bool run_idle_flag = false; GPUVSyncMode requested_vsync = GPUVSyncMode::Disabled; @@ -528,7 +526,9 @@ void GPUThread::Internal::GPUThreadEntryPoint() void GPUThread::Internal::DoRunIdle() { - PresentFrame(false, 0); + if (!PresentFrameAndRestoreContext()) + return; + if (!g_gpu_device->GetMainSwapChain()->IsVSyncModeBlocking()) g_gpu_device->GetMainSwapChain()->ThrottlePresentation(); } @@ -722,6 +722,9 @@ void GPUThread::DestroyDeviceOnThread(bool clear_fsui_state) if (!g_gpu_device) return; + // Presenter should be gone by this point + Assert(!s_state.gpu_presenter); + const bool has_window = g_gpu_device->HasMainSwapChain(); FullscreenUI::Shutdown(clear_fsui_state); @@ -738,64 +741,30 @@ void GPUThread::DestroyDeviceOnThread(bool clear_fsui_state) std::atomic_thread_fence(std::memory_order_release); } -void GPUThread::HandleGPUDeviceLost() -{ - static Timer::Value s_last_gpu_reset_time = 0; - static constexpr float MIN_TIME_BETWEEN_RESETS = 15.0f; - - // If we're constantly crashing on something in particular, we don't want to end up in an - // endless reset loop.. 
that'd probably end up leaking memory and/or crashing us for other - // reasons. So just abort in such case. - const Timer::Value current_time = Timer::GetCurrentValue(); - if (s_last_gpu_reset_time != 0 && - Timer::ConvertValueToSeconds(current_time - s_last_gpu_reset_time) < MIN_TIME_BETWEEN_RESETS) - { - Panic("Host GPU lost too many times, device is probably completely wedged."); - } - s_last_gpu_reset_time = current_time; - - const bool is_fullscreen = Host::IsFullscreen(); - - // Device lost, something went really bad. - // Let's just toss out everything, and try to hobble on. - DestroyGPUBackendOnThread(); - DestroyDeviceOnThread(false); - - Error error; - if (!CreateDeviceOnThread( - Settings::GetRenderAPIForRenderer(s_state.requested_renderer.value_or(g_gpu_settings.gpu_renderer)), - is_fullscreen, true, &error) || - (s_state.requested_renderer.has_value() && - !CreateGPUBackendOnThread(s_state.requested_renderer.value(), true, &error))) - { - ERROR_LOG("Failed to recreate GPU device after loss: {}", error.GetDescription()); - Panic("Failed to recreate GPU device after loss."); - return; - } - - // First frame after reopening is definitely going to be trash, so skip it. - Host::AddIconOSDWarning( - "HostGPUDeviceLost", ICON_EMOJI_WARNING, - TRANSLATE_STR("System", "Host GPU device encountered an error and has recovered. This may cause broken rendering."), - Host::OSD_CRITICAL_ERROR_DURATION); -} - -void GPUThread::HandleExclusiveFullscreenLost() -{ - WARNING_LOG("Lost exclusive fullscreen."); - Host::SetFullscreen(false); -} - bool GPUThread::CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error) { + Error local_error; + + // Create presenter if we don't already have one. 
+ if (!s_state.gpu_presenter) + { + s_state.gpu_presenter = std::make_unique(); + if (!s_state.gpu_presenter->Initialize(&local_error)) + { + ERROR_LOG("Failed to create presenter: {}", local_error.GetDescription()); + Error::SetStringFmt(error, "Failed to create presenter: {}", local_error.GetDescription()); + s_state.gpu_presenter.reset(); + return false; + } + } + const bool is_hardware = (renderer != GPURenderer::Software); if (is_hardware) - s_state.gpu_backend = GPUBackend::CreateHardwareBackend(); + s_state.gpu_backend = GPUBackend::CreateHardwareBackend(*s_state.gpu_presenter); else - s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(*s_state.gpu_presenter); - Error local_error; bool okay = s_state.gpu_backend->Initialize(upload_vram, &local_error); if (!okay) { @@ -810,7 +779,7 @@ bool GPUThread::CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Host::OSD_CRITICAL_ERROR_DURATION); s_state.requested_renderer = GPURenderer::Software; - s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(*s_state.gpu_presenter); okay = s_state.gpu_backend->Initialize(upload_vram, &local_error); } @@ -825,7 +794,7 @@ bool GPUThread::CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); PostProcessing::Initialize(); ImGuiManager::UpdateDebugWindowConfig(); - Internal::RestoreContextAfterPresent(); + s_state.gpu_backend->RestoreDeviceContext(); SetRunIdleReason(RunIdleReason::NoGPUBackend, false); std::atomic_thread_fence(std::memory_order_release); return true; @@ -843,6 +812,7 @@ void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) if (!cmd->renderer.has_value() && !s_state.requested_fullscreen_ui) { DestroyGPUBackendOnThread(); + DestroyGPUPresenterOnThread(); DestroyDeviceOnThread(true); return; } @@ -875,6 +845,7 @@ void 
GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) if (cmd->force_recreate_device || !GPUDevice::IsSameRenderAPI(current_api, expected_api)) { const bool fullscreen = cmd->fullscreen.value_or(Host::IsFullscreen()); + DestroyGPUPresenterOnThread(); DestroyDeviceOnThread(false); Error local_error; @@ -902,7 +873,11 @@ void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) if (cmd->renderer.has_value()) { // Do we want a renderer? - *cmd->out_result = CreateGPUBackendOnThread(cmd->renderer.value(), cmd->upload_vram, cmd->error_ptr); + if (!(*cmd->out_result = CreateGPUBackendOnThread(cmd->renderer.value(), cmd->upload_vram, cmd->error_ptr))) + { + // No point keeping the presenter around. + DestroyGPUPresenterOnThread(); + } } else if (s_state.requested_fullscreen_ui) { @@ -913,6 +888,9 @@ void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) return; } + // Don't need to present game frames anymore. + DestroyGPUPresenterOnThread(); + // Don't need timing to run FSUI. g_gpu_device->SetGPUTimingEnabled(false); @@ -926,6 +904,7 @@ void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) else { // Device is no longer needed. + DestroyGPUBackendOnThread(); DestroyDeviceOnThread(true); } } @@ -945,6 +924,34 @@ void GPUThread::DestroyGPUBackendOnThread() s_state.gpu_backend.reset(); } +void GPUThread::DestroyGPUPresenterOnThread() +{ + if (!s_state.gpu_presenter) + return; + + VERBOSE_LOG("Shutting down GPU presenter..."); + + // Should have no queued frames by this point. Backend can get replaced with null. 
+ Assert(!s_state.gpu_backend); + Assert(GPUBackend::GetQueuedFrameCount() == 0); + + s_state.gpu_presenter.reset(); +} + +bool GPUThread::PresentFrameAndRestoreContext() +{ + if (s_state.gpu_backend) + s_state.gpu_backend->FlushRender(); + + if (!GPUPresenter::PresentFrame(s_state.gpu_presenter.get(), s_state.gpu_backend.get(), false, 0)) + return false; + + if (s_state.gpu_backend) + s_state.gpu_backend->RestoreDeviceContext(); + + return true; +} + void GPUThread::UpdateSettingsOnThread(const GPUSettings& old_settings) { if (g_gpu_device) @@ -964,11 +971,12 @@ void GPUThread::UpdateSettingsOnThread(const GPUSettings& old_settings) PostProcessing::UpdateSettings(); + s_state.gpu_presenter->UpdateSettings(old_settings); s_state.gpu_backend->UpdateSettings(old_settings); if (ImGuiManager::UpdateDebugWindowConfig() || (PostProcessing::DisplayChain.IsActive() && !IsSystemPaused())) - Internal::PresentFrame(false, 0); - - s_state.gpu_backend->RestoreDeviceContext(); + PresentFrameAndRestoreContext(); + else + s_state.gpu_backend->RestoreDeviceContext(); } } @@ -1057,7 +1065,7 @@ void GPUThread::UpdateSettings(bool gpu_settings_changed, bool device_settings_c { PostProcessing::UpdateSettings(); if (ImGuiManager::UpdateDebugWindowConfig() || (PostProcessing::DisplayChain.IsActive() && !IsSystemPaused())) - Internal::PresentFrame(false, 0); + PresentFrameAndRestoreContext(); } }); } @@ -1180,8 +1188,8 @@ void GPUThread::DisplayWindowResizedOnThread() { // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it // displayed. Two seems to be good enough. Maybe something to do with direct scanout. 
- Internal::PresentFrame(false, 0); - Internal::PresentFrame(false, 0); + PresentFrameAndRestoreContext(); + PresentFrameAndRestoreContext(); } if (g_gpu_settings.gpu_resolution_scale == 0) @@ -1231,112 +1239,12 @@ void GPUThread::PresentCurrentFrame() return; } - Internal::PresentFrame(false, 0); + // But we shouldn't be not running idle without a GPU backend. + if (s_state.gpu_backend) + PresentFrameAndRestoreContext(); }); } -void GPUThread::SleepUntilPresentTime(Timer::Value present_time) -{ - // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. - // Linux also seems to do a much better job of waking up at the requested time. - -#if !defined(__linux__) && !defined(__ANDROID__) - Timer::SleepUntil(present_time, true); -#else - Timer::SleepUntil(present_time, false); -#endif -} - -void GPUThread::Internal::PresentFrame(bool allow_skip_present, u64 present_time) -{ - if (s_state.gpu_backend) - s_state.gpu_backend->FlushRender(); - - const bool skip_present = (!g_gpu_device->HasMainSwapChain() || - (allow_skip_present && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame() && - s_state.skipped_present_count < MAX_SKIPPED_PRESENT_COUNT)); - - if (!skip_present) - { - // acquire for IO.MousePos and system state. - std::atomic_thread_fence(std::memory_order_acquire); - - FullscreenUI::Render(); - - if (s_state.gpu_backend && System::IsValid()) - ImGuiManager::RenderTextOverlays(s_state.gpu_backend.get()); - - ImGuiManager::RenderOverlayWindows(); - - ImGuiManager::RenderOSDMessages(); - - ImGuiFullscreen::RenderOverlays(); - - if (s_state.gpu_backend && System::GetState() == System::State::Running) - ImGuiManager::RenderSoftwareCursors(); - - ImGuiManager::RenderDebugWindows(); - } - - const GPUDevice::PresentResult pres = - skip_present ? GPUDevice::PresentResult::SkipPresent : - (s_state.gpu_backend ? 
s_state.gpu_backend->PresentDisplay() : - g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())); - if (pres == GPUDevice::PresentResult::OK) - { - s_state.skipped_present_count = 0; - - g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); - - const GPUDevice::Features features = g_gpu_device->GetFeatures(); - const bool scheduled_present = (present_time != 0); - const bool explicit_present = (scheduled_present && (features.explicit_present && !features.timed_present)); - const bool timed_present = (scheduled_present && features.timed_present); - - if (scheduled_present && !explicit_present) - { - // No explicit present support, simulate it with Flush. - g_gpu_device->FlushCommands(); - SleepUntilPresentTime(present_time); - } - - g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, timed_present ? present_time : 0); - - if (g_gpu_device->IsGPUTimingEnabled()) - PerformanceCounters::AccumulateGPUTime(); - - if (explicit_present) - { - SleepUntilPresentTime(present_time); - g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); - } - } - else - { - s_state.skipped_present_count++; - - if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] - HandleGPUDeviceLost(); - else if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) - HandleExclusiveFullscreenLost(); - else if (!skip_present) - g_gpu_device->FlushCommands(); - - // Still need to kick ImGui or it gets cranky. 
- ImGui::EndFrame(); - } - - ImGuiManager::NewFrame(); - - RestoreContextAfterPresent(); -} - -void GPUThread::Internal::RestoreContextAfterPresent() -{ - if (s_state.gpu_backend) - s_state.gpu_backend->RestoreDeviceContext(); -} - bool GPUThread::GetRunIdleReason(RunIdleReason reason) { return (s_state.run_idle_reasons & static_cast(reason)) != 0; diff --git a/src/core/gpu_thread.h b/src/core/gpu_thread.h index 57c061436..3c1daaaa1 100644 --- a/src/core/gpu_thread.h +++ b/src/core/gpu_thread.h @@ -89,7 +89,6 @@ void PushCommandAndWakeThread(GPUThreadCommand* cmd); void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); void SyncGPUThread(bool spin); -// NOTE: Only called by GPUBackend namespace Internal { const Threading::ThreadHandle& GetThreadHandle(); void ProcessStartup(); @@ -97,8 +96,6 @@ void SetThreadEnabled(bool enabled); void DoRunIdle(); void RequestShutdown(); void GPUThreadEntryPoint(); -void PresentFrame(bool allow_skip_present, u64 present_time); -void RestoreContextAfterPresent(); } // namespace Internal } // namespace GPUThread diff --git a/src/core/system.cpp b/src/core/system.cpp index bfe470444..c64313b0d 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -19,6 +19,7 @@ #include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_hw_texture_cache.h" +#include "gpu_presenter.h" #include "gpu_thread.h" #include "gte.h" #include "host.h" @@ -5254,8 +5255,7 @@ std::string System::GetScreenshotPath(const char* extension) return path; } -void System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality, - bool compress_on_thread) +void System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality) { if (!IsValid()) return; @@ -5264,7 +5264,7 @@ void System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, Displa if (!path) path = (auto_path = GetScreenshotPath(Settings::GetDisplayScreenshotFormatExtension(format))).c_str(); - 
GPUBackend::RenderScreenshotToFile(path, mode, quality, compress_on_thread, true); + GPUBackend::RenderScreenshotToFile(path, mode, quality, true); } bool System::StartRecordingGPUDump(const char* path /*= nullptr*/, u32 num_frames /*= 0*/) @@ -5332,8 +5332,8 @@ bool System::StartMediaCapture(std::string path) GSVector4i unused_display_rect, unused_draw_rect; u32 video_width, video_height; - backend->CalculateScreenshotSize(DisplayScreenshotMode::InternalResolution, &video_width, &video_height, - &unused_display_rect, &unused_draw_rect); + backend->GetPresenter().CalculateScreenshotSize(DisplayScreenshotMode::InternalResolution, &video_width, + &video_height, &unused_display_rect, &unused_draw_rect); // fire back to the CPU thread to actually start the capture Host::RunOnCPUThread([path = std::move(path), capture_audio, video_width, video_height]() mutable { diff --git a/src/core/system.h b/src/core/system.h index 3cf68b974..4207c8d57 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -391,7 +391,7 @@ void UpdateVolume(); /// Saves a screenshot to the specified file. If no file name is provided, one will be generated automatically. void SaveScreenshot(const char* path = nullptr, DisplayScreenshotMode mode = g_settings.display_screenshot_mode, DisplayScreenshotFormat format = g_settings.display_screenshot_format, - u8 quality = g_settings.display_screenshot_quality, bool compress_on_thread = true); + u8 quality = g_settings.display_screenshot_quality); /// Starts/stops GPU dump/trace recording. 
bool StartRecordingGPUDump(const char* path = nullptr, u32 num_frames = 1); diff --git a/src/duckstation-regtest/regtest_host.cpp b/src/duckstation-regtest/regtest_host.cpp index 6c6cefb81..31778350d 100644 --- a/src/duckstation-regtest/regtest_host.cpp +++ b/src/duckstation-regtest/regtest_host.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "core/achievements.h" @@ -8,6 +8,7 @@ #include "core/game_list.h" #include "core/gpu.h" #include "core/gpu_backend.h" +#include "core/gpu_presenter.h" #include "core/gpu_thread.h" #include "core/host.h" #include "core/spu.h" @@ -32,6 +33,7 @@ #include "common/path.h" #include "common/sha256_digest.h" #include "common/string_util.h" +#include "common/threading.h" #include "common/timer.h" #include "fmt/format.h" @@ -52,7 +54,7 @@ static void HookSignals(); static bool SetFolders(); static bool SetNewDataRoot(const std::string& filename); static void DumpSystemStateHashes(); -static std::string GetFrameDumpFilename(u32 frame); +static std::string GetFrameDumpPath(u32 frame); static void GPUThreadEntryPoint(); } // namespace RegTestHost @@ -400,8 +402,92 @@ void Host::DestroyAuxiliaryRenderWindow(AuxiliaryRenderWindowHandle handle, s32* void Host::FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number) { - if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame_number % s_frame_dump_interval) == 0)) - gpu_backend->WriteDisplayTextureToFile(RegTestHost::GetFrameDumpFilename(frame_number)); + const GPUPresenter& presenter = gpu_backend->GetPresenter(); + if (s_frame_dump_interval == 0 || (frame_number % s_frame_dump_interval) != 0 || !presenter.HasDisplayTexture()) + return; + + // Need to take a copy of the display texture. 
+ GPUTexture* const read_texture = presenter.GetDisplayTexture(); + const u32 read_x = static_cast(presenter.GetDisplayTextureViewX()); + const u32 read_y = static_cast(presenter.GetDisplayTextureViewY()); + const u32 read_width = static_cast(presenter.GetDisplayTextureViewWidth()); + const u32 read_height = static_cast(presenter.GetDisplayTextureViewHeight()); + const ImageFormat read_format = GPUTexture::GetImageFormatForTextureFormat(read_texture->GetFormat()); + if (read_format == ImageFormat::None) + return; + + Image image(read_width, read_height, read_format); + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, read_texture->GetFormat(), image.GetPixels(), + image.GetStorageSize(), image.GetPitch()); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, read_texture->GetFormat()))) + { + ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, + GPUTexture::GetFormatName(read_texture->GetFormat())); + return; + } + } + + dltex->CopyFromTexture(0, 0, read_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); + if (!dltex->ReadTexels(0, 0, read_width, read_height, image.GetPixels(), image.GetPitch())) + { + ERROR_LOG("Failed to read {}x{} download texture", read_width, read_height); + gpu_backend->RestoreDeviceContext(); + return; + } + + // no more GPU calls + gpu_backend->RestoreDeviceContext(); + + Error error; + const std::string path = RegTestHost::GetFrameDumpPath(frame_number); + auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); + return; + } + + System::QueueAsyncTask([path = std::move(path), fp = fp.release(), flip_y = g_gpu_device->UsesLowerLeftOrigin(), + image = std::move(image)]() mutable { + Error error; + + if (flip_y) + image.FlipY(); + 
+ if (image.GetFormat() != ImageFormat::RGBA8) + { + std::optional convert_image = image.ConvertToRGBA8(&error); + if (!convert_image.has_value()) + { + ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), + error.GetDescription()); + image.Invalidate(); + } + else + { + image = std::move(convert_image.value()); + } + } + + bool result = false; + if (image.IsValid()) + { + image.SetAllPixelsOpaque(); + + result = image.SaveToFile(path.c_str(), fp, Image::DEFAULT_SAVE_QUALITY, &error); + if (!result) + ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); + } + + std::fclose(fp); + return result; + }); } void Host::OpenURL(std::string_view url) @@ -781,7 +867,7 @@ bool RegTestHost::SetNewDataRoot(const std::string& filename) return true; } -std::string RegTestHost::GetFrameDumpFilename(u32 frame) +std::string RegTestHost::GetFrameDumpPath(u32 frame) { return Path::Combine(EmuFolders::DataRoot, fmt::format("frame_{:05d}.png", frame)); }