From 73044dffed321b2602b47493ac4708b08c87d89e Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 17 Sep 2022 20:46:27 -0500 Subject: [PATCH] GS:MTL: Add option to spin GPU during readbacks --- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 5 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 7 ++ pcsx2/CMakeLists.txt | 1 + pcsx2/Config.h | 1 + pcsx2/Frontend/MetalHostDisplay.mm | 1 + pcsx2/GS/GS.cpp | 1 + pcsx2/GS/Renderers/Metal/GSDeviceMTL.h | 11 ++ pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 119 +++++++++++++++++-- pcsx2/GS/Renderers/Metal/misc.metal | 24 ++++ pcsx2/GS/Window/GSSetting.cpp | 3 + pcsx2/GS/Window/GSSetting.h | 1 + pcsx2/GS/Window/GSwxDialog.cpp | 8 +- pcsx2/Pcsx2Config.cpp | 2 + 13 files changed, 173 insertions(+), 11 deletions(-) create mode 100644 pcsx2/GS/Renderers/Metal/misc.metal diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index 83f2bb8b4b..904b3e9809 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -98,6 +98,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* SettingWidgetBinder::BindWidgetToStringSetting(sif, m_ui.adapter, "EmuCore/GS", "Adapter"); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.vsync, "EmuCore/GS", "VsyncEnable", 0); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableHWFixes, "EmuCore/GS", "UserHacks", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.spinGPUDuringReadbacks, "EmuCore/GS", "HWSpinGPUForReadbacks", false); ////////////////////////////////////////////////////////////////////////// // Game Display Settings @@ -410,6 +411,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* "to your games. However IF you have ENABLED this, you WILL DISABLE AUTOMATIC " "SETTINGS and you can re-enable automatic settings by unchecking this option.")); + dialog->registerWidgetHelp(m_ui.spinGPUDuringReadbacks, tr("Spin GPU During Readbacks"), tr("Unchecked"), + tr("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes. " + "May improve performance but with a significant increase in power usage.")); + // Software dialog->registerWidgetHelp(m_ui.extraSWThreads, tr("Extra Rendering Threads"), tr("2 threads"), tr("Number of rendering threads: 0 for single thread, 2 or more for multithread (1 is for debugging). " diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index 47e775f648..ff73e69858 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -643,6 +643,13 @@ + + + + Spin GPU During Readbacks + + + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index db8005b096..bda21b3471 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -796,6 +796,7 @@ set(pcsx2GSMetalShaders GS/Renderers/Metal/convert.metal GS/Renderers/Metal/present.metal GS/Renderers/Metal/merge.metal + GS/Renderers/Metal/misc.metal GS/Renderers/Metal/interlace.metal GS/Renderers/Metal/tfx.metal GS/Renderers/Metal/fxaa.metal diff --git a/pcsx2/Config.h b/pcsx2/Config.h index fdb10d1073..38c4daf8f1 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -496,6 +496,7 @@ struct Pcsx2Config OsdShowInputs : 1; bool + HWSpinGPUForReadbacks : 1, GPUPaletteConversion : 1, AutoFlushSW : 1, PreloadFrameWithGSData : 1, diff --git a/pcsx2/Frontend/MetalHostDisplay.mm b/pcsx2/Frontend/MetalHostDisplay.mm index 38c5093dd5..46f8dae67c 100644 --- a/pcsx2/Frontend/MetalHostDisplay.mm +++ b/pcsx2/Frontend/MetalHostDisplay.mm @@ -298,6 +298,7 @@ void MetalHostDisplay::EndPresent() [drawable present]; }]; dev->FlushEncoders(); + dev->FrameCompleted(); m_current_drawable = nullptr; if (m_capture_start_frame) { diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index bb7c9cc4d9..d76ebf6776 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -1445,6 +1445,7 @@ void GSApp::Init() m_default_configuration["fxaa"] = "0"; m_default_configuration["HWDownloadMode"] = std::to_string(static_cast(GSHardwareDownloadMode::Enabled)); m_default_configuration["GSDumpCompression"] = std::to_string(static_cast(GSDumpCompressionMethod::LZMA)); + m_default_configuration["HWSpinGPUForReadbacks"] = "0"; m_default_configuration["pcrtc_antiblur"] = "1"; m_default_configuration["disable_interlace_offset"] = "0"; m_default_configuration["pcrtc_offsets"] = "0"; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h index 6b981a4708..81e76724b6 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h @@ -25,6 +25,7 @@ #include "common/HashCombine.h" #include "common/MRCHelpers.h" +#include "common/ReadbackSpinManager.h" #include "GS/GS.h" #include "GSMTLDeviceInfo.h" #include "GSMTLSharedHeader.h" @@ -227,6 +228,14 @@ public: u64 m_current_draw = 1; std::atomic m_last_finished_draw{0}; + // Spinning + ReadbackSpinManager m_spin_manager; + u32 m_encoders_in_current_cmdbuf; + u32 m_spin_timer; + MRCOwned> m_spin_pipeline; + MRCOwned> m_spin_buffer; + MRCOwned> m_spin_fence; + // Functions and Pipeline States MRCOwned> m_convert_pipeline[static_cast(ShaderConvert::Count)]; MRCOwned> m_present_pipeline[static_cast(PresentShader::Count)]; @@ -332,6 +341,8 @@ public: void EndRenderPass(); /// Begin a new render pass (may reuse existing) void BeginRenderPass(NSString* name, GSTexture* color, MTLLoadAction color_load, GSTexture* depth, MTLLoadAction depth_load, GSTexture* stencil = nullptr, MTLLoadAction stencil_load = MTLLoadActionDontCare); + /// Call at the end of each frame + void FrameCompleted(); GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) override; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index fd74971608..c765d98f1e 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -214,6 +214,7 @@ id GSDeviceMTL::GetRenderCmdBuf() { if (!m_current_render_cmdbuf) { + m_encoders_in_current_cmdbuf = 0; m_current_render_cmdbuf = MRCRetain([m_queue commandBuffer]); pxAssertRel(m_current_render_cmdbuf, "Failed to create draw command buffer!"); [m_current_render_cmdbuf setLabel:@"Draw"]; @@ -258,15 +259,78 @@ void GSDeviceMTL::FlushEncoders() [m_late_texture_upload_encoder endEncoding]; m_late_texture_upload_encoder = nil; } - [m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) + u32 spin_cycles = 0; + constexpr double s_to_ns = 1000000000; + if (m_spin_timer) { - std::lock_guard guard(backref->first); - if (GSDeviceMTL* dev = backref->second) - dev->DrawCommandBufferFinished(draw, buf); - }]; + u32 spin_id; + { + std::lock_guard guard(m_backref->first); + auto draw = m_spin_manager.DrawSubmitted(m_encoders_in_current_cmdbuf); + u32 constant_offset = 200000 * m_spin_manager.SpinsPerUnitTime(); // 200µs + u32 minimum_spin = 2 * constant_offset; // 400µs (200µs after subtracting constant_offset) + u32 maximum_spin = std::max(1024, 16000000 * m_spin_manager.SpinsPerUnitTime()); // 16ms + if (draw.recommended_spin > minimum_spin) + spin_cycles = std::min(draw.recommended_spin - constant_offset, maximum_spin); + spin_id = draw.id; + } + [m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw, spin_id](id buf) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability" + // Starting from kernelStartTime includes time the command buffer spent waiting to execute + // This is useful for avoiding issues on GPUs without async compute (Intel) where spinning + // delays the next command buffer start, which then makes the spin manager think it should spin more + // (If a command buffer contains multiple encoders, the GPU will start before the kernel finishes, + // so we choose kernelStartTime over kernelEndTime) + u64 begin = [buf kernelStartTime] * s_to_ns; + u64 end = [buf GPUEndTime] * s_to_ns; +#pragma clang diagnostic pop + std::lock_guard guard(backref->first); + if (GSDeviceMTL* dev = backref->second) + { + dev->DrawCommandBufferFinished(draw, buf); + dev->m_spin_manager.DrawCompleted(spin_id, begin, end); + } + }]; + } + else + { + [m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) + { + std::lock_guard guard(backref->first); + if (GSDeviceMTL* dev = backref->second) + dev->DrawCommandBufferFinished(draw, buf); + }]; + } [m_current_render_cmdbuf commit]; m_current_render_cmdbuf = nil; m_current_draw++; + if (spin_cycles) + { + id spinCmdBuf = [m_queue commandBuffer]; + [spinCmdBuf setLabel:@"Spin"]; + id spinCmdEncoder = [spinCmdBuf computeCommandEncoder]; + [spinCmdEncoder setLabel:@"Spin"]; + [spinCmdEncoder waitForFence:m_spin_fence]; + [spinCmdEncoder setComputePipelineState:m_spin_pipeline]; + [spinCmdEncoder setBytes:&spin_cycles length:sizeof(spin_cycles) atIndex:0]; + [spinCmdEncoder setBuffer:m_spin_buffer offset:0 atIndex:1]; + [spinCmdEncoder dispatchThreadgroups:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + [spinCmdEncoder endEncoding]; + [spinCmdBuf addCompletedHandler:[backref = m_backref, spin_cycles](id buf) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability" + u64 begin = [buf GPUStartTime] * s_to_ns; + u64 end = [buf GPUEndTime] * s_to_ns; +#pragma clang diagnostic pop + std::lock_guard guard(backref->first); + if (GSDeviceMTL* dev = backref->second) + dev->m_spin_manager.SpinCompleted(spin_cycles, begin, end); + }]; + [spinCmdBuf commit]; + } } void GSDeviceMTL::EndRenderPass() @@ -274,6 +338,8 @@ void GSDeviceMTL::EndRenderPass() if (m_current_render.encoder) { EndDebugGroup(m_current_render.encoder); + if (m_spin_timer) + [m_current_render.encoder updateFence:m_spin_fence afterStages:MTLRenderStageFragment]; [m_current_render.encoder endEncoding]; m_current_render.encoder = nil; memset(&m_current_render, 0, offsetof(MainRenderEncoder, depth_sel)); @@ -315,6 +381,8 @@ void GSDeviceMTL::BeginRenderPass(NSString* name, GSTexture* color, MTLLoadActio return; } + m_encoders_in_current_cmdbuf++; + if (m_late_texture_upload_encoder) { [m_late_texture_upload_encoder endEncoding]; @@ -365,6 +433,13 @@ void GSDeviceMTL::BeginRenderPass(NSString* name, GSTexture* color, MTLLoadActio pxAssertRel(m_current_render.encoder, "Failed to create render encoder!"); } +void GSDeviceMTL::FrameCompleted() +{ + if (m_spin_timer) + m_spin_timer--; + m_spin_manager.NextFrame(); +} + static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format) { switch (format) @@ -608,13 +683,31 @@ bool GSDeviceMTL::Create() try { // Init metal stuff - m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]); - m_fn_constants = MRCTransfer([MTLFunctionConstantValues new]); vector_float2 upscale2 = vector2(GSConfig.UpscaleMultiplier, GSConfig.UpscaleMultiplier); [m_fn_constants setConstantValue:&upscale2 type:MTLDataTypeFloat2 atIndex:GSMTLConstantIndex_SCALING_FACTOR]; setFnConstantB(m_fn_constants, m_dev.features.framebuffer_fetch, GSMTLConstantIndex_FRAMEBUFFER_FETCH); + m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]); + [m_draw_sync_fence setLabel:@"Draw Sync Fence"]; + m_spin_fence = MRCTransfer([m_dev.dev newFence]); + [m_spin_fence setLabel:@"Spin Fence"]; + constexpr MTLResourceOptions spin_opts = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked; + m_spin_buffer = MRCTransfer([m_dev.dev newBufferWithLength:4 options:spin_opts]); + [m_spin_buffer setLabel:@"Spin Buffer"]; + id initCommands = [m_queue commandBuffer]; + id clearSpinBuffer = [initCommands blitCommandEncoder]; + [clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0]; + [clearSpinBuffer updateFence:m_spin_fence]; + [clearSpinBuffer endEncoding]; + NSError* err = nullptr; + m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]); + if (err) + { + Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]); + return false; + } + m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]); [[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)]; applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices); @@ -891,6 +984,8 @@ bool GSDeviceMTL::Create() m_imgui_pipeline = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui"), @"imgui"); if (!m_dev.features.texture_swizzle) m_imgui_pipeline_a8 = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui_a8"), @"imgui_a8"); + + [initCommands commit]; } catch (GSRecoverableError&) { @@ -946,10 +1041,20 @@ bool GSDeviceMTL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSText destinationOffset:0 destinationBytesPerRow:out_map.pitch destinationBytesPerImage:size]; + if (m_spin_timer) + [encoder updateFence:m_spin_fence]; [encoder endEncoding]; [cmdbuf popDebugGroup]; FlushEncoders(); + if (@available(macOS 10.15, iOS 10.3, *)) + { + if (GSConfig.HWSpinGPUForReadbacks) + { + m_spin_manager.ReadbackRequested(); + m_spin_timer = 30; + } + } [cmdbuf waitUntilCompleted]; out_map.bits = static_cast([m_texture_download_buf contents]); diff --git a/pcsx2/GS/Renderers/Metal/misc.metal b/pcsx2/GS/Renderers/Metal/misc.metal new file mode 100644 index 0000000000..0a83a8ac89 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/misc.metal @@ -0,0 +1,24 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +kernel void waste_time(constant uint& cycles [[buffer(0)]], device uint* spin [[buffer(1)]]) +{ + uint value = spin[0]; + // The compiler doesn't know, but spin[0] == 0, so this loop won't actually go anywhere + for (uint i = 0; i < cycles; i++) + value = spin[value]; + // Store the result back to the buffer so the compiler can't optimize it away + spin[0] = value; +} diff --git a/pcsx2/GS/Window/GSSetting.cpp b/pcsx2/GS/Window/GSSetting.cpp index 83f97849ff..cfddb63f31 100644 --- a/pcsx2/GS/Window/GSSetting.cpp +++ b/pcsx2/GS/Window/GSSetting.cpp @@ -164,6 +164,9 @@ const char* dialog_message(int ID, bool* updateText) case IDC_GEOMETRY_SHADER_OVERRIDE: return cvtString("Allows the GPU instead of just the CPU to transform lines into sprites. This reduces CPU load and bandwidth requirement, but it is heavier on the GPU.\n" "Automatic detection is recommended."); + case IDC_SPIN_GPU: + return cvtString("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes.\n" + "May improve performance but with a significant increase in power usage."); case IDC_LINEAR_PRESENT: return cvtString("Use bilinear filtering when Upscaling/Downscaling the image to the screen. Disable it if you want a sharper/pixelated output."); // Exclusive for Hardware Renderer diff --git a/pcsx2/GS/Window/GSSetting.h b/pcsx2/GS/Window/GSSetting.h index 29a73f550b..0bc85aef77 100644 --- a/pcsx2/GS/Window/GSSetting.h +++ b/pcsx2/GS/Window/GSSetting.h @@ -87,6 +87,7 @@ enum IDC_SWTHREADS_EDIT, // OpenGL Advanced Settings IDC_GEOMETRY_SHADER_OVERRIDE, + IDC_SPIN_GPU, // On-screen Display IDC_OSD_LOG, IDC_OSD_MONITOR, diff --git a/pcsx2/GS/Window/GSwxDialog.cpp b/pcsx2/GS/Window/GSwxDialog.cpp index df4bbb3886..69e4122779 100644 --- a/pcsx2/GS/Window/GSwxDialog.cpp +++ b/pcsx2/GS/Window/GSwxDialog.cpp @@ -284,6 +284,7 @@ RendererTab::RendererTab(wxWindow* parent) auto* hw_checks_box = new wxWrapSizer(wxHORIZONTAL); auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq); + m_ui.addCheckBox(hw_checks_box, "Spin GPU During Readbacks", "HWSpinGPUForReadbacks", IDC_SPIN_GPU); auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; }; auto* hw_choice_grid = new wxFlexGridSizer(2, space, space); @@ -473,10 +474,9 @@ PostTab::PostTab(wxWindow* parent) auto* shader_boost_grid = new wxFlexGridSizer(2, space, space); shader_boost_grid->AddGrowableCol(1); - auto shader_boost_prereq = [shade_boost_check, this] { return shade_boost_check.box->GetValue(); }; - m_ui.addSliderAndLabel(shader_boost_grid, "Brightness:", "ShadeBoost_Brightness", 0, 100, 50, -1, shader_boost_prereq); - m_ui.addSliderAndLabel(shader_boost_grid, "Contrast:", "ShadeBoost_Contrast", 0, 100, 50, -1, shader_boost_prereq); - m_ui.addSliderAndLabel(shader_boost_grid, "Saturation:", "ShadeBoost_Saturation", 0, 100, 50, -1, shader_boost_prereq); + m_ui.addSliderAndLabel(shader_boost_grid, "Brightness:", "ShadeBoost_Brightness", 0, 100, 50, -1, shade_boost_check); + m_ui.addSliderAndLabel(shader_boost_grid, "Contrast:", "ShadeBoost_Contrast", 0, 100, 50, -1, shade_boost_check); + m_ui.addSliderAndLabel(shader_boost_grid, "Saturation:", "ShadeBoost_Saturation", 0, 100, 50, -1, shade_boost_check); shade_boost_box->Add(shader_boost_grid, wxSizerFlags().Expand()); shader_box->Add(shade_boost_box.outer, wxSizerFlags().Expand()); diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 4d7dd59ba9..38ac956d1c 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -325,6 +325,7 @@ Pcsx2Config::GSOptions::GSOptions() OsdShowInputs = false; HWDownloadMode = GSHardwareDownloadMode::Enabled; + HWSpinGPUForReadbacks = false; GPUPaletteConversion = false; AutoFlushSW = true; PreloadFrameWithGSData = false; @@ -548,6 +549,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingBool(OsdShowSettings); GSSettingBool(OsdShowInputs); + GSSettingBool(HWSpinGPUForReadbacks); GSSettingBoolEx(GPUPaletteConversion, "paltex"); GSSettingBoolEx(AutoFlushSW, "autoflush_sw"); GSSettingBoolEx(PreloadFrameWithGSData, "preload_frame_with_gs_data");