mirror of https://github.com/PCSX2/pcsx2.git
GS:MTL: Add option to spin GPU during readbacks
This commit is contained in:
parent
ea35619a78
commit
73044dffed
|
@ -98,6 +98,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
|
|||
SettingWidgetBinder::BindWidgetToStringSetting(sif, m_ui.adapter, "EmuCore/GS", "Adapter");
|
||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.vsync, "EmuCore/GS", "VsyncEnable", 0);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableHWFixes, "EmuCore/GS", "UserHacks", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.spinGPUDuringReadbacks, "EmuCore/GS", "HWSpinGPUForReadbacks", false);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Game Display Settings
|
||||
|
@ -410,6 +411,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
|
|||
"to your games. However IF you have ENABLED this, you WILL DISABLE AUTOMATIC "
|
||||
"SETTINGS and you can re-enable automatic settings by unchecking this option."));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.spinGPUDuringReadbacks, tr("Spin GPU During Readbacks"), tr("Unchecked"),
|
||||
tr("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes. "
|
||||
"May improve performance but with a significant increase in power usage."));
|
||||
|
||||
// Software
|
||||
dialog->registerWidgetHelp(m_ui.extraSWThreads, tr("Extra Rendering Threads"), tr("2 threads"),
|
||||
tr("Number of rendering threads: 0 for single thread, 2 or more for multithread (1 is for debugging). "
|
||||
|
|
|
@ -643,6 +643,13 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="spinGPUDuringReadbacks">
|
||||
<property name="text">
|
||||
<string>Spin GPU During Readbacks</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
|
|
|
@ -796,6 +796,7 @@ set(pcsx2GSMetalShaders
|
|||
GS/Renderers/Metal/convert.metal
|
||||
GS/Renderers/Metal/present.metal
|
||||
GS/Renderers/Metal/merge.metal
|
||||
GS/Renderers/Metal/misc.metal
|
||||
GS/Renderers/Metal/interlace.metal
|
||||
GS/Renderers/Metal/tfx.metal
|
||||
GS/Renderers/Metal/fxaa.metal
|
||||
|
|
|
@ -496,6 +496,7 @@ struct Pcsx2Config
|
|||
OsdShowInputs : 1;
|
||||
|
||||
bool
|
||||
HWSpinGPUForReadbacks : 1,
|
||||
GPUPaletteConversion : 1,
|
||||
AutoFlushSW : 1,
|
||||
PreloadFrameWithGSData : 1,
|
||||
|
|
|
@ -298,6 +298,7 @@ void MetalHostDisplay::EndPresent()
|
|||
[drawable present];
|
||||
}];
|
||||
dev->FlushEncoders();
|
||||
dev->FrameCompleted();
|
||||
m_current_drawable = nullptr;
|
||||
if (m_capture_start_frame)
|
||||
{
|
||||
|
|
|
@ -1445,6 +1445,7 @@ void GSApp::Init()
|
|||
m_default_configuration["fxaa"] = "0";
|
||||
m_default_configuration["HWDownloadMode"] = std::to_string(static_cast<u8>(GSHardwareDownloadMode::Enabled));
|
||||
m_default_configuration["GSDumpCompression"] = std::to_string(static_cast<u8>(GSDumpCompressionMethod::LZMA));
|
||||
m_default_configuration["HWSpinGPUForReadbacks"] = "0";
|
||||
m_default_configuration["pcrtc_antiblur"] = "1";
|
||||
m_default_configuration["disable_interlace_offset"] = "0";
|
||||
m_default_configuration["pcrtc_offsets"] = "0";
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "common/HashCombine.h"
|
||||
#include "common/MRCHelpers.h"
|
||||
#include "common/ReadbackSpinManager.h"
|
||||
#include "GS/GS.h"
|
||||
#include "GSMTLDeviceInfo.h"
|
||||
#include "GSMTLSharedHeader.h"
|
||||
|
@ -227,6 +228,14 @@ public:
|
|||
u64 m_current_draw = 1;
|
||||
std::atomic<u64> m_last_finished_draw{0};
|
||||
|
||||
// Spinning
|
||||
ReadbackSpinManager m_spin_manager;
|
||||
u32 m_encoders_in_current_cmdbuf;
|
||||
u32 m_spin_timer;
|
||||
MRCOwned<id<MTLComputePipelineState>> m_spin_pipeline;
|
||||
MRCOwned<id<MTLBuffer>> m_spin_buffer;
|
||||
MRCOwned<id<MTLFence>> m_spin_fence;
|
||||
|
||||
// Functions and Pipeline States
|
||||
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
|
||||
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
|
||||
|
@ -332,6 +341,8 @@ public:
|
|||
void EndRenderPass();
|
||||
/// Begin a new render pass (may reuse existing)
|
||||
void BeginRenderPass(NSString* name, GSTexture* color, MTLLoadAction color_load, GSTexture* depth, MTLLoadAction depth_load, GSTexture* stencil = nullptr, MTLLoadAction stencil_load = MTLLoadActionDontCare);
|
||||
/// Call at the end of each frame
|
||||
void FrameCompleted();
|
||||
|
||||
GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) override;
|
||||
|
||||
|
|
|
@ -214,6 +214,7 @@ id<MTLCommandBuffer> GSDeviceMTL::GetRenderCmdBuf()
|
|||
{
|
||||
if (!m_current_render_cmdbuf)
|
||||
{
|
||||
m_encoders_in_current_cmdbuf = 0;
|
||||
m_current_render_cmdbuf = MRCRetain([m_queue commandBuffer]);
|
||||
pxAssertRel(m_current_render_cmdbuf, "Failed to create draw command buffer!");
|
||||
[m_current_render_cmdbuf setLabel:@"Draw"];
|
||||
|
@ -258,15 +259,78 @@ void GSDeviceMTL::FlushEncoders()
|
|||
[m_late_texture_upload_encoder endEncoding];
|
||||
m_late_texture_upload_encoder = nil;
|
||||
}
|
||||
[m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw](id<MTLCommandBuffer> buf)
|
||||
u32 spin_cycles = 0;
|
||||
constexpr double s_to_ns = 1000000000;
|
||||
if (m_spin_timer)
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(backref->first);
|
||||
if (GSDeviceMTL* dev = backref->second)
|
||||
dev->DrawCommandBufferFinished(draw, buf);
|
||||
}];
|
||||
u32 spin_id;
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_backref->first);
|
||||
auto draw = m_spin_manager.DrawSubmitted(m_encoders_in_current_cmdbuf);
|
||||
u32 constant_offset = 200000 * m_spin_manager.SpinsPerUnitTime(); // 200µs
|
||||
u32 minimum_spin = 2 * constant_offset; // 400µs (200µs after subtracting constant_offset)
|
||||
u32 maximum_spin = std::max<u32>(1024, 16000000 * m_spin_manager.SpinsPerUnitTime()); // 16ms
|
||||
if (draw.recommended_spin > minimum_spin)
|
||||
spin_cycles = std::min(draw.recommended_spin - constant_offset, maximum_spin);
|
||||
spin_id = draw.id;
|
||||
}
|
||||
[m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw, spin_id](id<MTLCommandBuffer> buf)
|
||||
{
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunguarded-availability"
|
||||
// Starting from kernelStartTime includes time the command buffer spent waiting to execute
|
||||
// This is useful for avoiding issues on GPUs without async compute (Intel) where spinning
|
||||
// delays the next command buffer start, which then makes the spin manager think it should spin more
|
||||
// (If a command buffer contains multiple encoders, the GPU will start before the kernel finishes,
|
||||
// so we choose kernelStartTime over kernelEndTime)
|
||||
u64 begin = [buf kernelStartTime] * s_to_ns;
|
||||
u64 end = [buf GPUEndTime] * s_to_ns;
|
||||
#pragma clang diagnostic pop
|
||||
std::lock_guard<std::mutex> guard(backref->first);
|
||||
if (GSDeviceMTL* dev = backref->second)
|
||||
{
|
||||
dev->DrawCommandBufferFinished(draw, buf);
|
||||
dev->m_spin_manager.DrawCompleted(spin_id, begin, end);
|
||||
}
|
||||
}];
|
||||
}
|
||||
else
|
||||
{
|
||||
[m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw](id<MTLCommandBuffer> buf)
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(backref->first);
|
||||
if (GSDeviceMTL* dev = backref->second)
|
||||
dev->DrawCommandBufferFinished(draw, buf);
|
||||
}];
|
||||
}
|
||||
[m_current_render_cmdbuf commit];
|
||||
m_current_render_cmdbuf = nil;
|
||||
m_current_draw++;
|
||||
if (spin_cycles)
|
||||
{
|
||||
id<MTLCommandBuffer> spinCmdBuf = [m_queue commandBuffer];
|
||||
[spinCmdBuf setLabel:@"Spin"];
|
||||
id<MTLComputeCommandEncoder> spinCmdEncoder = [spinCmdBuf computeCommandEncoder];
|
||||
[spinCmdEncoder setLabel:@"Spin"];
|
||||
[spinCmdEncoder waitForFence:m_spin_fence];
|
||||
[spinCmdEncoder setComputePipelineState:m_spin_pipeline];
|
||||
[spinCmdEncoder setBytes:&spin_cycles length:sizeof(spin_cycles) atIndex:0];
|
||||
[spinCmdEncoder setBuffer:m_spin_buffer offset:0 atIndex:1];
|
||||
[spinCmdEncoder dispatchThreadgroups:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
||||
[spinCmdEncoder endEncoding];
|
||||
[spinCmdBuf addCompletedHandler:[backref = m_backref, spin_cycles](id<MTLCommandBuffer> buf)
|
||||
{
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunguarded-availability"
|
||||
u64 begin = [buf GPUStartTime] * s_to_ns;
|
||||
u64 end = [buf GPUEndTime] * s_to_ns;
|
||||
#pragma clang diagnostic pop
|
||||
std::lock_guard<std::mutex> guard(backref->first);
|
||||
if (GSDeviceMTL* dev = backref->second)
|
||||
dev->m_spin_manager.SpinCompleted(spin_cycles, begin, end);
|
||||
}];
|
||||
[spinCmdBuf commit];
|
||||
}
|
||||
}
|
||||
|
||||
void GSDeviceMTL::EndRenderPass()
|
||||
|
@ -274,6 +338,8 @@ void GSDeviceMTL::EndRenderPass()
|
|||
if (m_current_render.encoder)
|
||||
{
|
||||
EndDebugGroup(m_current_render.encoder);
|
||||
if (m_spin_timer)
|
||||
[m_current_render.encoder updateFence:m_spin_fence afterStages:MTLRenderStageFragment];
|
||||
[m_current_render.encoder endEncoding];
|
||||
m_current_render.encoder = nil;
|
||||
memset(&m_current_render, 0, offsetof(MainRenderEncoder, depth_sel));
|
||||
|
@ -315,6 +381,8 @@ void GSDeviceMTL::BeginRenderPass(NSString* name, GSTexture* color, MTLLoadActio
|
|||
return;
|
||||
}
|
||||
|
||||
m_encoders_in_current_cmdbuf++;
|
||||
|
||||
if (m_late_texture_upload_encoder)
|
||||
{
|
||||
[m_late_texture_upload_encoder endEncoding];
|
||||
|
@ -365,6 +433,13 @@ void GSDeviceMTL::BeginRenderPass(NSString* name, GSTexture* color, MTLLoadActio
|
|||
pxAssertRel(m_current_render.encoder, "Failed to create render encoder!");
|
||||
}
|
||||
|
||||
void GSDeviceMTL::FrameCompleted()
|
||||
{
|
||||
if (m_spin_timer)
|
||||
m_spin_timer--;
|
||||
m_spin_manager.NextFrame();
|
||||
}
|
||||
|
||||
static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
|
||||
{
|
||||
switch (format)
|
||||
|
@ -608,13 +683,31 @@ bool GSDeviceMTL::Create()
|
|||
try
|
||||
{
|
||||
// Init metal stuff
|
||||
m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]);
|
||||
|
||||
m_fn_constants = MRCTransfer([MTLFunctionConstantValues new]);
|
||||
vector_float2 upscale2 = vector2(GSConfig.UpscaleMultiplier, GSConfig.UpscaleMultiplier);
|
||||
[m_fn_constants setConstantValue:&upscale2 type:MTLDataTypeFloat2 atIndex:GSMTLConstantIndex_SCALING_FACTOR];
|
||||
setFnConstantB(m_fn_constants, m_dev.features.framebuffer_fetch, GSMTLConstantIndex_FRAMEBUFFER_FETCH);
|
||||
|
||||
m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]);
|
||||
[m_draw_sync_fence setLabel:@"Draw Sync Fence"];
|
||||
m_spin_fence = MRCTransfer([m_dev.dev newFence]);
|
||||
[m_spin_fence setLabel:@"Spin Fence"];
|
||||
constexpr MTLResourceOptions spin_opts = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
|
||||
m_spin_buffer = MRCTransfer([m_dev.dev newBufferWithLength:4 options:spin_opts]);
|
||||
[m_spin_buffer setLabel:@"Spin Buffer"];
|
||||
id<MTLCommandBuffer> initCommands = [m_queue commandBuffer];
|
||||
id<MTLBlitCommandEncoder> clearSpinBuffer = [initCommands blitCommandEncoder];
|
||||
[clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0];
|
||||
[clearSpinBuffer updateFence:m_spin_fence];
|
||||
[clearSpinBuffer endEncoding];
|
||||
NSError* err = nullptr;
|
||||
m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
|
||||
if (err)
|
||||
{
|
||||
Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]);
|
||||
return false;
|
||||
}
|
||||
|
||||
m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
|
||||
[[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)];
|
||||
applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices);
|
||||
|
@ -891,6 +984,8 @@ bool GSDeviceMTL::Create()
|
|||
m_imgui_pipeline = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui"), @"imgui");
|
||||
if (!m_dev.features.texture_swizzle)
|
||||
m_imgui_pipeline_a8 = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui_a8"), @"imgui_a8");
|
||||
|
||||
[initCommands commit];
|
||||
}
|
||||
catch (GSRecoverableError&)
|
||||
{
|
||||
|
@ -946,10 +1041,20 @@ bool GSDeviceMTL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSText
|
|||
destinationOffset:0
|
||||
destinationBytesPerRow:out_map.pitch
|
||||
destinationBytesPerImage:size];
|
||||
if (m_spin_timer)
|
||||
[encoder updateFence:m_spin_fence];
|
||||
[encoder endEncoding];
|
||||
[cmdbuf popDebugGroup];
|
||||
|
||||
FlushEncoders();
|
||||
if (@available(macOS 10.15, iOS 10.3, *))
|
||||
{
|
||||
if (GSConfig.HWSpinGPUForReadbacks)
|
||||
{
|
||||
m_spin_manager.ReadbackRequested();
|
||||
m_spin_timer = 30;
|
||||
}
|
||||
}
|
||||
[cmdbuf waitUntilCompleted];
|
||||
|
||||
out_map.bits = static_cast<u8*>([m_texture_download_buf contents]);
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2021 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
kernel void waste_time(constant uint& cycles [[buffer(0)]], device uint* spin [[buffer(1)]])
|
||||
{
|
||||
uint value = spin[0];
|
||||
// The compiler doesn't know, but spin[0] == 0, so this loop won't actually go anywhere
|
||||
for (uint i = 0; i < cycles; i++)
|
||||
value = spin[value];
|
||||
// Store the result back to the buffer so the compiler can't optimize it away
|
||||
spin[0] = value;
|
||||
}
|
|
@ -164,6 +164,9 @@ const char* dialog_message(int ID, bool* updateText)
|
|||
case IDC_GEOMETRY_SHADER_OVERRIDE:
|
||||
return cvtString("Allows the GPU instead of just the CPU to transform lines into sprites. This reduces CPU load and bandwidth requirement, but it is heavier on the GPU.\n"
|
||||
"Automatic detection is recommended.");
|
||||
case IDC_SPIN_GPU:
|
||||
return cvtString("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes.\n"
|
||||
"May improve performance but with a significant increase in power usage.");
|
||||
case IDC_LINEAR_PRESENT:
|
||||
return cvtString("Use bilinear filtering when Upscaling/Downscaling the image to the screen. Disable it if you want a sharper/pixelated output.");
|
||||
// Exclusive for Hardware Renderer
|
||||
|
|
|
@ -87,6 +87,7 @@ enum
|
|||
IDC_SWTHREADS_EDIT,
|
||||
// OpenGL Advanced Settings
|
||||
IDC_GEOMETRY_SHADER_OVERRIDE,
|
||||
IDC_SPIN_GPU,
|
||||
// On-screen Display
|
||||
IDC_OSD_LOG,
|
||||
IDC_OSD_MONITOR,
|
||||
|
|
|
@ -284,6 +284,7 @@ RendererTab::RendererTab(wxWindow* parent)
|
|||
auto* hw_checks_box = new wxWrapSizer(wxHORIZONTAL);
|
||||
|
||||
auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq);
|
||||
m_ui.addCheckBox(hw_checks_box, "Spin GPU During Readbacks", "HWSpinGPUForReadbacks", IDC_SPIN_GPU);
|
||||
auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; };
|
||||
|
||||
auto* hw_choice_grid = new wxFlexGridSizer(2, space, space);
|
||||
|
@ -473,10 +474,9 @@ PostTab::PostTab(wxWindow* parent)
|
|||
auto* shader_boost_grid = new wxFlexGridSizer(2, space, space);
|
||||
shader_boost_grid->AddGrowableCol(1);
|
||||
|
||||
auto shader_boost_prereq = [shade_boost_check, this] { return shade_boost_check.box->GetValue(); };
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Brightness:", "ShadeBoost_Brightness", 0, 100, 50, -1, shader_boost_prereq);
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Contrast:", "ShadeBoost_Contrast", 0, 100, 50, -1, shader_boost_prereq);
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Saturation:", "ShadeBoost_Saturation", 0, 100, 50, -1, shader_boost_prereq);
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Brightness:", "ShadeBoost_Brightness", 0, 100, 50, -1, shade_boost_check);
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Contrast:", "ShadeBoost_Contrast", 0, 100, 50, -1, shade_boost_check);
|
||||
m_ui.addSliderAndLabel(shader_boost_grid, "Saturation:", "ShadeBoost_Saturation", 0, 100, 50, -1, shade_boost_check);
|
||||
|
||||
shade_boost_box->Add(shader_boost_grid, wxSizerFlags().Expand());
|
||||
shader_box->Add(shade_boost_box.outer, wxSizerFlags().Expand());
|
||||
|
|
|
@ -325,6 +325,7 @@ Pcsx2Config::GSOptions::GSOptions()
|
|||
OsdShowInputs = false;
|
||||
|
||||
HWDownloadMode = GSHardwareDownloadMode::Enabled;
|
||||
HWSpinGPUForReadbacks = false;
|
||||
GPUPaletteConversion = false;
|
||||
AutoFlushSW = true;
|
||||
PreloadFrameWithGSData = false;
|
||||
|
@ -548,6 +549,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
|
|||
GSSettingBool(OsdShowSettings);
|
||||
GSSettingBool(OsdShowInputs);
|
||||
|
||||
GSSettingBool(HWSpinGPUForReadbacks);
|
||||
GSSettingBoolEx(GPUPaletteConversion, "paltex");
|
||||
GSSettingBoolEx(AutoFlushSW, "autoflush_sw");
|
||||
GSSettingBoolEx(PreloadFrameWithGSData, "preload_frame_with_gs_data");
|
||||
|
|
Loading…
Reference in New Issue