GS:HW: Add option to spin CPU during readbacks

This commit is contained in:
TellowKrinkle 2022-09-27 23:54:33 -07:00 committed by refractionpcsx2
parent 89dd7f19ef
commit ac1f31f2cd
20 changed files with 133 additions and 38 deletions

View File

@ -17,6 +17,7 @@
#include "common/D3D12/Context.h" #include "common/D3D12/Context.h"
#include "common/Assertions.h" #include "common/Assertions.h"
#include "common/General.h"
#include "common/ScopedGuard.h" #include "common/ScopedGuard.h"
#include "common/Console.h" #include "common/Console.h"
#include "D3D12MemAlloc.h" #include "D3D12MemAlloc.h"
@ -382,7 +383,7 @@ void Context::MoveToNextCommandList()
// We may have to wait if this command list hasn't finished on the GPU. // We may have to wait if this command list hasn't finished on the GPU.
CommandListResources& res = m_command_lists[m_current_command_list]; CommandListResources& res = m_command_lists[m_current_command_list];
WaitForFence(res.ready_fence_value); WaitForFence(res.ready_fence_value, false);
res.ready_fence_value = m_current_fence_value; res.ready_fence_value = m_current_fence_value;
res.init_command_list_used = false; res.init_command_list_used = false;
@ -445,7 +446,7 @@ ID3D12GraphicsCommandList4* Context::GetInitCommandList()
return res.command_lists[0].get(); return res.command_lists[0].get();
} }
void Context::ExecuteCommandList(bool wait_for_completion) void Context::ExecuteCommandList(WaitType wait_for_completion)
{ {
CommandListResources& res = m_command_lists[m_current_command_list]; CommandListResources& res = m_command_lists[m_current_command_list];
HRESULT hr; HRESULT hr;
@ -485,8 +486,8 @@ void Context::ExecuteCommandList(bool wait_for_completion)
pxAssertRel(SUCCEEDED(hr), "Signal fence"); pxAssertRel(SUCCEEDED(hr), "Signal fence");
MoveToNextCommandList(); MoveToNextCommandList();
if (wait_for_completion) if (wait_for_completion != WaitType::None)
WaitForFence(res.ready_fence_value); WaitForFence(res.ready_fence_value, wait_for_completion == WaitType::Spin);
} }
void Context::InvalidateSamplerGroups() void Context::InvalidateSamplerGroups()
@ -547,7 +548,7 @@ void Context::DestroyPendingResources(CommandListResources& cmdlist)
void Context::DestroyResources() void Context::DestroyResources()
{ {
ExecuteCommandList(true); ExecuteCommandList(WaitType::Sleep);
m_texture_stream_buffer.Destroy(false); m_texture_stream_buffer.Destroy(false);
m_descriptor_heap_manager.Free(&m_null_srv_descriptor); m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
@ -573,20 +574,30 @@ void Context::DestroyResources()
m_device.reset(); m_device.reset();
} }
void Context::WaitForFence(u64 fence) void Context::WaitForFence(u64 fence, bool spin)
{ {
if (m_completed_fence_value >= fence) if (m_completed_fence_value >= fence)
return; return;
// Try non-blocking check. if (spin)
m_completed_fence_value = m_fence->GetCompletedValue();
if (m_completed_fence_value < fence)
{ {
// Fall back to event. u64 value;
HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); while ((value = m_fence->GetCompletedValue()) < fence)
pxAssertRel(SUCCEEDED(hr), "Set fence event on completion"); ShortSpin();
WaitForSingleObject(m_fence_event, INFINITE); m_completed_fence_value = value;
}
else
{
// Try non-blocking check.
m_completed_fence_value = m_fence->GetCompletedValue(); m_completed_fence_value = m_fence->GetCompletedValue();
if (m_completed_fence_value < fence)
{
// Fall back to event.
HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
pxAssertRel(SUCCEEDED(hr), "Set fence event on completion");
WaitForSingleObject(m_fence_event, INFINITE);
m_completed_fence_value = m_fence->GetCompletedValue();
}
} }
// Release resources for as many command lists which have completed. // Release resources for as many command lists which have completed.
@ -607,7 +618,7 @@ void Context::WaitForGPUIdle()
u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++) for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
{ {
WaitForFence(m_command_lists[index].ready_fence_value); WaitForFence(m_command_lists[index].ready_fence_value, false);
index = (index + 1) % NUM_COMMAND_LISTS; index = (index + 1) % NUM_COMMAND_LISTS;
} }
} }

View File

@ -122,11 +122,18 @@ namespace D3D12
/// Test for support for the specified texture format. /// Test for support for the specified texture format.
bool SupportsTextureFormat(DXGI_FORMAT format); bool SupportsTextureFormat(DXGI_FORMAT format);
/// Selects how ExecuteCommandList() waits for the command list it just submitted.
/// Any value other than None results in a WaitForFence() call on that list's fence value.
enum class WaitType
{
None, ///< Don't wait (async); ExecuteCommandList() returns without calling WaitForFence()
Sleep, ///< Wait normally; WaitForFence() is called with spin = false
Spin, ///< Wait by spinning; WaitForFence() is called with spin = true
};
/// Executes the current command list. /// Executes the current command list.
void ExecuteCommandList(bool wait_for_completion); void ExecuteCommandList(WaitType wait_for_completion);
/// Waits for a specific fence. /// Waits for a specific fence.
void WaitForFence(u64 fence); void WaitForFence(u64 fence, bool spin);
/// Waits for any in-flight command buffers to complete. /// Waits for any in-flight command buffers to complete.
void WaitForGPUIdle(); void WaitForGPUIdle();

View File

@ -273,7 +273,7 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
return false; return false;
// Wait until this fence is signaled. This will fire the callback, updating the GPU position. // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
g_d3d12_context->WaitForFence(iter->first); g_d3d12_context->WaitForFence(iter->first, false);
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset; m_current_offset = new_offset;
m_current_space = new_space; m_current_space = new_space;

View File

@ -293,7 +293,7 @@ ID3D12GraphicsCommandList* Texture::BeginStreamUpdate(ID3D12GraphicsCommandList*
{ {
DevCon.WriteLn("Executing command buffer while waiting for %u bytes (%ux%u) in upload buffer", upload_size, width, DevCon.WriteLn("Executing command buffer while waiting for %u bytes (%ux%u) in upload buffer", upload_size, width,
height); height);
g_d3d12_context->ExecuteCommandList(false); g_d3d12_context->ExecuteCommandList(Context::WaitType::None);
if (!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) if (!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
{ {
Console.Error("Failed to reserve %u bytes for %ux%u upload", upload_size, width, height); Console.Error("Failed to reserve %u bytes for %ux%u upload", upload_size, width, height);

View File

@ -17,6 +17,7 @@
#include "common/Align.h" #include "common/Align.h"
#include "common/Assertions.h" #include "common/Assertions.h"
#include "common/Console.h" #include "common/Console.h"
#include "common/General.h"
#include "common/StringUtil.h" #include "common/StringUtil.h"
#include "common/Vulkan/ShaderCompiler.h" #include "common/Vulkan/ShaderCompiler.h"
#include "common/Vulkan/SwapChain.h" #include "common/Vulkan/SwapChain.h"
@ -1437,18 +1438,23 @@ namespace Vulkan
vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter)); vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter));
} }
void Context::ExecuteCommandBuffer(bool wait_for_completion) void Context::ExecuteCommandBuffer(WaitType wait_for_completion)
{ {
// If we're waiting for completion, don't bother waking the worker thread. // If we're waiting for completion, don't bother waking the worker thread.
const u32 current_frame = m_current_frame; const u32 current_frame = m_current_frame;
SubmitCommandBuffer(); SubmitCommandBuffer();
MoveToNextCommandBuffer(); MoveToNextCommandBuffer();
if (wait_for_completion) if (wait_for_completion != WaitType::None)
{ {
// Calibrate while we wait // Calibrate while we wait
if (m_wants_new_timestamp_calibration) if (m_wants_new_timestamp_calibration)
CalibrateSpinTimestamp(); CalibrateSpinTimestamp();
if (wait_for_completion == WaitType::Spin)
{
while (vkGetFenceStatus(m_device, m_frame_resources[current_frame].fence) == VK_NOT_READY)
ShortSpin();
}
WaitForCommandBufferCompletion(current_frame); WaitForCommandBufferCompletion(current_frame);
} }
} }

View File

@ -198,7 +198,14 @@ namespace Vulkan
uint32_t present_image_index = 0xFFFFFFFF, bool submit_on_thread = false); uint32_t present_image_index = 0xFFFFFFFF, bool submit_on_thread = false);
void MoveToNextCommandBuffer(); void MoveToNextCommandBuffer();
void ExecuteCommandBuffer(bool wait_for_completion); enum class WaitType
{
None, ///< Don't wait for the submitted command buffer
Sleep, ///< Wait using WaitForCommandBufferCompletion() only
Spin, ///< Poll the frame's fence with ShortSpin() while it is VK_NOT_READY, then call WaitForCommandBufferCompletion()
};
void ExecuteCommandBuffer(WaitType wait_for_completion);
void WaitForPresentComplete(); void WaitForPresentComplete();
// Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.

View File

@ -99,6 +99,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.vsync, "EmuCore/GS", "VsyncEnable", 0); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.vsync, "EmuCore/GS", "VsyncEnable", 0);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableHWFixes, "EmuCore/GS", "UserHacks", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableHWFixes, "EmuCore/GS", "UserHacks", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.spinGPUDuringReadbacks, "EmuCore/GS", "HWSpinGPUForReadbacks", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.spinGPUDuringReadbacks, "EmuCore/GS", "HWSpinGPUForReadbacks", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.spinCPUDuringReadbacks, "EmuCore/GS", "HWSpinCPUForReadbacks", false);
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Game Display Settings // Game Display Settings
@ -415,6 +416,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
tr("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes. " tr("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes. "
"May improve performance but with a significant increase in power usage.")); "May improve performance but with a significant increase in power usage."));
// Help text for the "Spin CPU During Readbacks" checkbox. It must be registered on the CPU widget:
// registering it on spinGPUDuringReadbacks would replace that widget's own help and leave this one without any.
dialog->registerWidgetHelp(m_ui.spinCPUDuringReadbacks, tr("Spin CPU During Readbacks"), tr("Unchecked"),
	tr("Does useless work on the CPU during readbacks to prevent it from going into powersave modes. "
		"May improve performance but with a significant increase in power usage."));
// Software // Software
dialog->registerWidgetHelp(m_ui.extraSWThreads, tr("Extra Rendering Threads"), tr("2 threads"), dialog->registerWidgetHelp(m_ui.extraSWThreads, tr("Extra Rendering Threads"), tr("2 threads"),
tr("Number of rendering threads: 0 for single thread, 2 or more for multithread (1 is for debugging). " tr("Number of rendering threads: 0 for single thread, 2 or more for multithread (1 is for debugging). "

View File

@ -650,6 +650,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="1">
<widget class="QCheckBox" name="spinCPUDuringReadbacks">
<property name="text">
<string>Spin CPU During Readbacks</string>
</property>
</widget>
</item>
</layout> </layout>
</item> </item>
</layout> </layout>

View File

@ -497,6 +497,7 @@ struct Pcsx2Config
bool bool
HWSpinGPUForReadbacks : 1, HWSpinGPUForReadbacks : 1,
HWSpinCPUForReadbacks : 1,
GPUPaletteConversion : 1, GPUPaletteConversion : 1,
AutoFlushSW : 1, AutoFlushSW : 1,
PreloadFrameWithGSData : 1, PreloadFrameWithGSData : 1,

View File

@ -333,7 +333,7 @@ bool D3D12HostDisplay::ChangeWindow(const WindowInfo& new_wi)
void D3D12HostDisplay::DestroySurface() void D3D12HostDisplay::DestroySurface()
{ {
// For some reason if we don't execute the command list here, the swap chain is in use.. not sure where. // For some reason if we don't execute the command list here, the swap chain is in use.. not sure where.
g_d3d12_context->ExecuteCommandList(true); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::Sleep);
if (IsFullscreen()) if (IsFullscreen())
SetFullscreen(false, 0, 0, 0.0f); SetFullscreen(false, 0, 0, 0.0f);
@ -438,7 +438,7 @@ void D3D12HostDisplay::ResizeWindow(s32 new_window_width, s32 new_window_height,
return; return;
// For some reason if we don't execute the command list here, the swap chain is in use.. not sure where. // For some reason if we don't execute the command list here, the swap chain is in use.. not sure where.
g_d3d12_context->ExecuteCommandList(true); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::Sleep);
DestroySwapChainRTVs(); DestroySwapChainRTVs();
@ -509,7 +509,7 @@ bool D3D12HostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, flo
return true; return true;
} }
g_d3d12_context->ExecuteCommandList(true); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::Sleep);
DestroySwapChainRTVs(); DestroySwapChainRTVs();
m_swap_chain.reset(); m_swap_chain.reset();
@ -585,7 +585,7 @@ void D3D12HostDisplay::EndPresent()
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size())); m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
swap_chain_buf.TransitionToState(g_d3d12_context->GetCommandList(), D3D12_RESOURCE_STATE_PRESENT); swap_chain_buf.TransitionToState(g_d3d12_context->GetCommandList(), D3D12_RESOURCE_STATE_PRESENT);
g_d3d12_context->ExecuteCommandList(false); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::None);
const bool vsync = static_cast<UINT>(m_vsync_mode != VsyncMode::Off); const bool vsync = static_cast<UINT>(m_vsync_mode != VsyncMode::Off);
if (!vsync && m_using_allow_tearing) if (!vsync && m_using_allow_tearing)

View File

@ -86,7 +86,7 @@ bool VulkanHostDisplay::ChangeWindow(const WindowInfo& new_wi)
if (new_wi.type == WindowInfo::Type::Surfaceless) if (new_wi.type == WindowInfo::Type::Surfaceless)
{ {
g_vulkan_context->ExecuteCommandBuffer(true); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::Sleep);
m_swap_chain.reset(); m_swap_chain.reset();
m_window_info = new_wi; m_window_info = new_wi;
return true; return true;
@ -209,7 +209,7 @@ static bool UploadBufferToTexture(
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{ {
Console.WriteLn("Executing command buffer for UploadBufferToTexture()"); Console.WriteLn("Executing command buffer for UploadBufferToTexture()");
g_vulkan_context->ExecuteCommandBuffer(false); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::None);
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{ {
Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", upload_size); Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", upload_size);
@ -365,7 +365,7 @@ bool VulkanHostDisplay::BeginPresent(bool frame_skip)
if (!m_swap_chain->RecreateSurface(m_window_info)) if (!m_swap_chain->RecreateSurface(m_window_info))
{ {
Console.Error("Failed to recreate surface after loss"); Console.Error("Failed to recreate surface after loss");
g_vulkan_context->ExecuteCommandBuffer(false); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::None);
return false; return false;
} }
@ -378,7 +378,7 @@ bool VulkanHostDisplay::BeginPresent(bool frame_skip)
{ {
// Still submit the command buffer, otherwise we'll end up with several frames waiting. // Still submit the command buffer, otherwise we'll end up with several frames waiting.
LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: "); LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: ");
g_vulkan_context->ExecuteCommandBuffer(false); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::None);
return false; return false;
} }
} }

View File

@ -165,7 +165,7 @@ void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data)
if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) || if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) ||
!bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx))) !bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx)))
{ {
g_d3d12_context->ExecuteCommandList(false); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::None);
if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) || if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) ||
!bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx))) !bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx)))
{ {
@ -230,7 +230,7 @@ void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data)
if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle)) if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle))
{ {
// ugh. // ugh.
g_d3d12_context->ExecuteCommandList(false); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::None);
ctx = g_d3d12_context->GetCommandList(); ctx = g_d3d12_context->GetCommandList();
ImGui_ImplDX12_SetupRenderState(draw_data, ctx); ImGui_ImplDX12_SetupRenderState(draw_data, ctx);
if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle)) if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle))

View File

@ -1446,6 +1446,7 @@ void GSApp::Init()
m_default_configuration["HWDownloadMode"] = std::to_string(static_cast<u8>(GSHardwareDownloadMode::Enabled)); m_default_configuration["HWDownloadMode"] = std::to_string(static_cast<u8>(GSHardwareDownloadMode::Enabled));
m_default_configuration["GSDumpCompression"] = std::to_string(static_cast<u8>(GSDumpCompressionMethod::LZMA)); m_default_configuration["GSDumpCompression"] = std::to_string(static_cast<u8>(GSDumpCompressionMethod::LZMA));
m_default_configuration["HWSpinGPUForReadbacks"] = "0"; m_default_configuration["HWSpinGPUForReadbacks"] = "0";
m_default_configuration["HWSpinCPUForReadbacks"] = "0";
m_default_configuration["pcrtc_antiblur"] = "1"; m_default_configuration["pcrtc_antiblur"] = "1";
m_default_configuration["disable_interlace_offset"] = "0"; m_default_configuration["disable_interlace_offset"] = "0";
m_default_configuration["pcrtc_offsets"] = "0"; m_default_configuration["pcrtc_offsets"] = "0";

View File

@ -1445,7 +1445,7 @@ void GSDevice12::DestroyStagingBuffer()
void GSDevice12::DestroyResources() void GSDevice12::DestroyResources()
{ {
g_d3d12_context->ExecuteCommandList(true); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::Sleep);
for (auto& it : m_tfx_pipelines) for (auto& it : m_tfx_pipelines)
g_d3d12_context->DeferObjectDestruction(it.second.get()); g_d3d12_context->DeferObjectDestruction(it.second.get());
@ -1734,10 +1734,20 @@ void GSDevice12::InitializeSamplers()
pxFailRel("Failed to initialize samplers"); pxFailRel("Failed to initialize samplers");
} }
/// Maps the (wait, spin) flag pair onto the D3D12 context's WaitType.
/// @param wait When false the result is always WaitType::None, whatever `spin` is.
/// @param spin Only consulted when `wait` is true: true selects Spin, false selects Sleep.
static D3D12::Context::WaitType GetWaitType(bool wait, bool spin)
{
	using WaitType = D3D12::Context::WaitType;

	if (wait)
		return spin ? WaitType::Spin : WaitType::Sleep;

	return WaitType::None;
}
void GSDevice12::ExecuteCommandList(bool wait_for_completion) void GSDevice12::ExecuteCommandList(bool wait_for_completion)
{ {
EndRenderPass(); EndRenderPass();
g_d3d12_context->ExecuteCommandList(wait_for_completion); g_d3d12_context->ExecuteCommandList(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
InvalidateCachedState(); InvalidateCachedState();
} }
@ -1758,7 +1768,7 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(const char* reason)
const bool was_in_render_pass = m_in_render_pass; const bool was_in_render_pass = m_in_render_pass;
EndRenderPass(); EndRenderPass();
g_d3d12_context->ExecuteCommandList(false); g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::None);
InvalidateCachedState(); InvalidateCachedState();
if (was_in_render_pass) if (was_in_render_pass)

View File

@ -26,6 +26,21 @@
#ifdef __APPLE__ #ifdef __APPLE__
#include "GSMTLSharedHeader.h" #include "GSMTLSharedHeader.h"
/// Reports whether a Metal command buffer status is one this file treats as finished.
/// @param status Value read from the command buffer's `status` property.
/// @return true for Completed and Error; false for NotEnqueued, Enqueued, Committed and Scheduled,
///         and for any value that is not one of the listed enumerators.
static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
{
	// Deliberately no `default:` label, so the compiler can still warn if a new enumerator appears.
	switch (status)
	{
		case MTLCommandBufferStatusNotEnqueued:
		case MTLCommandBufferStatusEnqueued:
		case MTLCommandBufferStatusCommitted:
		case MTLCommandBufferStatusScheduled:
			return false;
		case MTLCommandBufferStatusCompleted:
		case MTLCommandBufferStatusError:
			return true;
	}

	// The enum's underlying integer can hold values outside the cases above; without this return,
	// control would flow off the end of a value-returning function (undefined behaviour).
	// An unrecognised status is not reported as completed, so a caller polling on this keeps waiting.
	return false;
}
GSDevice* MakeGSDeviceMTL() GSDevice* MakeGSDeviceMTL()
{ {
return new GSDeviceMTL(); return new GSDeviceMTL();
@ -1055,7 +1070,15 @@ bool GSDeviceMTL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSText
m_spin_timer = 30; m_spin_timer = 30;
} }
} }
[cmdbuf waitUntilCompleted]; if (GSConfig.HWSpinCPUForReadbacks)
{
while (!IsCommandBufferCompleted([cmdbuf status]))
ShortSpin();
}
else
{
[cmdbuf waitUntilCompleted];
}
out_map.bits = static_cast<u8*>([m_texture_download_buf contents]); out_map.bits = static_cast<u8*>([m_texture_download_buf contents]);
g_perfmon.Put(GSPerfMon::Readbacks, 1); g_perfmon.Put(GSPerfMon::Readbacks, 1);

View File

@ -1822,7 +1822,7 @@ void GSDeviceVK::DestroyStagingBuffer()
void GSDeviceVK::DestroyResources() void GSDeviceVK::DestroyResources()
{ {
g_vulkan_context->ExecuteCommandBuffer(true); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::Sleep);
if (m_tfx_descriptor_sets[0] != VK_NULL_HANDLE) if (m_tfx_descriptor_sets[0] != VK_NULL_HANDLE)
g_vulkan_context->FreeGlobalDescriptorSet(m_tfx_descriptor_sets[0]); g_vulkan_context->FreeGlobalDescriptorSet(m_tfx_descriptor_sets[0]);
@ -2202,10 +2202,20 @@ bool GSDeviceVK::CreatePersistentDescriptorSets()
return true; return true;
} }
/// Maps the (wait, spin) flag pair onto the Vulkan context's WaitType.
/// @param wait When false the result is always WaitType::None, whatever `spin` is.
/// @param spin Only consulted when `wait` is true: true selects Spin, false selects Sleep.
static Vulkan::Context::WaitType GetWaitType(bool wait, bool spin)
{
	using WaitType = Vulkan::Context::WaitType;

	if (wait)
		return spin ? WaitType::Spin : WaitType::Sleep;

	return WaitType::None;
}
void GSDeviceVK::ExecuteCommandBuffer(bool wait_for_completion) void GSDeviceVK::ExecuteCommandBuffer(bool wait_for_completion)
{ {
EndRenderPass(); EndRenderPass();
g_vulkan_context->ExecuteCommandBuffer(wait_for_completion); g_vulkan_context->ExecuteCommandBuffer(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
InvalidateCachedState(); InvalidateCachedState();
} }
@ -2227,7 +2237,7 @@ void GSDeviceVK::ExecuteCommandBufferAndRestartRenderPass(const char* reason)
const VkRenderPass render_pass = m_current_render_pass; const VkRenderPass render_pass = m_current_render_pass;
const GSVector4i render_pass_area(m_current_render_pass_area); const GSVector4i render_pass_area(m_current_render_pass_area);
EndRenderPass(); EndRenderPass();
g_vulkan_context->ExecuteCommandBuffer(false); g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::None);
InvalidateCachedState(); InvalidateCachedState();
if (render_pass != VK_NULL_HANDLE) if (render_pass != VK_NULL_HANDLE)

View File

@ -167,6 +167,9 @@ const char* dialog_message(int ID, bool* updateText)
case IDC_SPIN_GPU: case IDC_SPIN_GPU:
return cvtString("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes.\n" return cvtString("Submits useless work to the GPU during readbacks to prevent it from going into powersave modes.\n"
"May improve performance but with a significant increase in power usage."); "May improve performance but with a significant increase in power usage.");
// Tooltip for the "Spin CPU During Readbacks" checkbox.
case IDC_SPIN_CPU:
	return cvtString("Does useless work on the CPU during readbacks to prevent it from going into powersave modes.\n"
		"May improve performance but with a significant increase in power usage.");
case IDC_LINEAR_PRESENT: case IDC_LINEAR_PRESENT:
return cvtString("Use bilinear filtering when Upscaling/Downscaling the image to the screen. Disable it if you want a sharper/pixelated output."); return cvtString("Use bilinear filtering when Upscaling/Downscaling the image to the screen. Disable it if you want a sharper/pixelated output.");
// Exclusive for Hardware Renderer // Exclusive for Hardware Renderer

View File

@ -88,6 +88,7 @@ enum
// OpenGL Advanced Settings // OpenGL Advanced Settings
IDC_GEOMETRY_SHADER_OVERRIDE, IDC_GEOMETRY_SHADER_OVERRIDE,
IDC_SPIN_GPU, IDC_SPIN_GPU,
IDC_SPIN_CPU,
// On-screen Display // On-screen Display
IDC_OSD_LOG, IDC_OSD_LOG,
IDC_OSD_MONITOR, IDC_OSD_MONITOR,

View File

@ -285,6 +285,7 @@ RendererTab::RendererTab(wxWindow* parent)
auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq); auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq);
m_ui.addCheckBox(hw_checks_box, "Spin GPU During Readbacks", "HWSpinGPUForReadbacks", IDC_SPIN_GPU); m_ui.addCheckBox(hw_checks_box, "Spin GPU During Readbacks", "HWSpinGPUForReadbacks", IDC_SPIN_GPU);
m_ui.addCheckBox(hw_checks_box, "Spin CPU During Readbacks", "HWSpinCPUForReadbacks", IDC_SPIN_CPU);
auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; }; auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; };
auto* hw_choice_grid = new wxFlexGridSizer(2, space, space); auto* hw_choice_grid = new wxFlexGridSizer(2, space, space);

View File

@ -326,6 +326,7 @@ Pcsx2Config::GSOptions::GSOptions()
HWDownloadMode = GSHardwareDownloadMode::Enabled; HWDownloadMode = GSHardwareDownloadMode::Enabled;
HWSpinGPUForReadbacks = false; HWSpinGPUForReadbacks = false;
HWSpinCPUForReadbacks = false;
GPUPaletteConversion = false; GPUPaletteConversion = false;
AutoFlushSW = true; AutoFlushSW = true;
PreloadFrameWithGSData = false; PreloadFrameWithGSData = false;
@ -550,6 +551,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingBool(OsdShowInputs); GSSettingBool(OsdShowInputs);
GSSettingBool(HWSpinGPUForReadbacks); GSSettingBool(HWSpinGPUForReadbacks);
GSSettingBool(HWSpinCPUForReadbacks);
GSSettingBoolEx(GPUPaletteConversion, "paltex"); GSSettingBoolEx(GPUPaletteConversion, "paltex");
GSSettingBoolEx(AutoFlushSW, "autoflush_sw"); GSSettingBoolEx(AutoFlushSW, "autoflush_sw");
GSSettingBoolEx(PreloadFrameWithGSData, "preload_frame_with_gs_data"); GSSettingBoolEx(PreloadFrameWithGSData, "preload_frame_with_gs_data");