From 5d877250e9e8932d8a837ca373cef142aa6c0054 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Wed, 19 May 2021 13:43:49 +1000 Subject: [PATCH] GPU/HW: Add an option to use software renderer for readbacks --- android/app/src/main/res/values/strings.xml | 2 + .../src/main/res/xml/advanced_preferences.xml | 6 + src/core/gpu_backend.cpp | 4 +- src/core/gpu_backend.h | 2 +- src/core/gpu_hw.cpp | 204 +++++++++++++++++- src/core/gpu_hw.h | 12 ++ src/core/gpu_hw_d3d11.cpp | 12 +- src/core/gpu_hw_opengl.cpp | 11 +- src/core/gpu_hw_vulkan.cpp | 16 +- src/core/gpu_sw.cpp | 2 +- src/core/gpu_sw_backend.cpp | 4 +- src/core/gpu_sw_backend.h | 2 +- src/core/gpu_types.h | 15 ++ src/core/host_interface.cpp | 2 + src/core/settings.cpp | 2 + src/core/settings.h | 1 + src/duckstation-qt/displaysettingswidget.cpp | 8 +- src/duckstation-qt/displaysettingswidget.ui | 7 + src/frontend-common/fullscreen_ui.cpp | 19 +- 19 files changed, 303 insertions(+), 28 deletions(-) diff --git a/android/app/src/main/res/values/strings.xml b/android/app/src/main/res/values/strings.xml index e689555cd..593dd315b 100644 --- a/android/app/src/main/res/values/strings.xml +++ b/android/app/src/main/res/values/strings.xml @@ -346,4 +346,6 @@ Toggle Controller Analog Mode Reset Console Exit Game + Use Software Renderer For Readbacks + Runs the software renderer in parallel for VRAM readbacks. On some devices, this may result in greater performance when using graphical enhancements with the hardware renderer. 
diff --git a/android/app/src/main/res/xml/advanced_preferences.xml b/android/app/src/main/res/xml/advanced_preferences.xml index 568730e3a..bc42cc40b 100644 --- a/android/app/src/main/res/xml/advanced_preferences.xml +++ b/android/app/src/main/res/xml/advanced_preferences.xml @@ -70,6 +70,12 @@ app:entryValues="@array/settings_advanced_display_fps_limit_values" app:useSimpleSummaryProvider="true" app:iconSpaceReserved="false" /> + diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index 8d85dafe7..42673e317 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -12,9 +12,9 @@ GPUBackend::GPUBackend() = default; GPUBackend::~GPUBackend() = default; -bool GPUBackend::Initialize() +bool GPUBackend::Initialize(bool force_thread) { - if (g_settings.gpu_use_thread) + if (force_thread || g_settings.gpu_use_thread) StartGPUThread(); return true; diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index dfa924768..e8b9d0d87 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -21,7 +21,7 @@ public: ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; } - virtual bool Initialize(); + virtual bool Initialize(bool force_thread); virtual void UpdateSettings(); virtual void Reset(bool clear_vram); virtual void Shutdown(); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 4a57cf4be..1cee4c9a3 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -3,6 +3,7 @@ #include "common/log.h" #include "common/state_wrapper.h" #include "cpu_core.h" +#include "gpu_sw_backend.h" #include "pgxp.h" #include "settings.h" #include "system.h" @@ -34,7 +35,14 @@ GPU_HW::GPU_HW() : GPU() m_vram_ptr = m_vram_shadow.data(); } -GPU_HW::~GPU_HW() = default; +GPU_HW::~GPU_HW() +{ + if (m_sw_renderer) + { + m_sw_renderer->Shutdown(); + m_sw_renderer.reset(); + } +} bool GPU_HW::Initialize(HostDisplay* host_display) { @@ -82,6 +90,9 @@ bool GPU_HW::Initialize(HostDisplay* host_display) } m_pgxp_depth_buffer = 
g_settings.UsingPGXPDepthBuffer(); + + UpdateSoftwareRenderer(false); + PrintSettingsToLog(); return true; } @@ -93,6 +104,8 @@ void GPU_HW::Reset(bool clear_vram) m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; m_vram_shadow.fill(0); + if (m_sw_renderer) + m_sw_renderer->Reset(clear_vram); m_batch = {}; m_batch_ubo_data = {}; @@ -180,6 +193,8 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) ClearDepthBuffer(); } + UpdateSoftwareRenderer(true); + PrintSettingsToLog(); } @@ -248,6 +263,7 @@ void GPU_HW::PrintSettingsToLog() Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO"); Log_InfoPrintf("Depth buffer: %s", m_pgxp_depth_buffer ? "YES" : "NO"); Log_InfoPrintf("Downsampling: %s", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); + Log_InfoPrintf("Using software renderer for readbacks: %s", m_sw_renderer ? "YES" : "NO"); } void GPU_HW::UpdateVRAMReadTexture() @@ -545,6 +561,7 @@ void GPU_HW::LoadVertices() const u32 num_vertices = rc.quad_polygon ? 
4 : 3; std::array vertices; std::array, 4> native_vertex_positions; + std::array native_texcoords; bool valid_w = g_settings.gpu_pgxp_texture_correction; for (u32 i = 0; i < num_vertices; i++) { @@ -556,6 +573,7 @@ void GPU_HW::LoadVertices() const s32 native_y = m_drawing_offset.y + vp.y; native_vertex_positions[i][0] = native_x; native_vertex_positions[i][1] = native_y; + native_texcoords[i] = texcoord; vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, texcoord, 0xFFFF0000u); @@ -659,6 +677,23 @@ void GPU_HW::LoadVertices() AddVertex(vertices[3]); } } + + if (m_sw_renderer) + { + GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->x = native_vertex_positions[i][0]; + vert->y = native_vertex_positions[i][1]; + vert->texcoord = native_texcoords[i]; + vert->color = vertices[i].color; + } + + m_sw_renderer->PushCommand(cmd); + } } break; @@ -754,6 +789,19 @@ void GPU_HW::LoadVertices() m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable); + + if (m_sw_renderer) + { + GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = color; + cmd->x = pos_x; + cmd->y = pos_y; + cmd->width = static_cast(rectangle_width); + cmd->height = static_cast(rectangle_height); + cmd->texcoord = (static_cast(texcoord_y) << 8) | static_cast(texcoord_x); + m_sw_renderer->PushCommand(cmd); + } } break; @@ -808,6 +856,15 @@ void GPU_HW::LoadVertices() // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x), static_cast(end_y), end_color, depth); + + if (m_sw_renderer) + { + GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->vertices[0].Set(start_x, start_y, start_color); + cmd->vertices[1].Set(end_x, end_y, end_color); + m_sw_renderer->PushCommand(cmd); + } } else { @@ -826,6 +883,18 @@ void GPU_HW::LoadVertices() s32 start_y = start_vp.y + m_drawing_offset.y; u32 start_color = rc.color_for_first_vertex; + GPUBackendDrawLineCommand* cmd; + if (m_sw_renderer) + { + cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); + FillDrawCommand(cmd, rc); + cmd->vertices[0].Set(start_x, start_y, start_color); + } + else + { + cmd = nullptr; + } + for (u32 i = 1; i < num_vertices; i++) { const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; @@ -859,7 +928,13 @@ void GPU_HW::LoadVertices() start_x = end_x; start_y = end_y; start_color = end_color; + + if (cmd) + cmd->vertices[i].Set(end_x, end_y, end_color); } + + if (cmd) + m_sw_renderer->PushCommand(cmd); } } break; @@ -1017,10 +1092,84 @@ void GPU_HW::ResetBatchVertexDepth() m_current_depth = 1; } +void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const +{ + cmd->params.bits = 0; + cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; + cmd->params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField(); +} + +void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const +{ + FillBackendCommandParameters(cmd); + cmd->rc.bits = rc.bits; + cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->palette.bits = m_draw_mode.palette_reg; + cmd->window = m_draw_mode.texture_window; +} + +void GPU_HW::HandleVRAMReadWithSoftwareRenderer(u32 x, u32 y, u32 width, 
u32 height) +{ + DebugAssert(m_sw_renderer); + m_sw_renderer->Sync(false); +} + +void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) +{ + const bool current_enabled = (m_sw_renderer != nullptr); + const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; + if (current_enabled == new_enabled) + return; + + m_vram_ptr = m_vram_shadow.data(); + + if (!new_enabled) + { + if (m_sw_renderer) + m_sw_renderer->Shutdown(); + m_sw_renderer.reset(); + return; + } + + std::unique_ptr<GPU_SW_Backend> sw_renderer = std::make_unique<GPU_SW_Backend>(); + if (!sw_renderer->Initialize(true)) + return; + + // We need to fill in the SW renderer's VRAM with the current state for hot toggles. + if (copy_vram_from_hw) + { + FlushRender(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT); + + // Sync the drawing area. + GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + sw_renderer->PushCommand(cmd); + } + + m_sw_renderer = std::move(sw_renderer); + m_vram_ptr = m_sw_renderer->GetVRAM(); +} + void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { IncludeVRAMDirtyRectangle( Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + + if (m_sw_renderer) + { + GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->x = static_cast(x); + cmd->y = static_cast(y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->color = color; + m_sw_renderer->PushCommand(cmd); + } } void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) @@ -1033,6 +1182,21 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } + + if (m_sw_renderer) 
+ { + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); + FillBackendCommandParameters(cmd); + cmd->params.set_mask_while_drawing = set_mask; + cmd->params.check_mask_before_draw = check_mask; + cmd->x = static_cast(x); + cmd->y = static_cast(y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + std::memcpy(cmd->data, data, sizeof(u16) * num_words); + m_sw_renderer->PushCommand(cmd); + } } void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) @@ -1045,6 +1209,19 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } + + if (m_sw_renderer) + { + GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->src_x = static_cast(src_x); + cmd->src_y = static_cast(src_y); + cmd->dst_x = static_cast(dst_x); + cmd->dst_y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + m_sw_renderer->PushCommand(cmd); + } } void GPU_HW::DispatchRenderCommand() @@ -1136,6 +1313,22 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_dirty = true; } + if (m_drawing_area_changed) + { + m_drawing_area_changed = false; + SetScissorFromDrawingArea(); + + if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) + ClearDepthBuffer(); + + if (m_sw_renderer) + { + GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + m_sw_renderer->PushCommand(cmd); + } + } + LoadVertices(); } @@ -1150,15 +1343,6 @@ void GPU_HW::FlushRender() if (vertex_count == 0) return; - if (m_drawing_area_changed) - { - m_drawing_area_changed = false; - SetScissorFromDrawingArea(); - - if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) - ClearDepthBuffer(); - } - if (m_batch_ubo_dirty) { 
UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 2e40b2f39..c077ee5f5 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -8,6 +8,10 @@ #include #include +class GPU_SW_Backend; +struct GPUBackendCommand; +struct GPUBackendDrawCommand; + class GPU_HW : public GPU { public: @@ -254,6 +258,13 @@ protected: (m_batch.transparency_mode != GPUTransparencyMode::Disabled && !m_supports_dual_source_blend); } + ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() const { return static_cast<bool>(m_sw_renderer); } /* true while m_sw_renderer is non-null */ + + void FillBackendCommandParameters(GPUBackendCommand* cmd) const; + void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; + void HandleVRAMReadWithSoftwareRenderer(u32 x, u32 y, u32 width, u32 height); + void UpdateSoftwareRenderer(bool copy_vram_from_hw); + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; @@ -308,6 +319,7 @@ protected: u32 tex_height) const; HeapArray m_vram_shadow; + std::unique_ptr<GPU_SW_Backend> m_sw_renderer; BatchVertex* m_batch_start_vertex_ptr = nullptr; BatchVertex* m_batch_end_vertex_ptr = nullptr; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 9b2a87018..7cfc01e49 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -5,6 +5,7 @@ #include "common/state_wrapper.h" #include "common/timer.h" #include "gpu_hw_shadergen.h" +#include "gpu_sw_backend.h" #include "host_display.h" #include "host_interface.h" #include "shader_cache_version.h" @@ -946,6 +947,12 @@ void GPU_HW_D3D11::UpdateDisplay() void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + if (IsUsingSoftwareRendererForReadbacks()) + { + HandleVRAMReadWithSoftwareRenderer(x, y, width, height); + return; + } + // Get bounds 
with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; @@ -987,7 +994,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); return; } @@ -1045,6 +1052,9 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt { if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) { + if (IsUsingSoftwareRendererForReadbacks()) + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); if (m_vram_dirty_rect.Intersects(src_bounds)) diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 528c8a343..739bd401a 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -982,6 +982,12 @@ void GPU_HW_OpenGL::UpdateDisplay() void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + if (IsUsingSoftwareRendererForReadbacks()) + { + HandleVRAMReadWithSoftwareRenderer(x, y, width, height); + return; + } + // Get bounds with wrap-around handled. 
const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; @@ -1019,7 +1025,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); return; } @@ -1182,6 +1188,9 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) { + if (IsUsingSoftwareRendererForReadbacks()) + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + if (src_dirty) UpdateVRAMReadTexture(); IncludeVRAMDirtyRectangle(dst_bounds); diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 8ab248f24..3503b5bfa 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -1438,6 +1438,12 @@ void GPU_HW_Vulkan::UpdateDisplay() void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + if (IsUsingSoftwareRendererForReadbacks()) + { + HandleVRAMReadWithSoftwareRenderer(x, y, width, height); + return; + } + // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; @@ -1451,8 +1457,9 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use // the actual size we're rendering to... 
- BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(), - m_vram_readback_texture.GetHeight()); + const u32 rp_width = std::max<u32>(16, encoded_width); /* explicit u32: the int literal would otherwise break argument deduction */ + const u32 rp_height = std::max<u32>(16, encoded_height); + BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, rp_width, rp_height); // Encode the 24-bit texture as 16-bit. const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; @@ -1488,7 +1495,7 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); return; } @@ -1571,6 +1578,9 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid { if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) { + if (IsUsingSoftwareRendererForReadbacks()) + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); if (m_vram_dirty_rect.Intersects(src_bounds)) diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index f3bfe7831..980735ff6 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -47,7 +47,7 @@ GPURenderer GPU_SW::GetRendererType() const bool GPU_SW::Initialize(HostDisplay* host_display) { - if (!GPU::Initialize(host_display) || !m_backend.Initialize()) + if (!GPU::Initialize(host_display) || !m_backend.Initialize(false)) return false; static constexpr auto formats_for_16bit = make_array(HostDisplayPixelFormat::RGB565, 
HostDisplayPixelFormat::RGBA5551, diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index eafd317ed..2d8bfef7b 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -15,9 +15,9 @@ GPU_SW_Backend::GPU_SW_Backend() : GPUBackend() GPU_SW_Backend::~GPU_SW_Backend() = default; -bool GPU_SW_Backend::Initialize() +bool GPU_SW_Backend::Initialize(bool force_thread) { - return GPUBackend::Initialize(); + return GPUBackend::Initialize(force_thread); } void GPU_SW_Backend::Reset(bool clear_vram) diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h index 409b18eb1..5a7be6ffe 100644 --- a/src/core/gpu_sw_backend.h +++ b/src/core/gpu_sw_backend.h @@ -10,7 +10,7 @@ public: GPU_SW_Backend(); ~GPU_SW_Backend() override; - bool Initialize() override; + bool Initialize(bool force_thread) override; void Reset(bool clear_vram) override; ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index c0da5c63a..56353817a 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -346,6 +346,14 @@ struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand }; u16 texcoord; }; + + ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u16 texcoord_) + { + x = x_; + y = y_; + color = color_; + texcoord = texcoord_; + } }; Vertex vertices[0]; @@ -374,6 +382,13 @@ struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand }; u32 color; }; + + ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_) + { + x = x_; + y = y_; + color = color_; + } }; Vertex vertices[0]; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 8ffcf2880..e2d504530 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -506,6 +506,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetIntValue("GPU", "ResolutionScale", 1); si.SetIntValue("GPU", "Multisamples", 1); si.SetBoolValue("GPU", 
"UseDebugDevice", false); + si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); si.SetBoolValue("GPU", "PerSampleShading", false); si.SetBoolValue("GPU", "UseThread", true); si.SetBoolValue("GPU", "ThreadedPresentation", true); @@ -783,6 +784,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_multisamples != old_settings.gpu_multisamples || g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || g_settings.gpu_use_thread != old_settings.gpu_use_thread || + g_settings.gpu_use_software_renderer_for_readbacks != old_settings.gpu_use_software_renderer_for_readbacks || g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || g_settings.gpu_true_color != old_settings.gpu_true_color || diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 0abcbe901..619c7f0d6 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -193,6 +193,7 @@ void Settings::Load(SettingsInterface& si) gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); + gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); gpu_threaded_presentation = si.GetBoolValue("GPU", "ThreadedPresentation", true); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); @@ -379,6 +380,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); si.SetBoolValue("GPU", "UseThread", gpu_use_thread); si.SetBoolValue("GPU", "ThreadedPresentation", gpu_threaded_presentation); + si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", gpu_use_software_renderer_for_readbacks); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); 
si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering); si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter)); diff --git a/src/core/settings.h b/src/core/settings.h index 534f1d17a..c30f3fa88 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -108,6 +108,7 @@ struct Settings u32 gpu_resolution_scale = 1; u32 gpu_multisamples = 1; bool gpu_use_thread = true; + bool gpu_use_software_renderer_for_readbacks = false; bool gpu_threaded_presentation = true; bool gpu_use_debug_device = false; bool gpu_per_sample_shading = false; diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp index 1f00148f1..d158a40b3 100644 --- a/src/duckstation-qt/displaysettingswidget.cpp +++ b/src/duckstation-qt/displaysettingswidget.cpp @@ -48,6 +48,8 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuThread, "GPU", "UseThread", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.threadedPresentation, "GPU", "ThreadedPresentation", true); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.useSoftwareRendererForReadbacks, "GPU", + "UseSoftwareRendererForReadbacks", false); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false); @@ -130,6 +132,10 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"), tr("Uses a second thread for drawing graphics. 
Currently only available for the software " "renderer, but can provide a significant speed improvement, and is safe to use.")); + dialog->registerWidgetHelp( + m_ui.useSoftwareRendererForReadbacks, tr("Use Software Renderer For Readbacks"), tr("Unchecked"), + tr("Runs the software renderer in parallel for VRAM readbacks. On some systems, this may result in greater " + "performance when using graphical enhancements with the hardware renderer.")); dialog->registerWidgetHelp(m_ui.showOSDMessages, tr("Show OSD Messages"), tr("Checked"), tr("Shows on-screen-display messages when events occur such as save states being " "created/loaded, screenshots being taken, etc.")); @@ -151,7 +157,7 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW { QCheckBox* cb = new QCheckBox(tr("Use Blit Swap Chain"), m_ui.basicGroupBox); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, cb, "Display", "UseBlitSwapChain", false); - m_ui.basicCheckboxGridLayout->addWidget(cb, 2, 0, 1, 1); + m_ui.basicCheckboxGridLayout->addWidget(cb, 2, 1, 1, 1); dialog->registerWidgetHelp(cb, tr("Use Blit Swap Chain"), tr("Unchecked"), tr("Uses a blit presentation model instead of flipping when using the Direct3D 11 " "renderer. 
This usually results in slower performance, but may be required for some " diff --git a/src/duckstation-qt/displaysettingswidget.ui b/src/duckstation-qt/displaysettingswidget.ui index 05e497589..bc04f412e 100644 --- a/src/duckstation-qt/displaysettingswidget.ui +++ b/src/duckstation-qt/displaysettingswidget.ui @@ -92,6 +92,13 @@ + + + + Use Software Renderer For Readbacks + + + diff --git a/src/frontend-common/fullscreen_ui.cpp b/src/frontend-common/fullscreen_ui.cpp index 09f9a045d..8150eeba8 100644 --- a/src/frontend-common/fullscreen_ui.cpp +++ b/src/frontend-common/fullscreen_ui.cpp @@ -1983,11 +1983,6 @@ void DrawSettingsWindow() OpenChoiceDialog(ICON_FA_TV " Fullscreen Resolution", false, std::move(options), std::move(callback)); } - settings_changed |= - ToggleButton("Enable VSync", - "Synchronizes presentation of the console's frames to the host. Enable for smoother animations.", - &s_settings_copy.video_sync_enabled); - switch (s_settings_copy.gpu_renderer) { #ifdef WIN32 @@ -2024,6 +2019,20 @@ void DrawSettingsWindow() break; } + if (!s_settings_copy.IsUsingSoftwareRenderer()) + { + settings_changed |= + ToggleButton("Use Software Renderer For Readbacks", + "Runs the software renderer in parallel for VRAM readbacks. On some systems, this may result " + "in greater performance.", + &s_settings_copy.gpu_use_software_renderer_for_readbacks); + } + + settings_changed |= + ToggleButton("Enable VSync", + "Synchronizes presentation of the console's frames to the host. Enable for smoother animations.", + &s_settings_copy.video_sync_enabled); + settings_changed |= ToggleButton("Optimal Frame Pacing", "Ensures every frame generated is displayed for optimal pacing. Disable if " "you are having speed or sound issues.",