diff --git a/android/app/src/main/res/values/strings.xml b/android/app/src/main/res/values/strings.xml
index e689555cd..593dd315b 100644
--- a/android/app/src/main/res/values/strings.xml
+++ b/android/app/src/main/res/values/strings.xml
@@ -346,4 +346,6 @@
Toggle Controller Analog Mode
Reset Console
Exit Game
+ Use Software Renderer For Readbacks
+ Runs the software renderer in parallel for VRAM readbacks. On some devices, this may result in greater performance when using graphical enhancements with the hardware renderer.
diff --git a/android/app/src/main/res/xml/advanced_preferences.xml b/android/app/src/main/res/xml/advanced_preferences.xml
index 568730e3a..bc42cc40b 100644
--- a/android/app/src/main/res/xml/advanced_preferences.xml
+++ b/android/app/src/main/res/xml/advanced_preferences.xml
@@ -70,6 +70,12 @@
app:entryValues="@array/settings_advanced_display_fps_limit_values"
app:useSimpleSummaryProvider="true"
app:iconSpaceReserved="false" />
+
diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp
index 8d85dafe7..42673e317 100644
--- a/src/core/gpu_backend.cpp
+++ b/src/core/gpu_backend.cpp
@@ -12,9 +12,9 @@ GPUBackend::GPUBackend() = default;
GPUBackend::~GPUBackend() = default;
-bool GPUBackend::Initialize()
+bool GPUBackend::Initialize(bool force_thread)
{
- if (g_settings.gpu_use_thread)
+ if (force_thread || g_settings.gpu_use_thread)
StartGPUThread();
return true;
diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h
index dfa924768..e8b9d0d87 100644
--- a/src/core/gpu_backend.h
+++ b/src/core/gpu_backend.h
@@ -21,7 +21,7 @@ public:
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
- virtual bool Initialize();
+ virtual bool Initialize(bool force_thread);
virtual void UpdateSettings();
virtual void Reset(bool clear_vram);
virtual void Shutdown();
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 4a57cf4be..1cee4c9a3 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -3,6 +3,7 @@
#include "common/log.h"
#include "common/state_wrapper.h"
#include "cpu_core.h"
+#include "gpu_sw_backend.h"
#include "pgxp.h"
#include "settings.h"
#include "system.h"
@@ -34,7 +35,14 @@ GPU_HW::GPU_HW() : GPU()
m_vram_ptr = m_vram_shadow.data();
}
-GPU_HW::~GPU_HW() = default;
+GPU_HW::~GPU_HW()
+{
+  // Nothing to tear down unless the parallel software backend was created.
+  if (!m_sw_renderer)
+    return;
+
+  // Shut the backend down explicitly, then release it.
+  m_sw_renderer->Shutdown();
+  m_sw_renderer.reset();
+}
bool GPU_HW::Initialize(HostDisplay* host_display)
{
@@ -82,6 +90,9 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
}
m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer();
+
+ UpdateSoftwareRenderer(false);
+
PrintSettingsToLog();
return true;
}
@@ -93,6 +104,8 @@ void GPU_HW::Reset(bool clear_vram)
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_vram_shadow.fill(0);
+ if (m_sw_renderer)
+ m_sw_renderer->Reset(clear_vram);
m_batch = {};
m_batch_ubo_data = {};
@@ -180,6 +193,8 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
ClearDepthBuffer();
}
+ UpdateSoftwareRenderer(true);
+
PrintSettingsToLog();
}
@@ -248,6 +263,7 @@ void GPU_HW::PrintSettingsToLog()
Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO");
Log_InfoPrintf("Depth buffer: %s", m_pgxp_depth_buffer ? "YES" : "NO");
Log_InfoPrintf("Downsampling: %s", Settings::GetDownsampleModeDisplayName(m_downsample_mode));
+ Log_InfoPrintf("Using software renderer for readbacks: %s", m_sw_renderer ? "YES" : "NO");
}
void GPU_HW::UpdateVRAMReadTexture()
@@ -545,6 +561,7 @@ void GPU_HW::LoadVertices()
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array vertices;
std::array, 4> native_vertex_positions;
+ std::array native_texcoords;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++)
{
@@ -556,6 +573,7 @@ void GPU_HW::LoadVertices()
const s32 native_y = m_drawing_offset.y + vp.y;
native_vertex_positions[i][0] = native_x;
native_vertex_positions[i][1] = native_y;
+ native_texcoords[i] = texcoord;
vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage,
texcoord, 0xFFFF0000u);
@@ -659,6 +677,23 @@ void GPU_HW::LoadVertices()
AddVertex(vertices[3]);
}
}
+
+ if (m_sw_renderer)
+ {
+ GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices);
+ FillDrawCommand(cmd, rc);
+
+ for (u32 i = 0; i < num_vertices; i++)
+ {
+ GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
+ vert->x = native_vertex_positions[i][0];
+ vert->y = native_vertex_positions[i][1];
+ vert->texcoord = native_texcoords[i];
+ vert->color = vertices[i].color;
+ }
+
+ m_sw_renderer->PushCommand(cmd);
+ }
}
break;
@@ -754,6 +789,19 @@ void GPU_HW::LoadVertices()
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
+
+ if (m_sw_renderer)
+ {
+ GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand();
+ FillDrawCommand(cmd, rc);
+ cmd->color = color;
+ cmd->x = pos_x;
+ cmd->y = pos_y;
+ cmd->width = static_cast(rectangle_width);
+ cmd->height = static_cast(rectangle_height);
+ cmd->texcoord = (static_cast(texcoord_y) << 8) | static_cast(texcoord_x);
+ m_sw_renderer->PushCommand(cmd);
+ }
}
break;
@@ -808,6 +856,15 @@ void GPU_HW::LoadVertices()
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x),
static_cast(end_y), end_color, depth);
+
+ if (m_sw_renderer)
+ {
+ GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2);
+ FillDrawCommand(cmd, rc);
+ cmd->vertices[0].Set(start_x, start_y, start_color);
+ cmd->vertices[1].Set(end_x, end_y, end_color);
+ m_sw_renderer->PushCommand(cmd);
+ }
}
else
{
@@ -826,6 +883,18 @@ void GPU_HW::LoadVertices()
s32 start_y = start_vp.y + m_drawing_offset.y;
u32 start_color = rc.color_for_first_vertex;
+ GPUBackendDrawLineCommand* cmd;
+ if (m_sw_renderer)
+ {
+ cmd = m_sw_renderer->NewDrawLineCommand(num_vertices);
+ FillDrawCommand(cmd, rc);
+ cmd->vertices[0].Set(start_x, start_y, start_color);
+ }
+ else
+ {
+ cmd = nullptr;
+ }
+
for (u32 i = 1; i < num_vertices; i++)
{
const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color;
@@ -859,7 +928,13 @@ void GPU_HW::LoadVertices()
start_x = end_x;
start_y = end_y;
start_color = end_color;
+
+ if (cmd)
+ cmd->vertices[i].Set(end_x, end_y, end_color);
}
+
+ if (cmd)
+ m_sw_renderer->PushCommand(cmd);
}
}
break;
@@ -1017,10 +1092,84 @@ void GPU_HW::ResetBatchVertexDepth()
m_current_depth = 1;
}
+// Populates the parameter block shared by every backend command from the current GPU status and CRTC state.
+void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
+{
+  auto& params = cmd->params;
+
+  // Start from an all-zero bit pattern, then fill in the individual fields.
+  params.bits = 0;
+  params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
+  params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
+  params.active_line_lsb = m_crtc_state.active_line_lsb;
+  params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField();
+}
+
+// Fills in a backend draw command: the shared parameters, the render command word, and the current draw mode
+// state (mode register, palette register and texture window).
+void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const
+{
+  FillBackendCommandParameters(cmd);
+
+  const auto& draw_state = m_draw_mode;
+  cmd->rc.bits = rc.bits;
+  cmd->draw_mode.bits = draw_state.mode_reg.bits;
+  cmd->palette.bits = draw_state.palette_reg;
+  cmd->window = draw_state.texture_window;
+}
+
+void GPU_HW::HandleVRAMReadWithSoftwareRenderer(u32 x, u32 y, u32 width, u32 height) // the region arguments are not used by this body
+{
+ DebugAssert(m_sw_renderer); // the backend ReadVRAM() overrides check IsUsingSoftwareRendererForReadbacks() before calling this
+ m_sw_renderer->Sync(false); // presumably waits for queued backend commands so VRAM is current -- TODO confirm Sync() semantics
+}
+
+// Creates or destroys the parallel software backend so that its presence matches
+// g_settings.gpu_use_software_renderer_for_readbacks. Does nothing when the state already matches.
+//
+// copy_vram_from_hw: when true (a hot toggle of the setting), the current hardware VRAM contents and the drawing
+// area are transferred to the newly-created backend before it is put into use.
+void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
+{
+  const bool current_enabled = (m_sw_renderer != nullptr);
+  const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks;
+  if (current_enabled == new_enabled)
+    return;
+
+  // Point at the local shadow copy during the transition. This is also the final state when the backend is
+  // being disabled, or when it fails to start below.
+  m_vram_ptr = m_vram_shadow.data();
+
+  if (!new_enabled)
+  {
+    // current_enabled != new_enabled, so a backend is guaranteed to exist here.
+    m_sw_renderer->Shutdown();
+    m_sw_renderer.reset();
+    return;
+  }
+
+  // force_thread=true: the backend gets its own thread even when g_settings.gpu_use_thread is off.
+  std::unique_ptr<GPU_SW_Backend> sw_renderer = std::make_unique<GPU_SW_Backend>();
+  if (!sw_renderer->Initialize(true))
+  {
+    // Don't fail silently; the setting stays on but readbacks keep using the hardware path.
+    Log_WarningPrintf("Failed to initialize software renderer for readbacks, falling back to hardware readbacks");
+    return;
+  }
+
+  // We need to fill in the SW renderer's VRAM with the current state for hot toggles.
+  if (copy_vram_from_hw)
+  {
+    // m_sw_renderer is still null at this point, so ReadVRAM() does not take the software-renderer shortcut.
+    FlushRender();
+    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
+    std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT);
+
+    // Sync the drawing area.
+    GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand();
+    cmd->new_area = m_drawing_area;
+    sw_renderer->PushCommand(cmd);
+  }
+
+  // From here on, VRAM reads are served from the software backend's copy.
+  m_sw_renderer = std::move(sw_renderer);
+  m_vram_ptr = m_sw_renderer->GetVRAM();
+}
+
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
IncludeVRAMDirtyRectangle(
Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
+
+ if (m_sw_renderer) // also queue the same fill on the software backend when one is active
+ {
+ GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand();
+ FillBackendCommandParameters(cmd); // mask/interlace parameters come from the current GPU state
+ cmd->x = static_cast(x); // NOTE(review): the static_cast target type is not visible in this view -- confirm against the command struct
+ cmd->y = static_cast(y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ cmd->color = color;
+ m_sw_renderer->PushCommand(cmd); // hand the filled-in command to the backend
+ }
}
void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
@@ -1033,6 +1182,21 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b
// set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++;
}
+
+ if (m_sw_renderer)
+ {
+ const u32 num_words = width * height;
+ GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words);
+ FillBackendCommandParameters(cmd);
+ cmd->params.set_mask_while_drawing = set_mask;
+ cmd->params.check_mask_before_draw = check_mask;
+ cmd->x = static_cast(x);
+ cmd->y = static_cast(y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ std::memcpy(cmd->data, data, sizeof(u16) * num_words);
+ m_sw_renderer->PushCommand(cmd);
+ }
}
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
@@ -1045,6 +1209,19 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
// set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++;
}
+
+ if (m_sw_renderer)
+ {
+ GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand();
+ FillBackendCommandParameters(cmd);
+ cmd->src_x = static_cast(src_x);
+ cmd->src_y = static_cast(src_y);
+ cmd->dst_x = static_cast(dst_x);
+ cmd->dst_y = static_cast(dst_y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ m_sw_renderer->PushCommand(cmd);
+ }
}
void GPU_HW::DispatchRenderCommand()
@@ -1136,6 +1313,22 @@ void GPU_HW::DispatchRenderCommand()
m_batch_ubo_dirty = true;
}
+ if (m_drawing_area_changed)
+ {
+ m_drawing_area_changed = false;
+ SetScissorFromDrawingArea();
+
+ if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f)
+ ClearDepthBuffer();
+
+ if (m_sw_renderer)
+ {
+ GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand();
+ cmd->new_area = m_drawing_area;
+ m_sw_renderer->PushCommand(cmd);
+ }
+ }
+
LoadVertices();
}
@@ -1150,15 +1343,6 @@ void GPU_HW::FlushRender()
if (vertex_count == 0)
return;
- if (m_drawing_area_changed)
- {
- m_drawing_area_changed = false;
- SetScissorFromDrawingArea();
-
- if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f)
- ClearDepthBuffer();
- }
-
if (m_batch_ubo_dirty)
{
UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data));
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 2e40b2f39..c077ee5f5 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -8,6 +8,10 @@
#include
#include
+class GPU_SW_Backend;
+struct GPUBackendCommand;
+struct GPUBackendDrawCommand;
+
class GPU_HW : public GPU
{
public:
@@ -254,6 +258,13 @@ protected:
(m_batch.transparency_mode != GPUTransparencyMode::Disabled && !m_supports_dual_source_blend);
}
+ // Returns true while a parallel software backend exists (m_sw_renderer is non-null). Read-only, hence const.
+ ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() const { return (m_sw_renderer != nullptr); }
+
+ void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
+ void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
+ void HandleVRAMReadWithSoftwareRenderer(u32 x, u32 y, u32 width, u32 height);
+ void UpdateSoftwareRenderer(bool copy_vram_from_hw);
+
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
@@ -308,6 +319,7 @@ protected:
u32 tex_height) const;
HeapArray m_vram_shadow;
+ std::unique_ptr m_sw_renderer;
BatchVertex* m_batch_start_vertex_ptr = nullptr;
BatchVertex* m_batch_end_vertex_ptr = nullptr;
diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp
index 9b2a87018..7cfc01e49 100644
--- a/src/core/gpu_hw_d3d11.cpp
+++ b/src/core/gpu_hw_d3d11.cpp
@@ -5,6 +5,7 @@
#include "common/state_wrapper.h"
#include "common/timer.h"
#include "gpu_hw_shadergen.h"
+#include "gpu_sw_backend.h"
#include "host_display.h"
#include "host_interface.h"
#include "shader_cache_version.h"
@@ -946,6 +947,12 @@ void GPU_HW_D3D11::UpdateDisplay()
void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ {
+ HandleVRAMReadWithSoftwareRenderer(x, y, width, height);
+ return;
+ }
+
// Get bounds with wrap-around handled.
const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
@@ -987,7 +994,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
- UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
+ UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
@@ -1045,6 +1052,9 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
{
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling())
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
+
const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
if (m_vram_dirty_rect.Intersects(src_bounds))
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 528c8a343..739bd401a 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -982,6 +982,12 @@ void GPU_HW_OpenGL::UpdateDisplay()
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ {
+ HandleVRAMReadWithSoftwareRenderer(x, y, width, height);
+ return;
+ }
+
// Get bounds with wrap-around handled.
const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
@@ -1019,7 +1025,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
- UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
+ UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
@@ -1182,6 +1188,9 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height))
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
+
if (src_dirty)
UpdateVRAMReadTexture();
IncludeVRAMDirtyRectangle(dst_bounds);
diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp
index 8ab248f24..3503b5bfa 100644
--- a/src/core/gpu_hw_vulkan.cpp
+++ b/src/core/gpu_hw_vulkan.cpp
@@ -1438,6 +1438,12 @@ void GPU_HW_Vulkan::UpdateDisplay()
void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ {
+ HandleVRAMReadWithSoftwareRenderer(x, y, width, height);
+ return;
+ }
+
// Get bounds with wrap-around handled.
const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
@@ -1451,8 +1457,9 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use
// the actual size we're rendering to...
- BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
- m_vram_readback_texture.GetHeight());
+ const u32 rp_width = std::max(16, encoded_width);
+ const u32 rp_height = std::max(16, encoded_height);
+ BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, rp_width, rp_height);
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
@@ -1488,7 +1495,7 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
- UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
+ UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
return;
}
@@ -1571,6 +1578,9 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
{
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling())
{
+ if (IsUsingSoftwareRendererForReadbacks())
+ GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
+
const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
if (m_vram_dirty_rect.Intersects(src_bounds))
diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index f3bfe7831..980735ff6 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -47,7 +47,7 @@ GPURenderer GPU_SW::GetRendererType() const
bool GPU_SW::Initialize(HostDisplay* host_display)
{
- if (!GPU::Initialize(host_display) || !m_backend.Initialize())
+ if (!GPU::Initialize(host_display) || !m_backend.Initialize(false))
return false;
static constexpr auto formats_for_16bit = make_array(HostDisplayPixelFormat::RGB565, HostDisplayPixelFormat::RGBA5551,
diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp
index eafd317ed..2d8bfef7b 100644
--- a/src/core/gpu_sw_backend.cpp
+++ b/src/core/gpu_sw_backend.cpp
@@ -15,9 +15,9 @@ GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
GPU_SW_Backend::~GPU_SW_Backend() = default;
-bool GPU_SW_Backend::Initialize()
+bool GPU_SW_Backend::Initialize(bool force_thread)
{
- return GPUBackend::Initialize();
+ return GPUBackend::Initialize(force_thread); // forwards force_thread unchanged to the base-class initializer
}
void GPU_SW_Backend::Reset(bool clear_vram)
diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h
index 409b18eb1..5a7be6ffe 100644
--- a/src/core/gpu_sw_backend.h
+++ b/src/core/gpu_sw_backend.h
@@ -10,7 +10,7 @@ public:
GPU_SW_Backend();
~GPU_SW_Backend() override;
- bool Initialize() override;
+ bool Initialize(bool force_thread) override;
void Reset(bool clear_vram) override;
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h
index c0da5c63a..56353817a 100644
--- a/src/core/gpu_types.h
+++ b/src/core/gpu_types.h
@@ -346,6 +346,14 @@ struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand
};
u16 texcoord;
};
+
+ ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u16 texcoord_) // assigns position, color and texture coordinate in one call
+ {
+ x = x_;
+ y = y_;
+ color = color_;
+ texcoord = texcoord_;
+ }
};
Vertex vertices[0];
@@ -374,6 +382,13 @@ struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand
};
u32 color;
};
+
+ ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_) // assigns position and color in one call
+ {
+ x = x_;
+ y = y_;
+ color = color_;
+ }
};
Vertex vertices[0];
diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp
index 8ffcf2880..e2d504530 100644
--- a/src/core/host_interface.cpp
+++ b/src/core/host_interface.cpp
@@ -506,6 +506,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetIntValue("GPU", "ResolutionScale", 1);
si.SetIntValue("GPU", "Multisamples", 1);
si.SetBoolValue("GPU", "UseDebugDevice", false);
+ si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false);
si.SetBoolValue("GPU", "PerSampleShading", false);
si.SetBoolValue("GPU", "UseThread", true);
si.SetBoolValue("GPU", "ThreadedPresentation", true);
@@ -783,6 +784,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_multisamples != old_settings.gpu_multisamples ||
g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading ||
g_settings.gpu_use_thread != old_settings.gpu_use_thread ||
+ g_settings.gpu_use_software_renderer_for_readbacks != old_settings.gpu_use_software_renderer_for_readbacks ||
g_settings.gpu_fifo_size != old_settings.gpu_fifo_size ||
g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead ||
g_settings.gpu_true_color != old_settings.gpu_true_color ||
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 0abcbe901..619c7f0d6 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -193,6 +193,7 @@ void Settings::Load(SettingsInterface& si)
gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false);
gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false);
gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true);
+ gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false);
gpu_threaded_presentation = si.GetBoolValue("GPU", "ThreadedPresentation", true);
gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true);
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false);
@@ -379,6 +380,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading);
si.SetBoolValue("GPU", "UseThread", gpu_use_thread);
si.SetBoolValue("GPU", "ThreadedPresentation", gpu_threaded_presentation);
+ si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", gpu_use_software_renderer_for_readbacks);
si.SetBoolValue("GPU", "TrueColor", gpu_true_color);
si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering);
si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter));
diff --git a/src/core/settings.h b/src/core/settings.h
index 534f1d17a..c30f3fa88 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -108,6 +108,7 @@ struct Settings
u32 gpu_resolution_scale = 1;
u32 gpu_multisamples = 1;
bool gpu_use_thread = true;
+ bool gpu_use_software_renderer_for_readbacks = false;
bool gpu_threaded_presentation = true;
bool gpu_use_debug_device = false;
bool gpu_per_sample_shading = false;
diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp
index 1f00148f1..d158a40b3 100644
--- a/src/duckstation-qt/displaysettingswidget.cpp
+++ b/src/duckstation-qt/displaysettingswidget.cpp
@@ -48,6 +48,8 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuThread, "GPU", "UseThread", true);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.threadedPresentation, "GPU",
"ThreadedPresentation", true);
+ SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.useSoftwareRendererForReadbacks, "GPU",
+ "UseSoftwareRendererForReadbacks", false);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages",
true);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false);
@@ -130,6 +132,10 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW
dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"),
tr("Uses a second thread for drawing graphics. Currently only available for the software "
"renderer, but can provide a significant speed improvement, and is safe to use."));
+ dialog->registerWidgetHelp(
+ m_ui.useSoftwareRendererForReadbacks, tr("Use Software Renderer For Readbacks"), tr("Unchecked"),
+ tr("Runs the software renderer in parallel for VRAM readbacks. On some systems, this may result in greater "
+ "performance when using graphical enhancements with the hardware renderer."));
dialog->registerWidgetHelp(m_ui.showOSDMessages, tr("Show OSD Messages"), tr("Checked"),
tr("Shows on-screen-display messages when events occur such as save states being "
"created/loaded, screenshots being taken, etc."));
@@ -151,7 +157,7 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW
{
QCheckBox* cb = new QCheckBox(tr("Use Blit Swap Chain"), m_ui.basicGroupBox);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, cb, "Display", "UseBlitSwapChain", false);
- m_ui.basicCheckboxGridLayout->addWidget(cb, 2, 0, 1, 1);
+ m_ui.basicCheckboxGridLayout->addWidget(cb, 2, 1, 1, 1);
dialog->registerWidgetHelp(cb, tr("Use Blit Swap Chain"), tr("Unchecked"),
tr("Uses a blit presentation model instead of flipping when using the Direct3D 11 "
"renderer. This usually results in slower performance, but may be required for some "
diff --git a/src/duckstation-qt/displaysettingswidget.ui b/src/duckstation-qt/displaysettingswidget.ui
index 05e497589..bc04f412e 100644
--- a/src/duckstation-qt/displaysettingswidget.ui
+++ b/src/duckstation-qt/displaysettingswidget.ui
@@ -92,6 +92,13 @@
+ -
+
+
+ Use Software Renderer For Readbacks
+
+
+
diff --git a/src/frontend-common/fullscreen_ui.cpp b/src/frontend-common/fullscreen_ui.cpp
index 09f9a045d..8150eeba8 100644
--- a/src/frontend-common/fullscreen_ui.cpp
+++ b/src/frontend-common/fullscreen_ui.cpp
@@ -1983,11 +1983,6 @@ void DrawSettingsWindow()
OpenChoiceDialog(ICON_FA_TV " Fullscreen Resolution", false, std::move(options), std::move(callback));
}
- settings_changed |=
- ToggleButton("Enable VSync",
- "Synchronizes presentation of the console's frames to the host. Enable for smoother animations.",
- &s_settings_copy.video_sync_enabled);
-
switch (s_settings_copy.gpu_renderer)
{
#ifdef WIN32
@@ -2024,6 +2019,20 @@ void DrawSettingsWindow()
break;
}
+ if (!s_settings_copy.IsUsingSoftwareRenderer())
+ {
+ settings_changed |=
+ ToggleButton("Use Software Renderer For Readbacks",
+ "Runs the software renderer in parallel for VRAM readbacks. On some systems, this may result "
+ "in greater performance.",
+ &s_settings_copy.gpu_use_software_renderer_for_readbacks);
+ }
+
+ settings_changed |=
+ ToggleButton("Enable VSync",
+ "Synchronizes presentation of the console's frames to the host. Enable for smoother animations.",
+ &s_settings_copy.video_sync_enabled);
+
settings_changed |= ToggleButton("Optimal Frame Pacing",
"Ensures every frame generated is displayed for optimal pacing. Disable if "
"you are having speed or sound issues.",