From e2c894cf551078d1cae4446146a2bce1c739580d Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Wed, 10 Feb 2021 01:25:07 +1000 Subject: [PATCH] GPU: Add delay VRAM reads hack --- src/core/gpu.cpp | 14 ++- src/core/gpu.h | 3 +- src/core/gpu_commands.cpp | 3 +- src/core/gpu_hw.h | 5 +- src/core/gpu_hw_d3d11.cpp | 6 +- src/core/gpu_hw_d3d11.h | 2 +- src/core/gpu_hw_opengl.cpp | 85 +++++++++++++++---- src/core/gpu_hw_opengl.h | 9 +- src/core/gpu_hw_vulkan.cpp | 65 ++++++++++---- src/core/gpu_hw_vulkan.h | 9 +- src/core/gpu_sw.cpp | 2 +- src/core/gpu_sw.h | 2 +- src/core/settings.cpp | 2 + src/core/settings.h | 1 + src/duckstation-qt/advancedsettingswidget.cpp | 5 +- 15 files changed, 161 insertions(+), 52 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index c862ab922..a1a62cb04 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -228,7 +228,7 @@ bool GPU::DoState(StateWrapper& sw, HostDisplayTexture** host_texture, bool upda } else { - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } } @@ -713,6 +713,11 @@ void GPU::UpdateCRTCDisplayParameters() } } +void GPU::UpdateDelayedVRAMReadBuffer() +{ + // noop +} + TickCount GPU::GetPendingCRTCTicks() const { const TickCount pending_sysclk_ticks = m_crtc_tick_event->GetTicksSinceLastExecution(); @@ -1079,6 +1084,9 @@ void GPU::WriteGP1(u32 value) m_crtc_state.regs.display_address_start = new_value; UpdateCRTCDisplayParameters(); } + + if (g_settings.gpu_delay_vram_reads) + UpdateDelayedVRAMReadBuffer(); } break; @@ -1241,7 +1249,7 @@ void GPU::ClearDisplay() {} void GPU::UpdateDisplay() {} -void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} +void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { @@ -1467,7 +1475,7 @@ void GPU::SetTextureWindow(u32 value) bool GPU::DumpVRAMToFile(const char* filename) { 
- ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); const char* extension = std::strrchr(filename, '.'); if (extension && StringUtil::Strcasecmp(extension, ".png") == 0) diff --git a/src/core/gpu.h b/src/core/gpu.h index 41424c41e..97547036d 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -288,7 +288,7 @@ protected: void HandleGetGPUInfoCommand(u32 value); // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); @@ -297,6 +297,7 @@ protected: virtual void ClearDisplay(); virtual void UpdateDisplay(); virtual void DrawRendererStats(bool is_idle_frame); + virtual void UpdateDelayedVRAMReadBuffer(); ALWAYS_INLINE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, bool textured, bool semitransparent) diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index fea325276..9d96ff87f 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -571,7 +571,8 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() FlushRender(); // ensure VRAM shadow is up to date - ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); + ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, + !g_settings.gpu_delay_vram_reads); if (g_settings.debugging.dump_vram_to_cpu_copies) { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index f28e2df47..109bb5bab 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -42,11 +42,14 @@ protected: enum : u32 { VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32), + 
VRAM_READ_TEXTURE_WIDTH = VRAM_WIDTH / 2, + VRAM_READ_TEXTURE_HEIGHT = VRAM_HEIGHT, VERTEX_BUFFER_SIZE = 1 * 1024 * 1024, UNIFORM_BUFFER_SIZE = 512 * 1024, MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2, MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * - (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u) + (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u), + NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE = 2, }; struct BatchVertex diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index da0e8b2fe..08a02c603 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -170,7 +170,7 @@ void GPU_HW_D3D11::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); ResetGraphicsAPIState(); m_host_display->ClearDisplayTexture(); CreateFramebuffer(); @@ -933,7 +933,7 @@ void GPU_HW_D3D11::UpdateDisplay() } } -void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) { // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -973,7 +973,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { // CPU round trip if oversized for now. 
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); GPU::FillVRAM(x, y, width, height, color); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); return; diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index fab7a7e7f..6ec7a36fd 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -31,7 +31,7 @@ public: protected: void ClearDisplay() override; void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 7502f2cfe..e0bb30f91 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -245,7 +245,7 @@ void GPU_HW_OpenGL::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); ResetGraphicsAPIState(); m_host_display->ClearDisplayTexture(); CreateFramebuffer(); @@ -394,8 +394,8 @@ bool GPU_HW_OpenGL::CreateFramebuffer() !m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true) || !m_vram_read_texture.CreateFramebuffer() || - !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, - false) || + !m_vram_encoding_texture.Create(VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT, 1, GL_RGBA8, GL_RGBA, + GL_UNSIGNED_BYTE, nullptr, false) || !m_vram_encoding_texture.CreateFramebuffer() || 
!m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) || @@ -426,6 +426,17 @@ bool GPU_HW_OpenGL::CreateFramebuffer() if (m_state_copy_fbo_id == 0) glGenFramebuffers(1, &m_state_copy_fbo_id); + if (g_settings.gpu_delay_vram_reads) + { + for (GL::StagingTexture& tex : m_delayed_vram_read_buffer) + { + if (!tex.Create(VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT, GL_RGBA8, true)) + return false; + } + + m_current_delayed_vram_read_buffer = 0; + } + SetFullVRAMDirtyRectangle(); return true; } @@ -971,10 +982,8 @@ void GPU_HW_OpenGL::UpdateDisplay() } } -void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +void GPU_HW_OpenGL::DoVRAMReadback(const Common::Rectangle& copy_rect, u32 dst_x, u32 dst_y) { - // Get bounds with wrap-around handled. - const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; const u32 encoded_height = copy_rect.GetHeight(); @@ -985,21 +994,61 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) m_vram_texture.Bind(); m_vram_read_program.Bind(); UploadUniformBuffer(uniforms, sizeof(uniforms)); + glDisable(GL_DEPTH_TEST); glDisable(GL_BLEND); glDisable(GL_SCISSOR_TEST); - glViewport(0, 0, encoded_width, encoded_height); + glViewport(dst_x, dst_y, encoded_width, encoded_height); glBindVertexArray(m_attributeless_vao_id); glDrawArrays(GL_TRIANGLES, 0, 3); - // Readback encoded texture. 
- m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - glPixelStorei(GL_PACK_ALIGNMENT, 2); - glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); - glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); - glPixelStorei(GL_PACK_ALIGNMENT, 4); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - RestoreGraphicsAPIState(); + SetBlendMode(); + SetDepthFunc(); + glEnable(GL_SCISSOR_TEST); + glBindVertexArray(m_vao_id); + glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); +} + +void GPU_HW_OpenGL::UpdateDelayedVRAMReadBuffer() +{ + DoVRAMReadback(Common::Rectangle(0, 0, VRAM_WIDTH, VRAM_HEIGHT), 0, 0); + + GL::StagingTexture& st = m_delayed_vram_read_buffer[m_current_delayed_vram_read_buffer]; + st.CopyFromTexture(m_vram_encoding_texture, 0, 0, 0, 0, 0, 0, VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT); + + m_vram_read_texture.Bind(); + + m_current_delayed_vram_read_buffer = + (m_current_delayed_vram_read_buffer + 1) % NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE; +} + +void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) +{ + // Get bounds with wrap-around handled. + Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + copy_rect.left &= ~static_cast(1); + + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + if (no_delay) + { + DoVRAMReadback(copy_rect, 0, 0); + + // Readback encoded texture. 
+ m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); + glPixelStorei(GL_PACK_ALIGNMENT, 2); + glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); + glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + } + else + { + GL::StagingTexture& st = m_delayed_vram_read_buffer[m_current_delayed_vram_read_buffer]; + st.ReadTexels(copy_rect.left / 2, copy_rect.top, encoded_width, encoded_height, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], VRAM_WIDTH * sizeof(u16)); + } } void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) @@ -1008,7 +1057,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { // CPU round trip if oversized for now. Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); GPU::FillVRAM(x, y, width, height, color); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); return; @@ -1100,7 +1149,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* { // CPU round trip if oversized for now. 
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); return; diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index fec8b003d..7f283460f 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -1,6 +1,7 @@ #pragma once #include "common/gl/program.h" #include "common/gl/shader_cache.h" +#include "common/gl/staging_texture.h" #include "common/gl/stream_buffer.h" #include "common/gl/texture.h" #include "glad.h" @@ -27,13 +28,14 @@ public: protected: void ClearDisplay() override; void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; void ClearDepthBuffer() override; + void UpdateDelayedVRAMReadBuffer() override; void SetScissorFromDrawingArea() override; void MapBatchVertexPointer(u32 required_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override; @@ -67,6 +69,8 @@ private: bool CompilePrograms(); + void DoVRAMReadback(const Common::Rectangle& copy_rect, u32 dst_x, u32 dst_y); + void SetDepthFunc(); void SetDepthFunc(GLenum func); void SetBlendMode(); @@ -116,4 +120,7 @@ private: GL::Texture m_downsample_texture; GL::Program m_downsample_program; + + std::array m_delayed_vram_read_buffer; + u32 m_current_delayed_vram_read_buffer = 0; }; diff --git 
a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index d58b5d845..aeab31bfa 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -201,7 +201,7 @@ void GPU_HW_Vulkan::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); ResetGraphicsAPIState(); } @@ -528,15 +528,20 @@ bool GPU_HW_Vulkan::CreateFramebuffer() VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + !m_vram_readback_texture.Create(VRAM_WIDTH / 2u, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) || - !m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2, - VRAM_HEIGHT)) + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) { return false; } + for (Vulkan::StagingTexture& st : m_vram_readback_staging_textures) + { + if (!st.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2u, VRAM_HEIGHT)) + return false; + } + m_current_vram_readback_staging_texture = 0; + m_vram_render_pass = g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); m_vram_update_depth_render_pass = @@ -757,7 +762,10 @@ void GPU_HW_Vulkan::DestroyFramebuffer() m_vram_texture.Destroy(false); m_vram_readback_texture.Destroy(false); m_display_texture.Destroy(false); - m_vram_readback_staging_texture.Destroy(false); + + for (auto& it : m_vram_readback_staging_textures) + it.Destroy(false); + m_current_vram_readback_staging_texture = 0; } bool GPU_HW_Vulkan::CreateVertexBuffer() @@ -1417,15 +1425,11 @@ void 
GPU_HW_Vulkan::UpdateDisplay() } } -void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +void GPU_HW_Vulkan::DoVRAMReadback(Vulkan::StagingTexture& staging_texture, const Common::Rectangle& copy_rect, u32 dst_x, u32 dst_y) { - // Get bounds with wrap-around handled. - const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; const u32 encoded_height = copy_rect.GetHeight(); - EndRenderPass(); - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); @@ -1442,7 +1446,7 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) uniforms); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); + Vulkan::Util::SetViewportAndScissor(cmdbuf, dst_x, dst_y, encoded_width, encoded_height); vkCmdDraw(cmdbuf, 3, 1, 0, 0); EndRenderPass(); @@ -1451,13 +1455,38 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); // Stage the readback. 
- m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, - encoded_height); + staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, encoded_height); +} + +void GPU_HW_Vulkan::UpdateDelayedVRAMReadBuffer() +{ + EndRenderPass(); + + DoVRAMReadback(m_vram_readback_staging_textures[m_current_vram_readback_staging_texture], + Common::Rectangle<u32>(0, 0, VRAM_WIDTH, VRAM_HEIGHT), 0, 0); + m_current_vram_readback_staging_texture = + (m_current_vram_readback_staging_texture + 1) % NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE; + + RestoreGraphicsAPIState(); +} + +void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) +{ + EndRenderPass(); + + // Get bounds with wrap-around handled. + Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height); + copy_rect.left &= ~static_cast<u32>(1); + + Vulkan::StagingTexture& staging_texture = m_vram_readback_staging_textures[m_current_vram_readback_staging_texture]; + if (no_delay) + DoVRAMReadback(staging_texture, Common::Rectangle<u32>(0, 0, VRAM_WIDTH, VRAM_HEIGHT), 0, 0); // And copy it into our shadow buffer (will execute command buffer and stall). - m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], - VRAM_WIDTH * sizeof(u16)); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + staging_texture.ReadTexels(copy_rect.left / 2u, copy_rect.top, encoded_width, encoded_height, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], VRAM_WIDTH * sizeof(u16)); RestoreGraphicsAPIState(); } @@ -1468,7 +1497,7 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { // CPU round trip if oversized for now. 
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true); GPU::FillVRAM(x, y, width, height, color); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); return; diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h index 5d7ccb846..52a4e6312 100644 --- a/src/core/gpu_hw_vulkan.h +++ b/src/core/gpu_hw_vulkan.h @@ -26,7 +26,7 @@ public: protected: void ClearDisplay() override; void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; @@ -38,6 +38,7 @@ protected: void UnmapBatchVertexPointer(u32 used_vertices) override; void UploadUniformBuffer(const void* data, u32 data_size) override; void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; + void UpdateDelayedVRAMReadBuffer() override; private: enum : u32 @@ -67,6 +68,9 @@ private: bool CompilePipelines(); void DestroyPipelines(); + void DoVRAMReadback(Vulkan::StagingTexture& staging_texture, const Common::Rectangle<u32>& copy_rect, u32 dst_x, + u32 dst_y); + bool CreateTextureReplacementStreamBuffer(); bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); @@ -95,9 +99,10 @@ private: Vulkan::Texture m_vram_depth_texture; Vulkan::Texture m_vram_read_texture; Vulkan::Texture m_vram_readback_texture; - Vulkan::StagingTexture m_vram_readback_staging_texture; + std::array<Vulkan::StagingTexture, NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE> m_vram_readback_staging_textures; Vulkan::Texture m_display_texture; bool m_use_ssbos_for_vram_writes = false; 
+ u32 m_current_vram_readback_staging_texture = 0; VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 0242b0cb9..819dc317e 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -836,7 +836,7 @@ void GPU_SW::DispatchRenderCommand() } } -void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) { m_backend.Sync(); } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 1d14fb3e0..17acc4e17 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -23,7 +23,7 @@ public: void UpdateSettings() override; protected: - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 31c474c9a..315dd1373 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -164,6 +164,7 @@ void Settings::Load(SettingsInterface& si) gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", true); gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); + gpu_delay_vram_reads = si.GetBoolValue("GPU", "DelayVRAMReads", false); gpu_24bit_chroma_smoothing = si.GetBoolValue("GPU", "ChromaSmoothing24Bit", false); gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false); gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); @@ -328,6 +329,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "DisableInterlacing", 
gpu_disable_interlacing); si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); + si.SetBoolValue("GPU", "DelayVRAMReads", gpu_delay_vram_reads); si.SetBoolValue("GPU", "ChromaSmoothing24Bit", gpu_24bit_chroma_smoothing); si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable); si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); diff --git a/src/core/settings.h b/src/core/settings.h index 5b4b2b42a..f0fa1c280 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -116,6 +116,7 @@ struct Settings bool gpu_disable_interlacing = false; bool gpu_force_ntsc_timings = false; bool gpu_widescreen_hack = false; + bool gpu_delay_vram_reads = false; bool gpu_pgxp_enable = false; bool gpu_pgxp_culling = true; bool gpu_pgxp_texture_correction = true; diff --git a/src/duckstation-qt/advancedsettingswidget.cpp b/src/duckstation-qt/advancedsettingswidget.cpp index 691fe9816..fb3d8d34a 100644 --- a/src/duckstation-qt/advancedsettingswidget.cpp +++ b/src/duckstation-qt/advancedsettingswidget.cpp @@ -192,6 +192,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, 1000, Settings::DEFAULT_GPU_MAX_RUN_AHEAD); addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Use Debug Host GPU Device"), "GPU", "UseDebugDevice", false); + addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Delay VRAM Reads (Hack)"), "GPU", "DelayVRAMReads", + false); addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Increase Timer Resolution"), "Main", "IncreaseTimerResolution", true); @@ -235,5 +237,6 @@ void AdvancedSettingsWidget::onResetToDefaultClicked() setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE)); setIntRangeTweakOption(m_ui.tweakOptionTable, 19, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD)); setBooleanTweakOption(m_ui.tweakOptionTable, 20, false); - setBooleanTweakOption(m_ui.tweakOptionTable, 
21, true); + setBooleanTweakOption(m_ui.tweakOptionTable, 21, false); + setBooleanTweakOption(m_ui.tweakOptionTable, 22, true); }