GPU: Add delay VRAM reads hack

This commit is contained in:
Connor McLaughlin 2021-02-10 01:25:07 +10:00
parent 53abc4cfff
commit e2c894cf55
15 changed files with 161 additions and 52 deletions

View File

@ -228,7 +228,7 @@ bool GPU::DoState(StateWrapper& sw, HostDisplayTexture** host_texture, bool upda
} }
else else
{ {
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
} }
} }
@ -713,6 +713,11 @@ void GPU::UpdateCRTCDisplayParameters()
} }
} }
// Base-class hook for refreshing the buffer used by delayed VRAM reads.
// GPU::WriteGP1 calls this when g_settings.gpu_delay_vram_reads is set; backends that
// support the hack override it.
void GPU::UpdateDelayedVRAMReadBuffer()
{
// Intentionally empty: the base GPU class keeps no delayed read buffer.
}
TickCount GPU::GetPendingCRTCTicks() const TickCount GPU::GetPendingCRTCTicks() const
{ {
const TickCount pending_sysclk_ticks = m_crtc_tick_event->GetTicksSinceLastExecution(); const TickCount pending_sysclk_ticks = m_crtc_tick_event->GetTicksSinceLastExecution();
@ -1079,6 +1084,9 @@ void GPU::WriteGP1(u32 value)
m_crtc_state.regs.display_address_start = new_value; m_crtc_state.regs.display_address_start = new_value;
UpdateCRTCDisplayParameters(); UpdateCRTCDisplayParameters();
} }
if (g_settings.gpu_delay_vram_reads)
UpdateDelayedVRAMReadBuffer();
} }
break; break;
@ -1241,7 +1249,7 @@ void GPU::ClearDisplay() {}
void GPU::UpdateDisplay() {} void GPU::UpdateDisplay() {}
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) {}
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
@ -1467,7 +1475,7 @@ void GPU::SetTextureWindow(u32 value)
bool GPU::DumpVRAMToFile(const char* filename) bool GPU::DumpVRAMToFile(const char* filename)
{ {
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
const char* extension = std::strrchr(filename, '.'); const char* extension = std::strrchr(filename, '.');
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0) if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)

View File

@ -288,7 +288,7 @@ protected:
void HandleGetGPUInfoCommand(u32 value); void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend // Rendering in the backend
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay);
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
@ -297,6 +297,7 @@ protected:
virtual void ClearDisplay(); virtual void ClearDisplay();
virtual void UpdateDisplay(); virtual void UpdateDisplay();
virtual void DrawRendererStats(bool is_idle_frame); virtual void DrawRendererStats(bool is_idle_frame);
// Hook invoked from WriteGP1 when the gpu_delay_vram_reads setting is enabled, giving the
// backend a chance to refresh its delayed VRAM read buffer. The base implementation does nothing.
virtual void UpdateDelayedVRAMReadBuffer();
ALWAYS_INLINE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, bool textured, ALWAYS_INLINE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, bool textured,
bool semitransparent) bool semitransparent)

View File

@ -571,7 +571,8 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
FlushRender(); FlushRender();
// ensure VRAM shadow is up to date // ensure VRAM shadow is up to date
ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height,
!g_settings.gpu_delay_vram_reads);
if (g_settings.debugging.dump_vram_to_cpu_copies) if (g_settings.debugging.dump_vram_to_cpu_copies)
{ {

View File

@ -42,11 +42,14 @@ protected:
enum : u32 enum : u32
{ {
VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32), VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32),
VRAM_READ_TEXTURE_WIDTH = VRAM_WIDTH / 2,
VRAM_READ_TEXTURE_HEIGHT = VRAM_HEIGHT,
VERTEX_BUFFER_SIZE = 1 * 1024 * 1024, VERTEX_BUFFER_SIZE = 1 * 1024 * 1024,
UNIFORM_BUFFER_SIZE = 512 * 1024, UNIFORM_BUFFER_SIZE = 512 * 1024,
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2, MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u) (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u),
NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE = 2,
}; };
struct BatchVertex struct BatchVertex

View File

@ -170,7 +170,7 @@ void GPU_HW_D3D11::UpdateSettings()
if (framebuffer_changed) if (framebuffer_changed)
{ {
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
ResetGraphicsAPIState(); ResetGraphicsAPIState();
m_host_display->ClearDisplayTexture(); m_host_display->ClearDisplayTexture();
CreateFramebuffer(); CreateFramebuffer();
@ -933,7 +933,7 @@ void GPU_HW_D3D11::UpdateDisplay()
} }
} }
void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay)
{ {
// Get bounds with wrap-around handled. // Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height); const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
@ -973,7 +973,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
GPU::FillVRAM(x, y, width, height, color); GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
return; return;

View File

@ -31,7 +31,7 @@ public:
protected: protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;

View File

@ -245,7 +245,7 @@ void GPU_HW_OpenGL::UpdateSettings()
if (framebuffer_changed) if (framebuffer_changed)
{ {
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
ResetGraphicsAPIState(); ResetGraphicsAPIState();
m_host_display->ClearDisplayTexture(); m_host_display->ClearDisplayTexture();
CreateFramebuffer(); CreateFramebuffer();
@ -394,8 +394,8 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
!m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, !m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false,
true) || true) ||
!m_vram_read_texture.CreateFramebuffer() || !m_vram_read_texture.CreateFramebuffer() ||
!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, !m_vram_encoding_texture.Create(VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT, 1, GL_RGBA8, GL_RGBA,
false) || GL_UNSIGNED_BYTE, nullptr, false) ||
!m_vram_encoding_texture.CreateFramebuffer() || !m_vram_encoding_texture.CreateFramebuffer() ||
!m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, !m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale,
1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) || 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
@ -426,6 +426,17 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
if (m_state_copy_fbo_id == 0) if (m_state_copy_fbo_id == 0)
glGenFramebuffers(1, &m_state_copy_fbo_id); glGenFramebuffers(1, &m_state_copy_fbo_id);
if (g_settings.gpu_delay_vram_reads)
{
for (GL::StagingTexture& tex : m_delayed_vram_read_buffer)
{
if (!tex.Create(VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT, GL_RGBA8, true))
return false;
}
m_current_delayed_vram_read_buffer = 0;
}
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
return true; return true;
} }
@ -971,10 +982,8 @@ void GPU_HW_OpenGL::UpdateDisplay()
} }
} }
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) void GPU_HW_OpenGL::DoVRAMReadback(const Common::Rectangle<u32>& copy_rect, u32 dst_x, u32 dst_y)
{ {
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight(); const u32 encoded_height = copy_rect.GetHeight();
@ -985,21 +994,61 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
m_vram_texture.Bind(); m_vram_texture.Bind();
m_vram_read_program.Bind(); m_vram_read_program.Bind();
UploadUniformBuffer(uniforms, sizeof(uniforms)); UploadUniformBuffer(uniforms, sizeof(uniforms));
glDisable(GL_DEPTH_TEST);
glDisable(GL_BLEND); glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
glViewport(0, 0, encoded_width, encoded_height); glViewport(dst_x, dst_y, encoded_width, encoded_height);
glBindVertexArray(m_attributeless_vao_id); glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3); glDrawArrays(GL_TRIANGLES, 0, 3);
// Readback encoded texture. SetBlendMode();
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); SetDepthFunc();
glPixelStorei(GL_PACK_ALIGNMENT, 2); glEnable(GL_SCISSOR_TEST);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); glBindVertexArray(m_vao_id);
glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
glPixelStorei(GL_PACK_ALIGNMENT, 4); }
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
RestoreGraphicsAPIState(); void GPU_HW_OpenGL::UpdateDelayedVRAMReadBuffer()
{
DoVRAMReadback(Common::Rectangle<u32>(0, 0, VRAM_WIDTH, VRAM_HEIGHT), 0, 0);
GL::StagingTexture& st = m_delayed_vram_read_buffer[m_current_delayed_vram_read_buffer];
st.CopyFromTexture(m_vram_encoding_texture, 0, 0, 0, 0, 0, 0, VRAM_READ_TEXTURE_WIDTH, VRAM_READ_TEXTURE_HEIGHT);
m_vram_read_texture.Bind();
m_current_delayed_vram_read_buffer =
(m_current_delayed_vram_read_buffer + 1) % NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE;
}
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay)
{
// Get bounds with wrap-around handled.
Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
copy_rect.left &= ~static_cast<u32>(1);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
if (no_delay)
{
DoVRAMReadback(copy_rect, 0, 0);
// Readback encoded texture.
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glPixelStorei(GL_PACK_ALIGNMENT, 2);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
else
{
GL::StagingTexture& st = m_delayed_vram_read_buffer[m_current_delayed_vram_read_buffer];
st.ReadTexels(copy_rect.left / 2, copy_rect.top, encoded_width, encoded_height,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], VRAM_WIDTH * sizeof(u16));
}
} }
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
@ -1008,7 +1057,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
GPU::FillVRAM(x, y, width, height, color); GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
return; return;
@ -1100,7 +1149,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
return; return;

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "common/gl/program.h" #include "common/gl/program.h"
#include "common/gl/shader_cache.h" #include "common/gl/shader_cache.h"
#include "common/gl/staging_texture.h"
#include "common/gl/stream_buffer.h" #include "common/gl/stream_buffer.h"
#include "common/gl/texture.h" #include "common/gl/texture.h"
#include "glad.h" #include "glad.h"
@ -27,13 +28,14 @@ public:
protected: protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override; void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override; void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override; void ClearDepthBuffer() override;
// Encodes the whole of VRAM, copies it into the current delayed-read staging texture, and then
// advances m_current_delayed_vram_read_buffer to the next slot.
void UpdateDelayedVRAMReadBuffer() override;
void SetScissorFromDrawingArea() override; void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override; void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -67,6 +69,8 @@ private:
bool CompilePrograms(); bool CompilePrograms();
// Runs the VRAM read program for copy_rect with the viewport placed at (dst_x, dst_y), sized to the
// encoded rectangle (half the width, rounded up), then re-applies the blend, depth, scissor, VAO,
// viewport and draw-framebuffer state used for normal rendering.
void DoVRAMReadback(const Common::Rectangle<u32>& copy_rect, u32 dst_x, u32 dst_y);
void SetDepthFunc(); void SetDepthFunc();
void SetDepthFunc(GLenum func); void SetDepthFunc(GLenum func);
void SetBlendMode(); void SetBlendMode();
@ -116,4 +120,7 @@ private:
GL::Texture m_downsample_texture; GL::Texture m_downsample_texture;
GL::Program m_downsample_program; GL::Program m_downsample_program;
std::array<GL::StagingTexture, NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE> m_delayed_vram_read_buffer;
u32 m_current_delayed_vram_read_buffer = 0;
}; };

View File

@ -201,7 +201,7 @@ void GPU_HW_Vulkan::UpdateSettings()
if (framebuffer_changed) if (framebuffer_changed)
{ {
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
ResetGraphicsAPIState(); ResetGraphicsAPIState();
} }
@ -528,15 +528,20 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, !m_vram_readback_texture.Create(VRAM_WIDTH / 2u, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) || VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2,
VRAM_HEIGHT))
{ {
return false; return false;
} }
for (Vulkan::StagingTexture& st : m_vram_readback_staging_textures)
{
if (!st.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2u, VRAM_HEIGHT))
return false;
}
m_current_vram_readback_staging_texture = 0;
m_vram_render_pass = m_vram_render_pass =
g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD);
m_vram_update_depth_render_pass = m_vram_update_depth_render_pass =
@ -757,7 +762,10 @@ void GPU_HW_Vulkan::DestroyFramebuffer()
m_vram_texture.Destroy(false); m_vram_texture.Destroy(false);
m_vram_readback_texture.Destroy(false); m_vram_readback_texture.Destroy(false);
m_display_texture.Destroy(false); m_display_texture.Destroy(false);
m_vram_readback_staging_texture.Destroy(false);
for (auto& it : m_vram_readback_staging_textures)
it.Destroy(false);
m_current_vram_readback_staging_texture = 0;
} }
bool GPU_HW_Vulkan::CreateVertexBuffer() bool GPU_HW_Vulkan::CreateVertexBuffer()
@ -1417,15 +1425,11 @@ void GPU_HW_Vulkan::UpdateDisplay()
} }
} }
void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) void GPU_HW_Vulkan::DoVRAMReadback(Vulkan::StagingTexture& staging_texture, const Common::Rectangle<u32>& copy_rect, u32 dst_x, u32 dst_y)
{ {
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight(); const u32 encoded_height = copy_rect.GetHeight();
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
@ -1442,7 +1446,7 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
uniforms); uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_read_descriptor_set, 0, nullptr); &m_vram_read_descriptor_set, 0, nullptr);
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); Vulkan::Util::SetViewportAndScissor(cmdbuf, dst_x, dst_y, encoded_width, encoded_height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0); vkCmdDraw(cmdbuf, 3, 1, 0, 0);
EndRenderPass(); EndRenderPass();
@ -1451,13 +1455,38 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// Stage the readback. // Stage the readback.
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, encoded_height);
encoded_height); }
// Snapshots the whole of VRAM into the current readback staging texture and then advances
// m_current_vram_readback_staging_texture to the next slot.
void GPU_HW_Vulkan::UpdateDelayedVRAMReadBuffer()
{
  // Same preamble as ReadVRAM(): end the current render pass before recording the readback.
  EndRenderPass();

  // DoVRAMReadback() is declared with an explicit destination offset (dst_x, dst_y). The full-VRAM
  // snapshot is written at the origin, matching the OpenGL backend which passes 0, 0 here.
  DoVRAMReadback(m_vram_readback_staging_textures[m_current_vram_readback_staging_texture],
                 Common::Rectangle<u32>(0, 0, VRAM_WIDTH, VRAM_HEIGHT), 0, 0);

  // Rotate to the next staging texture, wrapping at the number of slots.
  m_current_vram_readback_staging_texture =
    (m_current_vram_readback_staging_texture + 1) % NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE;

  RestoreGraphicsAPIState();
}
void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay)
{
EndRenderPass();
// Get bounds with wrap-around handled.
Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
copy_rect.left &= ~static_cast<u32>(1);
Vulkan::StagingTexture& staging_texture = m_vram_readback_staging_textures[m_current_vram_readback_staging_texture];
if (no_delay)
DoVRAMReadback(staging_texture, Common::Rectangle<u32>(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
// And copy it into our shadow buffer (will execute command buffer and stall). // And copy it into our shadow buffer (will execute command buffer and stall).
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height, const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], const u32 encoded_height = copy_rect.GetHeight();
VRAM_WIDTH * sizeof(u16)); staging_texture.ReadTexels(copy_rect.left / 2u, copy_rect.top, encoded_width, encoded_height,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], VRAM_WIDTH * sizeof(u16));
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
@ -1468,7 +1497,7 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, true);
GPU::FillVRAM(x, y, width, height, color); GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
return; return;

View File

@ -26,7 +26,7 @@ public:
protected: protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
@ -38,6 +38,7 @@ protected:
void UnmapBatchVertexPointer(u32 used_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override;
void UploadUniformBuffer(const void* data, u32 data_size) override; void UploadUniformBuffer(const void* data, u32 data_size) override;
void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override;
// Vulkan backend hook that snapshots VRAM into the current readback staging texture.
// NOTE(review): the neighbouring declarations in this class carry `override`, and the GPU base class
// declares this function virtual; this one appears to be missing `override` — confirm and add it.
void UpdateDelayedVRAMReadBuffer();
private: private:
enum : u32 enum : u32
@ -67,6 +68,9 @@ private:
bool CompilePipelines(); bool CompilePipelines();
void DestroyPipelines(); void DestroyPipelines();
// Encodes copy_rect of VRAM into the readback texture, using (dst_x, dst_y) as the viewport/scissor
// origin, and stages the result into staging_texture.
// dst_x and dst_y default to 0 so that call sites which omit the destination offset (the full-VRAM
// snapshots in UpdateDelayedVRAMReadBuffer() and ReadVRAM()) compile and write at the origin.
void DoVRAMReadback(Vulkan::StagingTexture& staging_texture, const Common::Rectangle<u32>& copy_rect, u32 dst_x = 0,
u32 dst_y = 0);
bool CreateTextureReplacementStreamBuffer(); bool CreateTextureReplacementStreamBuffer();
bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height);
@ -95,9 +99,10 @@ private:
Vulkan::Texture m_vram_depth_texture; Vulkan::Texture m_vram_depth_texture;
Vulkan::Texture m_vram_read_texture; Vulkan::Texture m_vram_read_texture;
Vulkan::Texture m_vram_readback_texture; Vulkan::Texture m_vram_readback_texture;
Vulkan::StagingTexture m_vram_readback_staging_texture; std::array<Vulkan::StagingTexture, NUM_VRAM_STAGING_TEXTURES_IN_DELAYED_MODE> m_vram_readback_staging_textures;
Vulkan::Texture m_display_texture; Vulkan::Texture m_display_texture;
bool m_use_ssbos_for_vram_writes = false; bool m_use_ssbos_for_vram_writes = false;
u32 m_current_vram_readback_staging_texture = 0;
VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE;
VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE;

View File

@ -836,7 +836,7 @@ void GPU_SW::DispatchRenderCommand()
} }
} }
void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay)
{ {
m_backend.Sync(); m_backend.Sync();
} }

View File

@ -23,7 +23,7 @@ public:
void UpdateSettings() override; void UpdateSettings() override;
protected: protected:
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height, bool no_delay) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;

View File

@ -164,6 +164,7 @@ void Settings::Load(SettingsInterface& si)
gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", true); gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", true);
gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false); gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false);
gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false);
gpu_delay_vram_reads = si.GetBoolValue("GPU", "DelayVRAMReads", false);
gpu_24bit_chroma_smoothing = si.GetBoolValue("GPU", "ChromaSmoothing24Bit", false); gpu_24bit_chroma_smoothing = si.GetBoolValue("GPU", "ChromaSmoothing24Bit", false);
gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false); gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false);
gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true);
@ -328,6 +329,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetBoolValue("GPU", "DisableInterlacing", gpu_disable_interlacing); si.SetBoolValue("GPU", "DisableInterlacing", gpu_disable_interlacing);
si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings); si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings);
si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack);
si.SetBoolValue("GPU", "DelayVRAMReads", gpu_delay_vram_reads);
si.SetBoolValue("GPU", "ChromaSmoothing24Bit", gpu_24bit_chroma_smoothing); si.SetBoolValue("GPU", "ChromaSmoothing24Bit", gpu_24bit_chroma_smoothing);
si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable); si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable);
si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling);

View File

@ -116,6 +116,7 @@ struct Settings
bool gpu_disable_interlacing = false; bool gpu_disable_interlacing = false;
bool gpu_force_ntsc_timings = false; bool gpu_force_ntsc_timings = false;
bool gpu_widescreen_hack = false; bool gpu_widescreen_hack = false;
bool gpu_delay_vram_reads = true;
bool gpu_pgxp_enable = false; bool gpu_pgxp_enable = false;
bool gpu_pgxp_culling = true; bool gpu_pgxp_culling = true;
bool gpu_pgxp_texture_correction = true; bool gpu_pgxp_texture_correction = true;

View File

@ -192,6 +192,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
1000, Settings::DEFAULT_GPU_MAX_RUN_AHEAD); 1000, Settings::DEFAULT_GPU_MAX_RUN_AHEAD);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Use Debug Host GPU Device"), "GPU", addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Use Debug Host GPU Device"), "GPU",
"UseDebugDevice", false); "UseDebugDevice", false);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Delay VRAM Reads (Hack)"), "GPU", "DelayVRAMReads",
false);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Increase Timer Resolution"), "Main", addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Increase Timer Resolution"), "Main",
"IncreaseTimerResolution", true); "IncreaseTimerResolution", true);
@ -235,5 +237,6 @@ void AdvancedSettingsWidget::onResetToDefaultClicked()
setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE)); setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE));
setIntRangeTweakOption(m_ui.tweakOptionTable, 19, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD)); setIntRangeTweakOption(m_ui.tweakOptionTable, 19, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD));
setBooleanTweakOption(m_ui.tweakOptionTable, 20, false); setBooleanTweakOption(m_ui.tweakOptionTable, 20, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 21, true); setBooleanTweakOption(m_ui.tweakOptionTable, 21, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 22, true);
} }