GPU/OpenGL: Only use one upload path and clamp to max SSBO size

2021-03-18 12:44:45 +10:00 · 2021-03-18 12:44:45 +10:00 · 391114ccae
parent 703715724e
commit 391114ccae
2 changed files with 30 additions and 22 deletions
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -319,22 +319,29 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)

 #ifdef __APPLE__
  // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver.
-  m_supports_texture_buffer = false;
+  m_use_texture_buffer_for_vram_writes = false;
 #else
-  m_supports_texture_buffer = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2);
+  m_use_texture_buffer_for_vram_writes = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2);
 #endif
-  if (m_supports_texture_buffer)
+  m_texture_stream_buffer_size = VRAM_UPDATE_TEXTURE_BUFFER_SIZE;
+  if (m_use_texture_buffer_for_vram_writes)
  {
-    glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast<GLint*>(&m_max_texture_buffer_size));
-    Log_InfoPrintf("Max texel buffer size: %u", m_max_texture_buffer_size);
-    if (m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
+    GLint max_texel_buffer_size;
+    glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast<GLint*>(&max_texel_buffer_size));
+    Log_InfoPrintf("Max texel buffer size: %u", max_texel_buffer_size);
+    if (max_texel_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
    {
      Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, not using texel buffers.");
-      m_supports_texture_buffer = false;
+      m_use_texture_buffer_for_vram_writes = false;
+    }
+    else
+    {
+      m_texture_stream_buffer_size =
+        std::min<u32>(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast<u32>(max_texel_buffer_size) * sizeof(u16));
    }
  }

-  if (!m_supports_texture_buffer || m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
+  if (!m_use_texture_buffer_for_vram_writes)
  {
    // Try SSBOs.
    GLint max_fragment_storage_blocks = 0;
@@ -352,11 +359,13 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
    if (m_use_ssbo_for_vram_writes)
    {
      Log_InfoPrintf("Using shader storage buffers for VRAM writes.");
+      m_texture_stream_buffer_size =
+        static_cast<u32>(std::min<u64>(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast<u64>(max_ssbo_size)));
    }
    else
    {
-      Log_WarningPrintf(
-        "Texture buffers are not supported, VRAM writes will be slower and multisampling will be unavailable.");
+      Log_WarningPrintf("Texture buffers and SSBOs are not supported, VRAM writes will be slower and multisampling "
+                        "will be unavailable.");
      m_max_multisamples = 1;
      m_supports_per_sample_shading = false;
    }
@@ -475,15 +484,14 @@ bool GPU_HW_OpenGL::CreateUniformBuffer()

 bool GPU_HW_OpenGL::CreateTextureBuffer()
 {
-  // We use the pixel unpack buffer here because we share it with CPU-decoded VRAM writes.
  const GLenum target =
    (m_use_ssbo_for_vram_writes ? GL_SHADER_STORAGE_BUFFER :
-                                  (m_supports_texture_buffer ? GL_TEXTURE_BUFFER : GL_PIXEL_UNPACK_BUFFER));
-  m_texture_stream_buffer = GL::StreamBuffer::Create(target, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
+                                  (m_use_texture_buffer_for_vram_writes ? GL_TEXTURE_BUFFER : GL_PIXEL_UNPACK_BUFFER));
+  m_texture_stream_buffer = GL::StreamBuffer::Create(target, m_texture_stream_buffer_size);
  if (!m_texture_stream_buffer)
    return false;

-  if (m_max_texture_buffer_size > 0)
+  if (m_use_texture_buffer_for_vram_writes)
  {
    glGenTextures(1, &m_texture_buffer_r16ui_texture);
    glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
@@ -673,7 +681,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
  m_vram_update_depth_program = std::move(*prog);
  UPDATE_PROGRESS();

-  if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes)
+  if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes)
  {
    prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
                                   shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes),
@@ -1060,7 +1068,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
  }

  const u32 num_pixels = width * height;
-  if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
+  if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes)
  {
    const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
    std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
@@ -1092,10 +1100,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
  }
  else
  {
-    if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
+    if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT || check_mask)
    {
      // CPU round trip if oversized for now.
-      Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
+      Log_WarningPrintf("Oversized/masked VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
      ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
      GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask);
      UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false);
@@ -1109,6 +1117,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
    // reverse copy the rows so it matches opengl's lower-left origin
    const u32 source_stride = width * sizeof(u16);
    const u8* source_ptr = static_cast<const u8*>(data) + (source_stride * (height - 1));
+    const u16 mask_or = set_mask ? 0x8000 : 0x0000;
    u32* dest_ptr = static_cast<u32*>(map_result.pointer);
    for (u32 row = 0; row < height; row++)
    {
@@ -1119,8 +1128,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
        u16 src_col;
        std::memcpy(&src_col, source_row_ptr, sizeof(src_col));
        source_row_ptr += sizeof(src_col);
-
-        *(dest_ptr++) = RGBA5551ToRGBA8888(src_col);
+        *(dest_ptr++) = RGBA5551ToRGBA8888(src_col | mask_or);
      }

      source_ptr -= source_stride;
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -104,9 +104,9 @@ private:
  GL::Program m_vram_update_depth_program;

  u32 m_uniform_buffer_alignment = 1;
-  u32 m_max_texture_buffer_size = 0;
+  u32 m_texture_stream_buffer_size = 0;

-  bool m_supports_texture_buffer = false;
+  bool m_use_texture_buffer_for_vram_writes = false;
  bool m_use_ssbo_for_vram_writes = false;

  GLenum m_current_depth_test = 0;