From 89e54fbd6ca9cdfdb5766ea9ecf5fa1d81e786c7 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 11 May 2016 22:19:59 +1000 Subject: [PATCH] OGL: Work around slowdown of glMapBufferRange with SSBO on NVIDIA drivers Using glMapBufferRange to read back the contents of the SSBO is extremely slow on NVIDIA drivers. This is more noticeable at higher internal resolutions. Using glGetBufferSubData instead does not seem to exhibit this slowdown. --- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 22 +++++++++++++++---- Source/Core/VideoCommon/DriverDetails.cpp | 1 + Source/Core/VideoCommon/DriverDetails.h | 8 +++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index b8e424f639..c4c0012ddc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -6,6 +6,7 @@ #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; @@ -42,12 +43,25 @@ int BoundingBox::Get(int index) { int data = 0; glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT); - if (ptr) + + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA)) { - data = *(int*)ptr; - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); } + else + { + // Using glMapBufferRange is faster on AMD cards by a measurable margin. 
+ void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT); + if (ptr) + { + memcpy(&data, ptr, sizeof(int)); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); return data; } diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 7e7aa9039b..9e147aec6b 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -60,6 +60,7 @@ namespace DriverDetails {OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true}, + {OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index ebfe4a1944..7b9f999961 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -184,6 +184,14 @@ namespace DriverDetails // Qualcomm seems to have lots of overhead on exlicit flushing, but the coherent mapping path is fine. // So let's use coherent mapping there. BUG_BROKENEXPLICITFLUSH, + + // Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers + // Started Version: -1 + // Ended Version: -1 + // Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this is slow on AMD drivers compared to + // using glMapBufferRange. glMapBufferRange is slower on NVIDIA drivers; we suspect this is due to the first call moving the buffer from + // GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else. 
+ BUG_SLOWGETBUFFERSUBDATA, }; // Initializes our internal vendor, device family, and driver version