diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index b8e424f639..c4c0012ddc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -6,6 +6,7 @@ #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; @@ -42,12 +43,25 @@ int BoundingBox::Get(int index) { int data = 0; glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT); - if (ptr) + + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA)) { - data = *(int*)ptr; - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + // Default path (driver not flagged with BUG_SLOWGETBUFFERSUBDATA): reading the SSBO back + // via glMapBufferRange is extremely slow on NVIDIA drivers, more noticeably at higher internal + // resolutions. glGetBufferSubData does not seem to exhibit this slowdown, so copy the int with it. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); } + else + { + // Driver flagged with BUG_SLOWGETBUFFERSUBDATA (AMD): glMapBufferRange is measurably faster here. 
+ void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT); + if (ptr) + { + memcpy(&data, ptr, sizeof(int)); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); return data; } diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 7e7aa9039b..9e147aec6b 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -60,6 +60,7 @@ namespace DriverDetails {OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true}, + {OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index ebfe4a1944..7b9f999961 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -184,6 +184,14 @@ namespace DriverDetails // Qualcomm seems to have lots of overhead on exlicit flushing, but the coherent mapping path is fine. // So let's use coherent mapping there. BUG_BROKENEXPLICITFLUSH, + + // Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers + // Started Version: -1 + // Ended Version: -1 + // Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but on AMD drivers this is slower than + // using glMapBufferRange. On NVIDIA drivers glMapBufferRange is the slower call; we suspect the first call moves the buffer from + // GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else. 
+ BUG_SLOWGETBUFFERSUBDATA, }; // Initializes our internal vendor, device family, and driver version