OGL: Work around slowdown of glMapBufferRange with SSBO on NVIDIA drivers

Using glMapBufferRange to read back the contents of the SSBO is extremely slow on NVIDIA drivers. This is more noticeable at higher internal resolutions. Using glGetBufferSubData instead does not seem to exhibit this slowdown.
2016-05-11 22:19:59 +10:00 · 2016-05-11 22:19:59 +10:00 · 89e54fbd6c
parent 24ea2dc2da
commit 89e54fbd6c
3 changed files with 27 additions and 4 deletions
--- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp
+++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp
@ -6,6 +6,7 @@

 #include "VideoBackends/OGL/BoundingBox.h"

+#include "VideoCommon/DriverDetails.h"
 #include "VideoCommon/VideoConfig.h"

 static GLuint s_bbox_buffer_id;
@ -42,12 +43,25 @@ int BoundingBox::Get(int index)
 {
 	int data = 0;
 	glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
+
+	if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA))
+	{
+		// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
+		// on NVIDIA drivers. This is more noticeable at higher internal resolutions.
+		// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
+		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
+	}
+	else
+	{
+		// Using glMapBufferRange is faster on AMD cards by a measurable margin.
 		void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
 		if (ptr)
 		{
-		data = *(int*)ptr;
+			memcpy(&data, ptr, sizeof(int));
 			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
 		}
+	}
+
 	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
 	return data;
 }
--- a/Source/Core/VideoCommon/DriverDetails.cpp
+++ b/Source/Core/VideoCommon/DriverDetails.cpp
@ -60,6 +60,7 @@ namespace DriverDetails
 		{OS_WINDOWS,VENDOR_NVIDIA,   DRIVER_NVIDIA,   Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
 		{OS_LINUX,  VENDOR_NVIDIA,   DRIVER_NVIDIA,   Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
 		{OS_WINDOWS,VENDOR_INTEL,    DRIVER_INTEL,    Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true},
+		{OS_ALL,    VENDOR_ATI,      DRIVER_ATI,      Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
 	};

 	static std::map<Bug, BugInfo> m_bugs;
--- a/Source/Core/VideoCommon/DriverDetails.h
+++ b/Source/Core/VideoCommon/DriverDetails.h
@ -184,6 +184,14 @@ namespace DriverDetails
 		// Qualcomm seems to have lots of overhead on exlicit flushing, but the coherent mapping path is fine.
 		// So let's use coherent mapping there.
 		BUG_BROKENEXPLICITFLUSH,
+
+		// Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers
+		// Started Version: -1
+		// Ended Version: -1
+		// Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this is slow on AMD drivers compared to
+		// using glMapBufferRange. glMapBufferRange is slower on NVIDIA drivers; we suspect this is due to the first call moving the buffer
+		// from GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else.
+		BUG_SLOWGETBUFFERSUBDATA,
 	};

 	// Initializes our internal vendor, device family, and driver version