Merge pull request #3831 from stenzek/gl-bbox

OGL: Improve performance of bounding box on NVIDIA drivers
2016-05-19 13:31:30 +02:00 · 2016-05-19 13:31:30 +02:00 · d2db329a42
parent 24ea2dc2da 89e54fbd6c
commit d2db329a42
3 changed files with 27 additions and 4 deletions
--- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp
+++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp
@@ -6,6 +6,7 @@

 #include "VideoBackends/OGL/BoundingBox.h"

+#include "VideoCommon/DriverDetails.h"
 #include "VideoCommon/VideoConfig.h"

 static GLuint s_bbox_buffer_id;
@@ -42,12 +43,25 @@ int BoundingBox::Get(int index)
 {
 	int data = 0;
 	glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
-	void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
-	if (ptr)
+
+	if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA))
 	{
-		data = *(int*)ptr;
-		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+		// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
+		// on NVIDIA drivers. This is more noticeable at higher internal resolutions.
+		// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
+		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
 	}
+	else
+	{
+		// Using glMapBufferRange is faster on AMD cards by a measurable margin.
+		void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
+		if (ptr)
+		{
+			memcpy(&data, ptr, sizeof(int));
+			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+		}
+	}
+
 	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
 	return data;
 }
--- a/Source/Core/VideoCommon/DriverDetails.cpp
+++ b/Source/Core/VideoCommon/DriverDetails.cpp
@@ -60,6 +60,7 @@ namespace DriverDetails
 		{OS_WINDOWS,VENDOR_NVIDIA,   DRIVER_NVIDIA,   Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
 		{OS_LINUX,  VENDOR_NVIDIA,   DRIVER_NVIDIA,   Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
 		{OS_WINDOWS,VENDOR_INTEL,    DRIVER_INTEL,    Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true},
+		{OS_ALL,    VENDOR_ATI,      DRIVER_ATI,      Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
 	};

 	static std::map<Bug, BugInfo> m_bugs;
--- a/Source/Core/VideoCommon/DriverDetails.h
+++ b/Source/Core/VideoCommon/DriverDetails.h
@@ -184,6 +184,14 @@ namespace DriverDetails
 		// Qualcomm seems to have lots of overhead on explicit flushing, but the coherent mapping path is fine.
 		// So let's use coherent mapping there.
 		BUG_BROKENEXPLICITFLUSH,
+
+		// Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers
+		// Started Version: -1
+		// Ended Version: -1
+		// Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this is slow on AMD drivers compared to
+		// using glMapBufferRange. glMapBufferRange is slower on NVIDIA drivers; we suspect this is due to the first call moving the buffer
+		// from GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else.
+		BUG_SLOWGETBUFFERSUBDATA,
 	};

 	// Initializes our internal vendor, device family, and driver version