OSX: don't avoid unsync mapping on nvidia gpus just because the windows driver doesn't like it

OSX has its own driver for nvidia gpus, so it doesn't share the performance issues of the closed source linux and windows nvidia driver. So nvidia gpus on OSX will now also use the MapAndSync backend, like all other osx drivers. Fixes issue 6596. I've also cleaned up the if/else block selecting the best backend a bit.
2014-01-26 10:55:10 +01:00 · 2014-01-26 10:55:10 +01:00 · d3fd0eddbb
parent 6f74f59427
commit d3fd0eddbb
3 changed files with 47 additions and 18 deletions
--- a/Source/Core/VideoBackends/OGL/StreamBuffer.cpp
+++ b/Source/Core/VideoBackends/OGL/StreamBuffer.cpp
@ -152,7 +152,7 @@ public:
 			m_iterator = 0;
 		}
 		u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
-			GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
+			GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
 		return std::make_pair(pointer, m_iterator);
 	}

@ -187,7 +187,7 @@ public:
 		Align(stride);
 		AllocMemory(size);
 		u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
-			GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
+			GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
 		return std::make_pair(pointer, m_iterator);
 	}

@ -346,24 +346,40 @@ public:
 // choose best streaming library based on the supported extensions and known issues
 StreamBuffer* StreamBuffer::Create(u32 type, size_t size)
 {
-	bool nvidia = !strcmp(g_ogl_config.gl_vendor, "NVIDIA Corporation");
+	// without basevertex support, only streaming methods which upload everything to zero work fine:
+	if(!g_ogl_config.bSupportsGLBaseVertex)
+	{
+		if(!DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
+			return new BufferSubData(type, size);

-	if (g_ogl_config.bSupportsGLBufferStorage &&
-		!(DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTORAGE) && type == GL_ARRAY_BUFFER))
-		return new BufferStorage(type, size);
-	else if(!g_ogl_config.bSupportsGLBaseVertex && !DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
-		return new BufferSubData(type, size);
-	else if(!g_ogl_config.bSupportsGLBaseVertex)
+		// BufferData is by far the worst way, only use it if needed
 		return new BufferData(type, size);
-	else if(g_ogl_config.bSupportsGLSync && g_ogl_config.bSupportsGLPinnedMemory &&
-		!(DriverDetails::HasBug(DriverDetails::BUG_BROKENPINNEDMEMORY) && type == GL_ELEMENT_ARRAY_BUFFER))
-		return new PinnedMemory(type, size);
-	else if(nvidia)
-		return new BufferSubData(type, size);
-	else if(g_ogl_config.bSupportsGLSync)
-		return new MapAndSync(type, size);
-	else
-		return new MapAndOrphan(type, size);
+	}
+
+	// Prefer the syncing buffers over the orphaning one
+	if(g_ogl_config.bSupportsGLSync)
+	{
+		// try to use buffer storage whenever possible
+		if (g_ogl_config.bSupportsGLBufferStorage &&
+			!(DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTORAGE) && type == GL_ARRAY_BUFFER))
+			return new BufferStorage(type, size);
+
+		// pinned memory is almost as fine
+		if(g_ogl_config.bSupportsGLPinnedMemory &&
+			!(DriverDetails::HasBug(DriverDetails::BUG_BROKENPINNEDMEMORY) && type == GL_ELEMENT_ARRAY_BUFFER))
+			return new PinnedMemory(type, size);
+
+		// don't fall back to MapAnd* for nvidia drivers
+		if(DriverDetails::HasBug(DriverDetails::BUG_BROKENUNSYNCMAPPING))
+			return new BufferSubData(type, size);
+
+		// mapping fallback
+		if(g_ogl_config.bSupportsGLSync)
+			return new MapAndSync(type, size);
+	}
+
+	// default fallback, should work everywhere, but isn't the best way to do this job
+	return new MapAndOrphan(type, size);
 }

 }
--- a/Source/Core/VideoCommon/DriverDetails.cpp
+++ b/Source/Core/VideoCommon/DriverDetails.cpp
@ -55,6 +55,8 @@ namespace DriverDetails
 		{OS_WINDOWS,VENDOR_NVIDIA,   DRIVER_NVIDIA,       -1, BUG_BROKENBUFFERSTORAGE, -1.0, 33220.0, true},
 		{OS_LINUX,  VENDOR_NVIDIA,   DRIVER_NVIDIA,       -1, BUG_BROKENBUFFERSTORAGE, -1.0, 33138.0, true},
 		{OS_OSX,    VENDOR_INTEL,    DRIVER_INTEL,      3000, BUG_PRIMITIVERESTART,    -1.0, -1.0, true},
+		{OS_WINDOWS,VENDOR_NVIDIA,   DRIVER_NVIDIA,       -1, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
+		{OS_LINUX,  VENDOR_NVIDIA,   DRIVER_NVIDIA,       -1, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
 	};

 	std::map<Bug, BugInfo> m_bugs;
--- a/Source/Core/VideoCommon/DriverDetails.h
+++ b/Source/Core/VideoCommon/DriverDetails.h
@ -154,6 +154,17 @@ namespace DriverDetails
 		// The drivers on OS X has broken primitive restart.
 		// Intel HD 4000 series isn't affected by the bug
 		BUG_PRIMITIVERESTART,
+		// Bug: unsync mapping doesn't work fine
+		// Affected devices: nvidia driver
+		// Started Version: -1
+		// Ended Version: -1
+		// The nvidia driver (both windows + linux) doesn't like unsync mapping performance wise.
+		// Because of their threaded behavior, they seem not to handle unsync mapping completely unsynchronized;
+		// in fact, they serialize the driver, which adds a much bigger overhead.
+		// Workaround: Use BufferSubData
+		// TODO: some windows AMD driver/gpu combination seems also affected
+		//       but as they all support pinned memory, it doesn't matter
+		BUG_BROKENUNSYNCMAPPING,
 	};

 	// Initializes our internal vendor, device family, and driver version