Merge pull request #3831 from stenzek/gl-bbox

OGL: Improve performance of bounding box on NVIDIA drivers
This commit is contained in:
Markus Wick 2016-05-19 13:31:30 +02:00
commit d2db329a42
3 changed files with 27 additions and 4 deletions

View File

@ -6,6 +6,7 @@
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h"
static GLuint s_bbox_buffer_id;
@ -42,12 +43,25 @@ int BoundingBox::Get(int index)
{
int data = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
if (ptr)
if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA))
{
data = *(int*)ptr;
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
// on NVIDIA drivers. This is more noticeable at higher internal resolutions.
// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
}
else
{
// Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
if (ptr)
{
memcpy(&data, ptr, sizeof(int));
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return data;
}

View File

@ -60,6 +60,7 @@ namespace DriverDetails
{OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true},
{OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
};
static std::map<Bug, BugInfo> m_bugs;

View File

@ -184,6 +184,14 @@ namespace DriverDetails
// Qualcomm seems to have lots of overhead on explicit flushing, but the coherent mapping path is fine.
// So let's use coherent mapping there.
BUG_BROKENEXPLICITFLUSH,
// Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers
// Started Version: -1
// Ended Version: -1
// Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this is slow on AMD drivers compared to
// using glMapBufferRange. Conversely, glMapBufferRange is slower on NVIDIA drivers; we suspect this is due to the first call moving
// the buffer from GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else.
BUG_SLOWGETBUFFERSUBDATA,
};
// Initializes our internal vendor, device family, and driver version