Use glMultiDrawArrays instead of glDrawArrays, which significantly

reduces the number of OpenGL calls for large sets of triangle strips; 
this is something like a 10% perf improvement in the case I'm looking 
at.



git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2120 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-02-06 03:41:33 +00:00
parent 7cc80c32f9
commit 90c9810b85
1 changed files with 30 additions and 14 deletions

View File

@ -30,7 +30,10 @@ namespace VertexManager
static GLuint s_vboBuffers[0x40] = {0}; static GLuint s_vboBuffers[0x40] = {0};
static int s_nCurVBOIndex = 0; // current free buffer static int s_nCurVBOIndex = 0; // current free buffer
static u8 *s_pBaseBufferPointer = NULL; static u8 *s_pBaseBufferPointer = NULL;
static std::vector< std::pair<u32, u32> > s_vStoredPrimitives; // every element, mode and count to be passed to glDrawArrays static std::vector< GLint > s_vertexFirstOffset;
static std::vector< GLsizei > s_vertexGroupSize;
static std::vector< std::pair< GLenum, int > > s_vertexGroups;
u32 s_vertexCount;
static const GLenum c_primitiveType[8] = static const GLenum c_primitiveType[8] =
{ {
@ -58,7 +61,6 @@ bool Init()
glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_VERTEX_ARRAY);
g_nativeVertexFmt = NULL; g_nativeVertexFmt = NULL;
s_vStoredPrimitives.reserve(1000);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
return true; return true;
@ -70,7 +72,10 @@ void Shutdown()
glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
memset(s_vboBuffers, 0, sizeof(s_vboBuffers)); memset(s_vboBuffers, 0, sizeof(s_vboBuffers));
s_vStoredPrimitives.resize(0); s_vertexFirstOffset.resize(0);
s_vertexGroupSize.resize(0);
s_vertexGroups.resize(0);
s_vertexCount = 0;
s_nCurVBOIndex = 0; s_nCurVBOIndex = 0;
ResetBuffer(); ResetBuffer();
} }
@ -79,7 +84,10 @@ void ResetBuffer()
{ {
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = s_pBaseBufferPointer; s_pCurBufferPointer = s_pBaseBufferPointer;
s_vStoredPrimitives.resize(0); s_vertexFirstOffset.resize(0);
s_vertexGroupSize.resize(0);
s_vertexGroups.resize(0);
s_vertexCount = 0;
} }
int GetRemainingSize() int GetRemainingSize()
@ -93,7 +101,7 @@ void AddVertices(int primitive, int numvertices)
_assert_(g_nativeVertexFmt != NULL); _assert_(g_nativeVertexFmt != NULL);
ADDSTAT(stats.thisFrame.numPrims, numvertices); ADDSTAT(stats.thisFrame.numPrims, numvertices);
if (s_vStoredPrimitives.size() && s_vStoredPrimitives[s_vStoredPrimitives.size() - 1].first == c_primitiveType[primitive]) { if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) {
// We can join primitives for free here. Not likely to help much, though, but whatever... // We can join primitives for free here. Not likely to help much, though, but whatever...
if (c_primitiveType[primitive] == GL_TRIANGLES || if (c_primitiveType[primitive] == GL_TRIANGLES ||
c_primitiveType[primitive] == GL_LINES || c_primitiveType[primitive] == GL_LINES ||
@ -101,14 +109,19 @@ void AddVertices(int primitive, int numvertices)
c_primitiveType[primitive] == GL_QUADS) { c_primitiveType[primitive] == GL_QUADS) {
INCSTAT(stats.thisFrame.numPrimitiveJoins); INCSTAT(stats.thisFrame.numPrimitiveJoins);
// Easy join // Easy join
std::pair<u32, u32> &last_pair = s_vStoredPrimitives[s_vStoredPrimitives.size() - 1]; s_vertexGroupSize.back() += numvertices;
last_pair.second += numvertices; s_vertexCount += numvertices;
return; return;
} }
// Joining strips is a lot more work but would bring more gain. Not sure if it's worth it though.
} }
s_vStoredPrimitives.push_back(std::pair<int, int>(c_primitiveType[primitive], numvertices)); s_vertexFirstOffset.push_back(s_vertexCount);
s_vertexGroupSize.push_back(numvertices);
s_vertexCount += numvertices;
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive])
s_vertexGroups.back().second++;
else
s_vertexGroups.push_back(std::make_pair(c_primitiveType[primitive], 1));
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"}; static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"};
@ -118,7 +131,7 @@ void AddVertices(int primitive, int numvertices)
void Flush() void Flush()
{ {
if (s_vStoredPrimitives.size() == 0) if (s_vertexCount == 0)
return; return;
_assert_(s_pCurBufferPointer != s_pBaseBufferPointer); _assert_(s_pCurBufferPointer != s_pBaseBufferPointer);
@ -260,12 +273,15 @@ void Flush()
PRIM_LOG("\n"); PRIM_LOG("\n");
#endif #endif
int offset = 0; int groupStart = 0;
for (std::vector< std::pair<u32, u32> >::const_iterator it = s_vStoredPrimitives.begin(); it != s_vStoredPrimitives.end(); ++it) for (unsigned i = 0; i < s_vertexGroups.size(); i++)
{ {
INCSTAT(stats.thisFrame.numDrawCalls); INCSTAT(stats.thisFrame.numDrawCalls);
glDrawArrays(it->first, offset, it->second); glMultiDrawArrays(s_vertexGroups[i].first,
offset += it->second; &s_vertexFirstOffset[groupStart],
&s_vertexGroupSize[groupStart],
s_vertexGroups[i].second);
groupStart += s_vertexGroups[i].second;
} }
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)