Use glMultiDrawArrays instead of glDrawArrays, which significantly
reduces the number of OpenGL calls for large sets of triangle strips; this is something like a 10% perf improvement in the case I'm looking at. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2120 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
7cc80c32f9
commit
90c9810b85
|
@ -30,7 +30,10 @@ namespace VertexManager
|
||||||
static GLuint s_vboBuffers[0x40] = {0};
|
static GLuint s_vboBuffers[0x40] = {0};
|
||||||
static int s_nCurVBOIndex = 0; // current free buffer
|
static int s_nCurVBOIndex = 0; // current free buffer
|
||||||
static u8 *s_pBaseBufferPointer = NULL;
|
static u8 *s_pBaseBufferPointer = NULL;
|
||||||
static std::vector< std::pair<u32, u32> > s_vStoredPrimitives; // every element, mode and count to be passed to glDrawArrays
|
static std::vector< GLint > s_vertexFirstOffset;
|
||||||
|
static std::vector< GLsizei > s_vertexGroupSize;
|
||||||
|
static std::vector< std::pair< GLenum, int > > s_vertexGroups;
|
||||||
|
u32 s_vertexCount;
|
||||||
|
|
||||||
static const GLenum c_primitiveType[8] =
|
static const GLenum c_primitiveType[8] =
|
||||||
{
|
{
|
||||||
|
@ -58,7 +61,6 @@ bool Init()
|
||||||
|
|
||||||
glEnableClientState(GL_VERTEX_ARRAY);
|
glEnableClientState(GL_VERTEX_ARRAY);
|
||||||
g_nativeVertexFmt = NULL;
|
g_nativeVertexFmt = NULL;
|
||||||
s_vStoredPrimitives.reserve(1000);
|
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -70,7 +72,10 @@ void Shutdown()
|
||||||
glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
|
glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
|
||||||
memset(s_vboBuffers, 0, sizeof(s_vboBuffers));
|
memset(s_vboBuffers, 0, sizeof(s_vboBuffers));
|
||||||
|
|
||||||
s_vStoredPrimitives.resize(0);
|
s_vertexFirstOffset.resize(0);
|
||||||
|
s_vertexGroupSize.resize(0);
|
||||||
|
s_vertexGroups.resize(0);
|
||||||
|
s_vertexCount = 0;
|
||||||
s_nCurVBOIndex = 0;
|
s_nCurVBOIndex = 0;
|
||||||
ResetBuffer();
|
ResetBuffer();
|
||||||
}
|
}
|
||||||
|
@ -79,7 +84,10 @@ void ResetBuffer()
|
||||||
{
|
{
|
||||||
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
|
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
|
||||||
s_pCurBufferPointer = s_pBaseBufferPointer;
|
s_pCurBufferPointer = s_pBaseBufferPointer;
|
||||||
s_vStoredPrimitives.resize(0);
|
s_vertexFirstOffset.resize(0);
|
||||||
|
s_vertexGroupSize.resize(0);
|
||||||
|
s_vertexGroups.resize(0);
|
||||||
|
s_vertexCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetRemainingSize()
|
int GetRemainingSize()
|
||||||
|
@ -93,7 +101,7 @@ void AddVertices(int primitive, int numvertices)
|
||||||
_assert_(g_nativeVertexFmt != NULL);
|
_assert_(g_nativeVertexFmt != NULL);
|
||||||
|
|
||||||
ADDSTAT(stats.thisFrame.numPrims, numvertices);
|
ADDSTAT(stats.thisFrame.numPrims, numvertices);
|
||||||
if (s_vStoredPrimitives.size() && s_vStoredPrimitives[s_vStoredPrimitives.size() - 1].first == c_primitiveType[primitive]) {
|
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive]) {
|
||||||
// We can join primitives for free here. Not likely to help much, though, but whatever...
|
// We can join primitives for free here. Not likely to help much, though, but whatever...
|
||||||
if (c_primitiveType[primitive] == GL_TRIANGLES ||
|
if (c_primitiveType[primitive] == GL_TRIANGLES ||
|
||||||
c_primitiveType[primitive] == GL_LINES ||
|
c_primitiveType[primitive] == GL_LINES ||
|
||||||
|
@ -101,14 +109,19 @@ void AddVertices(int primitive, int numvertices)
|
||||||
c_primitiveType[primitive] == GL_QUADS) {
|
c_primitiveType[primitive] == GL_QUADS) {
|
||||||
INCSTAT(stats.thisFrame.numPrimitiveJoins);
|
INCSTAT(stats.thisFrame.numPrimitiveJoins);
|
||||||
// Easy join
|
// Easy join
|
||||||
std::pair<u32, u32> &last_pair = s_vStoredPrimitives[s_vStoredPrimitives.size() - 1];
|
s_vertexGroupSize.back() += numvertices;
|
||||||
last_pair.second += numvertices;
|
s_vertexCount += numvertices;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Joining strips is a lot more work but would bring more gain. Not sure if it's worth it though.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s_vStoredPrimitives.push_back(std::pair<int, int>(c_primitiveType[primitive], numvertices));
|
s_vertexFirstOffset.push_back(s_vertexCount);
|
||||||
|
s_vertexGroupSize.push_back(numvertices);
|
||||||
|
s_vertexCount += numvertices;
|
||||||
|
if (!s_vertexGroups.empty() && s_vertexGroups.back().first == c_primitiveType[primitive])
|
||||||
|
s_vertexGroups.back().second++;
|
||||||
|
else
|
||||||
|
s_vertexGroups.push_back(std::make_pair(c_primitiveType[primitive], 1));
|
||||||
|
|
||||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||||
static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"};
|
static const char *sprims[8] = {"quads", "nothing", "tris", "tstrip", "tfan", "lines", "lstrip", "points"};
|
||||||
|
@ -118,7 +131,7 @@ void AddVertices(int primitive, int numvertices)
|
||||||
|
|
||||||
void Flush()
|
void Flush()
|
||||||
{
|
{
|
||||||
if (s_vStoredPrimitives.size() == 0)
|
if (s_vertexCount == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
_assert_(s_pCurBufferPointer != s_pBaseBufferPointer);
|
_assert_(s_pCurBufferPointer != s_pBaseBufferPointer);
|
||||||
|
@ -260,12 +273,15 @@ void Flush()
|
||||||
PRIM_LOG("\n");
|
PRIM_LOG("\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int offset = 0;
|
int groupStart = 0;
|
||||||
for (std::vector< std::pair<u32, u32> >::const_iterator it = s_vStoredPrimitives.begin(); it != s_vStoredPrimitives.end(); ++it)
|
for (unsigned i = 0; i < s_vertexGroups.size(); i++)
|
||||||
{
|
{
|
||||||
INCSTAT(stats.thisFrame.numDrawCalls);
|
INCSTAT(stats.thisFrame.numDrawCalls);
|
||||||
glDrawArrays(it->first, offset, it->second);
|
glMultiDrawArrays(s_vertexGroups[i].first,
|
||||||
offset += it->second;
|
&s_vertexFirstOffset[groupStart],
|
||||||
|
&s_vertexGroupSize[groupStart],
|
||||||
|
s_vertexGroups[i].second);
|
||||||
|
groupStart += s_vertexGroups[i].second;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||||
|
|
Loading…
Reference in New Issue