OGL-StreamBuffer: make the SLOT calculation much easier

The size of the buffer is now always a power of 2, so the slot index can be computed with a shift instead of a division.
The division accounted for about 2% of the global CPU usage.
This commit is contained in:
degasus 2014-06-05 11:06:41 +02:00
parent d81d2e8915
commit 02a4e3d70f
2 changed files with 14 additions and 11 deletions

View File

@ -23,7 +23,7 @@ static u32 genBuffer()
} }
StreamBuffer::StreamBuffer(u32 type, size_t size) StreamBuffer::StreamBuffer(u32 type, size_t size)
: m_buffer(genBuffer()), m_buffertype(type), m_size(size) : m_buffer(genBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(Log2(ROUND_UP_POW2(size) / SYNC_POINTS))
{ {
m_iterator = 0; m_iterator = 0;
m_used_iterator = 0; m_used_iterator = 0;
@ -59,19 +59,20 @@ StreamBuffer::~StreamBuffer()
* As ring buffers have an ugly behavior on rollover, have fun reading this code ;) * As ring buffers have an ugly behavior on rollover, have fun reading this code ;)
*/ */
#define SLOT(x) ((x)*SYNC_POINTS/m_size)
void StreamBuffer::CreateFences() void StreamBuffer::CreateFences()
{ {
for (u32 i=0; i<SYNC_POINTS; i++) for (int i=0; i<SYNC_POINTS; i++)
{
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
} }
void StreamBuffer::DeleteFences() void StreamBuffer::DeleteFences()
{ {
for (size_t i = SLOT(m_free_iterator) + 1; i < SYNC_POINTS; i++) for (int i = SLOT(m_free_iterator) + 1; i < SYNC_POINTS; i++)
{ {
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
} }
for (size_t i = 0; i < SLOT(m_iterator); i++) for (int i = 0; i < SLOT(m_iterator); i++)
{ {
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
} }
@ -79,14 +80,14 @@ void StreamBuffer::DeleteFences()
void StreamBuffer::AllocMemory(size_t size) void StreamBuffer::AllocMemory(size_t size)
{ {
// insert waiting slots for used memory // insert waiting slots for used memory
for (size_t i = SLOT(m_used_iterator); i < SLOT(m_iterator); i++) for (int i = SLOT(m_used_iterator); i < SLOT(m_iterator); i++)
{ {
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
m_used_iterator = m_iterator; m_used_iterator = m_iterator;
// wait for new slots to end of buffer // wait for new slots to end of buffer
for (size_t i = SLOT(m_free_iterator) + 1; i <= SLOT(m_iterator + size) && i < SYNC_POINTS; i++) for (int i = SLOT(m_free_iterator) + 1; i <= SLOT(m_iterator + size) && i < SYNC_POINTS; i++)
{ {
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
@ -97,7 +98,7 @@ void StreamBuffer::AllocMemory(size_t size)
if (m_iterator + size >= m_size) { if (m_iterator + size >= m_size) {
// insert waiting slots in unused space at the end of the buffer // insert waiting slots in unused space at the end of the buffer
for (size_t i = SLOT(m_used_iterator); i < SYNC_POINTS; i++) for (int i = SLOT(m_used_iterator); i < SYNC_POINTS; i++)
{ {
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
@ -106,7 +107,7 @@ void StreamBuffer::AllocMemory(size_t size)
m_used_iterator = m_iterator = 0; // offset 0 is always aligned m_used_iterator = m_iterator = 0; // offset 0 is always aligned
// wait for space at the start // wait for space at the start
for (u32 i = 0; i <= SLOT(m_iterator + size); i++) for (int i = 0; i <= SLOT(m_iterator + size); i++)
{ {
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
@ -114,7 +115,6 @@ void StreamBuffer::AllocMemory(size_t size)
m_free_iterator = m_iterator + size; m_free_iterator = m_iterator + size;
} }
} }
#undef SLOT
void StreamBuffer::Align(u32 stride) void StreamBuffer::Align(u32 stride)
{ {

View File

@ -46,7 +46,10 @@ protected:
size_t m_free_iterator; size_t m_free_iterator;
private: private:
static const u32 SYNC_POINTS = 16; static const int SYNC_POINTS = 16;
inline int SLOT(size_t x) const { return x >> m_bit_per_slot; }
const int m_bit_per_slot;
GLsync fences[SYNC_POINTS]; GLsync fences[SYNC_POINTS];
}; };