OpenGL: change StreamBuffer in a streaming way
This is a bit slower on map_and_* because of flushing, and _very_ much slower on buffer(sub)?data because of a new memcpy. But this design allows us to decode directly into a GPU buffer, e.g. the vertex loader will profit :)
This commit is contained in:
parent
650bae12e1
commit
be1fee6d74
|
@ -193,29 +193,19 @@ void ProgramShaderCache::UploadConstants()
|
|||
{
|
||||
if(PixelShaderManager::dirty || VertexShaderManager::dirty)
|
||||
{
|
||||
s_buffer->Alloc(s_ubo_buffer_size);
|
||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
|
||||
{
|
||||
// This is just a hack to support our BUFFERDATA upload method
|
||||
// as it's broken when uploading in a split way
|
||||
static u8 *tmpbuffer = new u8[s_ubo_buffer_size];
|
||||
memcpy(tmpbuffer, &PixelShaderManager::constants, sizeof(PixelShaderConstants));
|
||||
memcpy(tmpbuffer+ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align), &VertexShaderManager::constants, sizeof(VertexShaderConstants));
|
||||
size_t offset = s_buffer->Upload(tmpbuffer, s_ubo_buffer_size);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1,
|
||||
s_buffer->getBuffer(), offset, sizeof(PixelShaderConstants));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 2,
|
||||
s_buffer->getBuffer(), offset+ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align), sizeof(VertexShaderConstants));
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t offset = s_buffer->Upload((u8*)&PixelShaderManager::constants, ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1,
|
||||
s_buffer->getBuffer(), offset, sizeof(PixelShaderConstants));
|
||||
offset = s_buffer->Upload((u8*)&VertexShaderManager::constants, ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 2,
|
||||
s_buffer->getBuffer(), offset, sizeof(VertexShaderConstants));
|
||||
}
|
||||
u8* buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align);
|
||||
|
||||
memcpy(buffer,
|
||||
&PixelShaderManager::constants, sizeof(PixelShaderConstants));
|
||||
|
||||
memcpy(buffer + ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align),
|
||||
&VertexShaderManager::constants, sizeof(VertexShaderConstants));
|
||||
|
||||
size_t offset = s_buffer->Unmap(s_ubo_buffer_size);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset,
|
||||
sizeof(PixelShaderConstants));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset + ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align),
|
||||
sizeof(VertexShaderConstants));
|
||||
|
||||
PixelShaderManager::dirty = false;
|
||||
VertexShaderManager::dirty = false;
|
||||
|
|
|
@ -51,20 +51,18 @@ StreamBuffer::~StreamBuffer()
|
|||
|
||||
#define SLOT(x) ((x)*SYNC_POINTS/m_size)
|
||||
|
||||
void StreamBuffer::Alloc ( size_t size, u32 stride )
|
||||
u8* StreamBuffer::Map ( size_t size, u32 stride )
|
||||
{
|
||||
size_t m_iterator_aligned = m_iterator;
|
||||
if(m_iterator_aligned && stride) {
|
||||
m_iterator_aligned--;
|
||||
m_iterator_aligned = m_iterator_aligned - (m_iterator_aligned % stride) + stride;
|
||||
if(m_iterator && stride) {
|
||||
m_iterator--;
|
||||
m_iterator = m_iterator - (m_iterator % stride) + stride;
|
||||
}
|
||||
size_t iter_end = m_iterator_aligned + size;
|
||||
|
||||
switch(m_uploadtype) {
|
||||
case MAP_AND_ORPHAN:
|
||||
if(iter_end >= m_size) {
|
||||
if(m_iterator + size >= m_size) {
|
||||
glBufferData(m_buffertype, m_size, NULL, GL_STREAM_DRAW);
|
||||
m_iterator_aligned = 0;
|
||||
m_iterator = 0;
|
||||
}
|
||||
break;
|
||||
case MAP_AND_SYNC:
|
||||
|
@ -78,15 +76,15 @@ void StreamBuffer::Alloc ( size_t size, u32 stride )
|
|||
m_used_iterator = m_iterator;
|
||||
|
||||
// wait for new slots to end of buffer
|
||||
for (size_t i = SLOT(m_free_iterator) + 1; i <= SLOT(iter_end) && i < SYNC_POINTS; i++)
|
||||
for (size_t i = SLOT(m_free_iterator) + 1; i <= SLOT(m_iterator + size) && i < SYNC_POINTS; i++)
|
||||
{
|
||||
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
glDeleteSync(fences[i]);
|
||||
}
|
||||
m_free_iterator = iter_end;
|
||||
m_free_iterator = m_iterator + size;
|
||||
|
||||
// if buffer is full
|
||||
if (iter_end >= m_size) {
|
||||
if (m_iterator + size >= m_size) {
|
||||
|
||||
// insert waiting slots in unused space at the end of the buffer
|
||||
for (size_t i = SLOT(m_used_iterator); i < SYNC_POINTS; i++)
|
||||
|
@ -95,54 +93,58 @@ void StreamBuffer::Alloc ( size_t size, u32 stride )
|
|||
}
|
||||
|
||||
// move to the start
|
||||
m_used_iterator = m_iterator_aligned = m_iterator = 0; // offset 0 is always aligned
|
||||
iter_end = size;
|
||||
m_used_iterator = m_iterator = 0; // offset 0 is always aligned
|
||||
|
||||
// wait for space at the start
|
||||
for (u32 i = 0; i <= SLOT(iter_end); i++)
|
||||
for (u32 i = 0; i <= SLOT(m_iterator + size); i++)
|
||||
{
|
||||
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
glDeleteSync(fences[i]);
|
||||
}
|
||||
m_free_iterator = iter_end;
|
||||
m_free_iterator = m_iterator + size;
|
||||
}
|
||||
|
||||
break;
|
||||
case BUFFERSUBDATA:
|
||||
case BUFFERDATA:
|
||||
m_iterator_aligned = 0;
|
||||
m_iterator = 0;
|
||||
break;
|
||||
}
|
||||
m_iterator = m_iterator_aligned;
|
||||
}
|
||||
|
||||
size_t StreamBuffer::Upload ( u8* data, size_t size )
|
||||
{
|
||||
// MAP_AND_* methods need to remap this buffer every time
|
||||
switch(m_uploadtype) {
|
||||
case MAP_AND_SYNC:
|
||||
case MAP_AND_ORPHAN:
|
||||
pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
if(pointer) {
|
||||
memcpy(pointer, data, size);
|
||||
glUnmapBuffer(m_buffertype);
|
||||
} else {
|
||||
ERROR_LOG(VIDEO, "Buffer mapping failed");
|
||||
}
|
||||
case MAP_AND_SYNC:
|
||||
pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT) - m_iterator;
|
||||
break;
|
||||
case PINNED_MEMORY:
|
||||
case BUFFERSTORAGE:
|
||||
if (pointer)
|
||||
memcpy(pointer + m_iterator, data, size);
|
||||
break;
|
||||
case BUFFERSUBDATA:
|
||||
glBufferSubData(m_buffertype, m_iterator, size, data);
|
||||
break;
|
||||
case BUFFERDATA:
|
||||
glBufferData(m_buffertype, size, data, GL_STREAM_DRAW);
|
||||
break;
|
||||
}
|
||||
return pointer + m_iterator;
|
||||
}
|
||||
|
||||
size_t StreamBuffer::Unmap(size_t used_size)
|
||||
{
|
||||
size_t ret = m_iterator;
|
||||
m_iterator += size;
|
||||
switch(m_uploadtype) {
|
||||
case MAP_AND_SYNC:
|
||||
case MAP_AND_ORPHAN:
|
||||
glFlushMappedBufferRange(m_buffertype, 0, used_size);
|
||||
glUnmapBuffer(m_buffertype);
|
||||
break;
|
||||
case PINNED_MEMORY:
|
||||
case BUFFERSTORAGE:
|
||||
case BUFFERSUBDATA:
|
||||
glBufferSubData(m_buffertype, 0, used_size, pointer);
|
||||
break;
|
||||
case BUFFERDATA:
|
||||
glBufferData(m_buffertype, used_size, pointer, GL_STREAM_DRAW);
|
||||
break;
|
||||
}
|
||||
m_iterator += used_size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -162,6 +164,7 @@ void StreamBuffer::Init()
|
|||
case BUFFERSUBDATA:
|
||||
glBindBuffer(m_buffertype, m_buffer);
|
||||
glBufferData(m_buffertype, m_size, NULL, GL_STREAM_DRAW);
|
||||
pointer = new u8[m_size];
|
||||
break;
|
||||
case PINNED_MEMORY:
|
||||
glGetError(); // errors before this allocation should be ignored
|
||||
|
@ -205,6 +208,7 @@ void StreamBuffer::Init()
|
|||
|
||||
case BUFFERDATA:
|
||||
glBindBuffer(m_buffertype, m_buffer);
|
||||
pointer = new u8[m_size];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -216,8 +220,10 @@ void StreamBuffer::Shutdown()
|
|||
DeleteFences();
|
||||
break;
|
||||
case MAP_AND_ORPHAN:
|
||||
break;
|
||||
case BUFFERSUBDATA:
|
||||
case BUFFERDATA:
|
||||
delete [] pointer;
|
||||
break;
|
||||
case PINNED_MEMORY:
|
||||
DeleteFences();
|
||||
|
|
|
@ -32,10 +32,10 @@ public:
|
|||
StreamBuffer(u32 type, size_t size);
|
||||
~StreamBuffer();
|
||||
|
||||
void Alloc(size_t size, u32 stride = 0);
|
||||
size_t Upload(u8 *data, size_t size);
|
||||
u8* Map(size_t size, u32 stride = 0);
|
||||
size_t Unmap(size_t used_size); // returns the offset of the beginning of the uploaded block
|
||||
|
||||
u32 getBuffer() { return m_buffer; }
|
||||
inline u32 getBuffer() { return m_buffer; }
|
||||
|
||||
private:
|
||||
void Init();
|
||||
|
|
|
@ -39,7 +39,7 @@ namespace OGL
|
|||
{
|
||||
// These are the initially requested sizes for the buffers, expressed in bytes
|
||||
const u32 MAX_IBUFFER_SIZE = 2*1024*1024;
|
||||
const u32 MAX_VBUFFER_SIZE = 16*1024*1024;
|
||||
const u32 MAX_VBUFFER_SIZE = 32*1024*1024;
|
||||
|
||||
static StreamBuffer *s_vertexBuffer;
|
||||
static StreamBuffer *s_indexBuffer;
|
||||
|
@ -85,12 +85,14 @@ void VertexManager::PrepareDrawBuffers(u32 stride)
|
|||
u32 vertex_data_size = IndexGenerator::GetNumVerts() * stride;
|
||||
u32 index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16);
|
||||
|
||||
s_vertexBuffer->Alloc(vertex_data_size, stride);
|
||||
size_t offset = s_vertexBuffer->Upload(GetVertexBuffer(), vertex_data_size);
|
||||
u8* buffer = s_vertexBuffer->Map(vertex_data_size, stride);
|
||||
memcpy(buffer, GetVertexBuffer(), vertex_data_size);
|
||||
size_t offset = s_vertexBuffer->Unmap(vertex_data_size);
|
||||
s_baseVertex = offset / stride;
|
||||
|
||||
s_indexBuffer->Alloc(index_data_size);
|
||||
s_index_offset = s_indexBuffer->Upload((u8*)GetIndexBuffer(), index_data_size);
|
||||
buffer = s_indexBuffer->Map(index_data_size);
|
||||
memcpy(buffer, GetIndexBuffer(), index_data_size);
|
||||
s_index_offset = s_indexBuffer->Unmap(index_data_size);
|
||||
|
||||
ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size);
|
||||
ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size);
|
||||
|
@ -234,4 +236,5 @@ void VertexManager::vFlush()
|
|||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in New Issue