From 5236477043feaeeb61c4f5a23d7db7b2b83452f2 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 2 Jan 2015 18:01:48 -0800 Subject: [PATCH] Batch flushing buffer. --- src/xenia/gpu/gl4/circular_buffer.cc | 27 +++++++++++++++++++---- src/xenia/gpu/gl4/circular_buffer.h | 5 +++++ src/xenia/gpu/gl4/command_processor.cc | 1 + src/xenia/gpu/gl4/gl4_profiler_display.cc | 8 ++++++- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/gl4/circular_buffer.cc b/src/xenia/gpu/gl4/circular_buffer.cc index 92ce0e643..537cd5604 100644 --- a/src/xenia/gpu/gl4/circular_buffer.cc +++ b/src/xenia/gpu/gl4/circular_buffer.cc @@ -24,6 +24,8 @@ CircularBuffer::CircularBuffer(size_t capacity, size_t alignment) : capacity_(capacity), alignment_(alignment), write_head_(0), + dirty_start_(UINT64_MAX), + dirty_end_(0), buffer_(0), gpu_base_(0), host_base_(nullptr) {} @@ -63,12 +65,15 @@ void CircularBuffer::Shutdown() { buffer_ = 0; } +bool CircularBuffer::CanAcquire(size_t length) { + size_t aligned_length = poly::round_up(length, alignment_); + return write_head_ + aligned_length <= capacity_; +} + CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { // Addresses must always be % 256. size_t aligned_length = poly::round_up(length, alignment_); - assert_true(aligned_length <= capacity_, "Request too large"); - if (write_head_ + aligned_length > capacity_) { // Flush and wait. 
WaitUntilClean(); @@ -79,16 +84,30 @@ CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { allocation.gpu_ptr = gpu_base_ + write_head_; allocation.offset = write_head_; allocation.length = length; + allocation.aligned_length = aligned_length; write_head_ += aligned_length; return allocation; } void CircularBuffer::Commit(Allocation allocation) { - glFlushMappedNamedBufferRange(buffer_, allocation.gpu_ptr - gpu_base_, - allocation.length); + uintptr_t start = allocation.gpu_ptr - gpu_base_; + uintptr_t end = start + allocation.aligned_length; + dirty_start_ = std::min(dirty_start_, start); + dirty_end_ = std::max(dirty_end_, end); +} + +void CircularBuffer::Flush() { + if (dirty_start_ == dirty_end_ || dirty_start_ == UINT64_MAX) { + return; + } + glFlushMappedNamedBufferRange(buffer_, dirty_start_, + dirty_end_ - dirty_start_); + dirty_start_ = UINT64_MAX; + dirty_end_ = 0; } void CircularBuffer::WaitUntilClean() { + Flush(); glFinish(); write_head_ = 0; } diff --git a/src/xenia/gpu/gl4/circular_buffer.h b/src/xenia/gpu/gl4/circular_buffer.h index 777f60274..2db1d639c 100644 --- a/src/xenia/gpu/gl4/circular_buffer.h +++ b/src/xenia/gpu/gl4/circular_buffer.h @@ -28,6 +28,7 @@ class CircularBuffer { GLuint64 gpu_ptr; size_t offset; size_t length; + size_t aligned_length; }; bool Initialize(); @@ -35,8 +36,10 @@ class CircularBuffer { GLuint handle() const { return buffer_; } + bool CanAcquire(size_t length); Allocation Acquire(size_t length); void Commit(Allocation allocation); + void Flush(); void WaitUntilClean(); @@ -44,6 +47,8 @@ class CircularBuffer { size_t capacity_; size_t alignment_; uintptr_t write_head_; + uintptr_t dirty_start_; + uintptr_t dirty_end_; GLuint buffer_; GLuint64 gpu_base_; uint8_t* host_base_; diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 64029c914..50b562051 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -1428,6 +1428,7 
@@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(), allocation.offset, allocation.length); scratch_buffer_.Commit(std::move(allocation)); + scratch_buffer_.Flush(); if (cmd.index_buffer.address) { // Indexed draw. diff --git a/src/xenia/gpu/gl4/gl4_profiler_display.cc b/src/xenia/gpu/gl4/gl4_profiler_display.cc index 37d4b1f67..f60723cce 100644 --- a/src/xenia/gpu/gl4/gl4_profiler_display.cc +++ b/src/xenia/gpu/gl4/gl4_profiler_display.cc @@ -382,7 +382,11 @@ GL4ProfilerDisplay::Vertex* GL4ProfilerDisplay::BeginVertices(size_t count) { if (draw_command_count_ + 1 > kMaxCommands) { Flush(); } - current_allocation_ = vertex_buffer_.Acquire(sizeof(Vertex) * count); + size_t total_length = sizeof(Vertex) * count; + if (!vertex_buffer_.CanAcquire(total_length)) { + Flush(); + } + current_allocation_ = vertex_buffer_.Acquire(total_length); return reinterpret_cast<Vertex*>(current_allocation_.host_ptr); } @@ -408,12 +412,14 @@ void GL4ProfilerDisplay::Flush() { if (!draw_command_count_) { return; } + vertex_buffer_.Flush(); for (size_t i = 0; i < draw_command_count_; ++i) { glDrawArrays(draw_commands_[i].prim_type, GLint(draw_commands_[i].vertex_offset), GLsizei(draw_commands_[i].vertex_count)); } draw_command_count_ = 0; + // TODO(benvanik): don't finish here. vertex_buffer_.WaitUntilClean(); }