From 3859b0a091495275f46a01d3e22ca7f70ffdeea9 Mon Sep 17 00:00:00 2001
From: "Dr. Chat"
Date: Mon, 18 Jan 2016 20:57:36 -0600
Subject: [PATCH] Add an option for the draw batcher to collect vertex shader
 output. Disabled by default.

---
 src/xenia/gpu/gl4/draw_batcher.cc | 205 ++++++++++++++++++++++++++++--
 src/xenia/gpu/gl4/draw_batcher.h  |  22 ++++
 2 files changed, 217 insertions(+), 10 deletions(-)

diff --git a/src/xenia/gpu/gl4/draw_batcher.cc b/src/xenia/gpu/gl4/draw_batcher.cc
index d69ac7dad..7faa663a5 100644
--- a/src/xenia/gpu/gl4/draw_batcher.cc
+++ b/src/xenia/gpu/gl4/draw_batcher.cc
@@ -48,13 +48,112 @@ bool DrawBatcher::Initialize(CircularBuffer* array_data_buffer) {
   if (!state_buffer_.Initialize()) {
     return false;
   }
+  if (!InitializeTFB()) {
+    return false;
+  }
+
   glBindBuffer(GL_DRAW_INDIRECT_BUFFER, command_buffer_.handle());
   return true;
 }
 
+// Creates the GL objects used to capture vertex/geometry shader output through
+// transform feedback: a buffer that receives the captured vertices, a
+// transform feedback object, and a query counting the primitives written.
+// Returns false, releasing anything already created, if any object could not
+// be generated.
+bool DrawBatcher::InitializeTFB() {
+  glGenBuffers(1, &tfvbo_);
+  glGenTransformFeedbacks(1, &tfbo_);
+  glGenQueries(1, &tfqo_);
+  if (!tfvbo_ || !tfbo_ || !tfqo_) {
+    // Don't leak the objects that were created before the failure. Deleting
+    // the name zero is a no-op, so a partial set is fine.
+    ShutdownTFB();
+    return false;
+  }
+
+  // TODO(DrChat): Calculate this based on the number of primitives drawn.
+  glBindBuffer(GL_ARRAY_BUFFER, tfvbo_);
+  glBufferData(GL_ARRAY_BUFFER, 16384 * 4, nullptr, GL_STATIC_READ);
+  glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+  return true;
+}
+
+// Deletes the transform feedback objects and clears their names. Safe to call
+// when some or all of the objects were never created.
+void DrawBatcher::ShutdownTFB() {
+  glDeleteBuffers(1, &tfvbo_);
+  glDeleteTransformFeedbacks(1, &tfbo_);
+  glDeleteQueries(1, &tfqo_);
+
+  tfvbo_ = 0;
+  tfbo_ = 0;
+  tfqo_ = 0;
+}
+
+// Returns the number of bytes captured by the last flush, or 0 when capture
+// is disabled. Computed as primitives written * vertices per primitive * 16
+// bytes (4 * 4) per vertex.
+size_t DrawBatcher::QueryTFBSize() {
+  if (!tfb_enabled_) {
+    return 0;
+  }
+
+  size_t size = 0;
+  switch (tfb_prim_type_gl_) {
+    case GL_POINTS:
+      size = tfb_prim_count_ * 1 * 4 * 4;
+      break;
+    case GL_LINES:
+      size = tfb_prim_count_ * 2 * 4 * 4;
+      break;
+    case GL_TRIANGLES:
+      size = tfb_prim_count_ * 3 * 4 * 4;
+      break;
+  }
+
+  return size;
+}
+
+// Copies the first |size| bytes of the capture buffer into |buffer|.
+// Returns false if capture is disabled, |buffer| is null, or the range could
+// not be mapped (for example when |size| exceeds the capture buffer).
+bool DrawBatcher::ReadbackTFB(void* buffer, size_t size) {
+  if (!tfb_enabled_) {
+    XELOGW("DrawBatcher::ReadbackTFB called when TFB was disabled!");
+    return false;
+  }
+
+  if (!size) {
+    // Nothing to copy, and mapping a zero-length range is a GL error.
+    return true;
+  }
+  if (!buffer) {
+    return false;
+  }
+
+  glBindBuffer(GL_ARRAY_BUFFER, tfvbo_);
+  void* data = glMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_READ_BIT);
+
+  // glMapBufferRange returns null on failure; never copy from a failed map.
+  const bool mapped = data != nullptr;
+  if (mapped) {
+    std::memcpy(buffer, data, size);
+    glUnmapBuffer(GL_ARRAY_BUFFER);
+  } else {
+    XELOGE("DrawBatcher::ReadbackTFB failed to map the TFB buffer!");
+  }
+
+  glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+  return mapped;
+}
+
 void DrawBatcher::Shutdown() {
   command_buffer_.Shutdown();
   state_buffer_.Shutdown();
+  ShutdownTFB();
 }
 
 bool DrawBatcher::ReconfigurePipeline(GL4Shader* vertex_shader,
@@ -242,6 +341,89 @@ bool DrawBatcher::CommitDraw() {
   return true;
 }
 
+// Starts capturing the output of the draws issued until TFBEnd() into the
+// capture buffer, along with a count of the primitives written. Does nothing
+// when capture is disabled.
+void DrawBatcher::TFBBegin(PrimitiveType prim_type) {
+  if (!tfb_enabled_) {
+    return;
+  }
+
+  // Translate the primitive type to something compatible with TFB.
+  GLenum gl_prim_type = 0;
+  switch (prim_type) {
+    case PrimitiveType::kLineList:
+      gl_prim_type = GL_LINES;
+      break;
+    case PrimitiveType::kLineStrip:
+      gl_prim_type = GL_LINES;
+      break;
+    case PrimitiveType::kLineLoop:
+      gl_prim_type = GL_LINES;
+      break;
+    case PrimitiveType::kPointList:
+      // The geometry shader associated with this writes out triangles.
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    case PrimitiveType::kTriangleList:
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    case PrimitiveType::kTriangleStrip:
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    case PrimitiveType::kRectangleList:
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    case PrimitiveType::kTriangleFan:
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    case PrimitiveType::kQuadList:
+      // FIXME: In some cases the geometry shader will output lines.
+      // See: GL4CommandProcessor::UpdateShaders
+      gl_prim_type = GL_TRIANGLES;
+      break;
+    default:
+      assert_unhandled_case(prim_type);
+      break;
+  }
+
+  // TODO(DrChat): Resize the TFVBO here.
+  // Could draw a 2nd time with the rasterizer disabled once we have a primitive
+  // count.
+
+  tfb_prim_type_ = prim_type;
+  tfb_prim_type_gl_ = gl_prim_type;
+
+  glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, tfbo_);
+
+  // Bind the buffer to the TFB object.
+  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, tfvbo_);
+
+  // Begin a query for # prims written
+  glBeginQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, 0, tfqo_);
+
+  // Begin capturing.
+  glBeginTransformFeedback(gl_prim_type);
+}
+
+// Stops the capture started by TFBBegin() and caches the number of primitives
+// written. Does nothing when capture is disabled.
+void DrawBatcher::TFBEnd() {
+  if (!tfb_enabled_) {
+    return;
+  }
+
+  glEndTransformFeedback();
+  glEndQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, 0);
+  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);
+  glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);
+
+  // Cache the query size as query objects aren't shared.
+  GLint prim_count = 0;
+  glGetQueryObjectiv(tfqo_, GL_QUERY_RESULT, &prim_count);
+  tfb_prim_count_ = prim_count;
+}
+
 bool DrawBatcher::Flush(FlushMode mode) {
   GLboolean cull_enabled = 0;
   if (batch_state_.draw_count) {
@@ -263,6 +445,7 @@ bool DrawBatcher::Flush(FlushMode mode) {
                       batch_state_.state_range_length);
 
     GLenum prim_type = 0;
+    bool valid_prim = true;
     switch (batch_state_.prim_type) {
       case PrimitiveType::kPointList:
         prim_type = GL_POINTS;
@@ -291,8 +474,6 @@ bool DrawBatcher::Flush(FlushMode mode) {
         // assert_true(
         // (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32
         // & 0x3) == 0);
-        cull_enabled = glIsEnabled(GL_CULL_FACE);
-        glDisable(GL_CULL_FACE);
         break;
       case PrimitiveType::kQuadList:
         prim_type = GL_LINES_ADJACENCY;
@@ -300,17 +481,21 @@ bool DrawBatcher::Flush(FlushMode mode) {
       default:
       case PrimitiveType::kUnknown0x07:
         prim_type = GL_POINTS;
+        valid_prim = false;
         XELOGE("unsupported primitive type %d", batch_state_.prim_type);
         assert_unhandled_case(batch_state_.prim_type);
-        DiscardDraw();
-        return false;
+        break;
     }
 
     // Fast path for single draws.
     void* indirect_offset =
         reinterpret_cast<void*>(batch_state_.command_range_start);
 
-    if (batch_state_.draw_count == 1) {
+    if (tfb_enabled_) {
+      TFBBegin(batch_state_.prim_type);
+    }
+
+    if (valid_prim && batch_state_.draw_count == 1) {
       // Fast path for one draw. Removes MDI overhead when not required.
       if (batch_state_.indexed) {
         auto& cmd = active_draw_.draw_elements_cmd;
@@ -326,7 +511,7 @@ bool DrawBatcher::Flush(FlushMode mode) {
                                           cmd->count, cmd->instance_count,
                                           cmd->base_instance);
       }
-    } else {
+    } else if (valid_prim) {
       // Full multi-draw.
       if (batch_state_.indexed) {
         glMultiDrawElementsIndirect(prim_type, batch_state_.index_type,
@@ -339,6 +524,10 @@ bool DrawBatcher::Flush(FlushMode mode) {
       }
     }
 
+    if (tfb_enabled_) {
+      TFBEnd();
+    }
+
     batch_state_.command_range_start = UINTPTR_MAX;
     batch_state_.command_range_length = 0;
     batch_state_.state_range_start = UINTPTR_MAX;
@@ -346,10 +535,6 @@ bool DrawBatcher::Flush(FlushMode mode) {
     batch_state_.draw_count = 0;
   }
 
-  if (batch_state_.prim_type == PrimitiveType::kRectangleList && cull_enabled) {
-    glEnable(GL_CULL_FACE);
-  }
-
   if (mode == FlushMode::kReconfigure) {
     // Reset - we'll update it as soon as we have all the information.
     batch_state_.needs_reconfigure = true;
diff --git a/src/xenia/gpu/gl4/draw_batcher.h b/src/xenia/gpu/gl4/draw_batcher.h
index 902b049e4..fdecfb9da 100644
--- a/src/xenia/gpu/gl4/draw_batcher.h
+++ b/src/xenia/gpu/gl4/draw_batcher.h
@@ -99,7 +99,21 @@ class DrawBatcher {
   bool CommitDraw();
   bool Flush(FlushMode mode);
 
+  // TFB - Filled with vertex shader output from the last flush.
+  size_t QueryTFBSize();
+  bool ReadbackTFB(void* buffer, size_t size);
+
+  GLuint tfvbo() { return tfvbo_; }
+  bool is_tfb_enabled() const { return tfb_enabled_; }
+  void set_tfb_enabled(bool enabled) { tfb_enabled_ = enabled; }
+
  private:
+  bool InitializeTFB();
+  void ShutdownTFB();
+
+  void TFBBegin(PrimitiveType prim_type);
+  void TFBEnd();
+
   bool BeginDraw();
   void CopyConstants();
 
@@ -108,6 +122,14 @@ class DrawBatcher {
   CircularBuffer state_buffer_;
   CircularBuffer* array_data_buffer_;
 
+  GLuint tfbo_ = 0;
+  GLuint tfvbo_ = 0;
+  GLuint tfqo_ = 0;
+  PrimitiveType tfb_prim_type_ = PrimitiveType::kNone;
+  GLenum tfb_prim_type_gl_ = 0;
+  GLint tfb_prim_count_ = 0;
+  bool tfb_enabled_ = false;
+
   struct BatchState {
     bool needs_reconfigure;
     PrimitiveType prim_type;