MultiDrawIndirect draw batching - now down to <20us per draw.
This commit is contained in:
parent
eda38a7428
commit
4fcf9c6229
|
@ -35,6 +35,8 @@ class CircularBuffer {
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
|
||||||
GLuint handle() const { return buffer_; }
|
GLuint handle() const { return buffer_; }
|
||||||
|
GLuint64 gpu_handle() const { return gpu_base_; }
|
||||||
|
size_t capacity() const { return capacity_; }
|
||||||
|
|
||||||
bool CanAcquire(size_t length);
|
bool CanAcquire(size_t length);
|
||||||
Allocation Acquire(size_t length);
|
Allocation Acquire(size_t length);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -18,6 +18,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <xenia/gpu/gl4/circular_buffer.h>
|
#include <xenia/gpu/gl4/circular_buffer.h>
|
||||||
|
#include <xenia/gpu/gl4/draw_batcher.h>
|
||||||
#include <xenia/gpu/gl4/gl_context.h>
|
#include <xenia/gpu/gl4/gl_context.h>
|
||||||
#include <xenia/gpu/gl4/gl4_shader.h>
|
#include <xenia/gpu/gl4/gl4_shader.h>
|
||||||
#include <xenia/gpu/gl4/texture_cache.h>
|
#include <xenia/gpu/gl4/texture_cache.h>
|
||||||
|
@ -41,73 +42,6 @@ struct SwapParameters {
|
||||||
GLenum attachment;
|
GLenum attachment;
|
||||||
};
|
};
|
||||||
|
|
||||||
// This must match the layout in gl4_shader.cc.
|
|
||||||
struct UniformDataBlock {
|
|
||||||
union float4 {
|
|
||||||
float v[4];
|
|
||||||
struct {
|
|
||||||
float x, y, z, w;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
float4 window_offset; // tx,ty,sx,sy
|
|
||||||
float4 window_scissor; // x0,y0,x1,y1
|
|
||||||
float4 vtx_fmt;
|
|
||||||
float4 viewport_offset; // tx,ty,tz,?
|
|
||||||
float4 viewport_scale; // sx,sy,sz,?
|
|
||||||
// TODO(benvanik): vertex format xyzw?
|
|
||||||
|
|
||||||
float4 alpha_test; // alpha test enable, func, ref, ?
|
|
||||||
|
|
||||||
// TODO(benvanik): pack tightly
|
|
||||||
uint64_t texture_samplers[32];
|
|
||||||
|
|
||||||
// Register data from 0x4000 to 0x4927.
|
|
||||||
// UpdateConstants relies on the packing of these.
|
|
||||||
struct {
|
|
||||||
// SHADER_CONSTANT_000_X...
|
|
||||||
float4 float_consts[512];
|
|
||||||
// SHADER_CONSTANT_FETCH_00_0 is omitted
|
|
||||||
// SHADER_CONSTANT_BOOL_000_031...
|
|
||||||
int32_t bool_consts[8];
|
|
||||||
// SHADER_CONSTANT_LOOP_00...
|
|
||||||
int32_t loop_consts[32];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
static_assert(sizeof(UniformDataBlock) <= 16 * 1024, "Need <=16k uniform data");
|
|
||||||
|
|
||||||
// TODO(benvanik): move more of the enums in here?
|
|
||||||
struct DrawCommand {
|
|
||||||
PrimitiveType prim_type;
|
|
||||||
uint32_t start_index;
|
|
||||||
uint32_t min_index;
|
|
||||||
uint32_t max_index;
|
|
||||||
uint32_t index_count;
|
|
||||||
uint32_t base_vertex;
|
|
||||||
|
|
||||||
// Index buffer, if present.
|
|
||||||
// If index_count > 0 but buffer is nullptr then auto draw.
|
|
||||||
struct {
|
|
||||||
const uint8_t* address;
|
|
||||||
size_t size;
|
|
||||||
xenos::Endian endianness;
|
|
||||||
xenos::IndexFormat format;
|
|
||||||
size_t buffer_offset;
|
|
||||||
} index_buffer;
|
|
||||||
|
|
||||||
// Texture samplers.
|
|
||||||
struct SamplerInput {
|
|
||||||
uint32_t input_index;
|
|
||||||
// TextureResource* texture;
|
|
||||||
// SamplerStateResource* sampler_state;
|
|
||||||
};
|
|
||||||
SamplerInput vertex_shader_samplers[32];
|
|
||||||
SamplerInput pixel_shader_samplers[32];
|
|
||||||
|
|
||||||
// NOTE: do not read from this - the mapped memory is likely write combined.
|
|
||||||
UniformDataBlock* state_data;
|
|
||||||
};
|
|
||||||
|
|
||||||
class CommandProcessor {
|
class CommandProcessor {
|
||||||
public:
|
public:
|
||||||
CommandProcessor(GL4GraphicsSystem* graphics_system);
|
CommandProcessor(GL4GraphicsSystem* graphics_system);
|
||||||
|
@ -241,22 +175,19 @@ class CommandProcessor {
|
||||||
bool LoadShader(ShaderType shader_type, const uint32_t* address,
|
bool LoadShader(ShaderType shader_type, const uint32_t* address,
|
||||||
uint32_t dword_count);
|
uint32_t dword_count);
|
||||||
|
|
||||||
void PrepareDraw(DrawCommand* draw_command);
|
bool IssueDraw();
|
||||||
bool IssueDraw(DrawCommand* draw_command);
|
UpdateStatus UpdateShaders(PrimitiveType prim_type);
|
||||||
UpdateStatus UpdateRenderTargets(DrawCommand* draw_command);
|
UpdateStatus UpdateRenderTargets();
|
||||||
UpdateStatus UpdateState(DrawCommand* draw_command);
|
UpdateStatus UpdateState();
|
||||||
UpdateStatus UpdateViewportState(DrawCommand* draw_command);
|
UpdateStatus UpdateViewportState();
|
||||||
UpdateStatus UpdateRasterizerState(DrawCommand* draw_command);
|
UpdateStatus UpdateRasterizerState();
|
||||||
UpdateStatus UpdateBlendState(DrawCommand* draw_command);
|
UpdateStatus UpdateBlendState();
|
||||||
UpdateStatus UpdateDepthStencilState(DrawCommand* draw_command);
|
UpdateStatus UpdateDepthStencilState();
|
||||||
UpdateStatus UpdateConstants(DrawCommand* draw_command);
|
UpdateStatus PopulateIndexBuffer();
|
||||||
UpdateStatus UpdateShaders(DrawCommand* draw_command);
|
UpdateStatus PopulateVertexBuffers();
|
||||||
UpdateStatus PopulateIndexBuffer(DrawCommand* draw_command);
|
UpdateStatus PopulateSamplers();
|
||||||
UpdateStatus PopulateVertexBuffers(DrawCommand* draw_command);
|
UpdateStatus PopulateSampler(const Shader::SamplerDesc& desc);
|
||||||
UpdateStatus PopulateSamplers(DrawCommand* draw_command);
|
bool IssueCopy();
|
||||||
UpdateStatus PopulateSampler(DrawCommand* draw_command,
|
|
||||||
const Shader::SamplerDesc& desc);
|
|
||||||
bool IssueCopy(DrawCommand* draw_command);
|
|
||||||
|
|
||||||
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
|
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
|
||||||
GLuint depth_target);
|
GLuint depth_target);
|
||||||
|
@ -306,21 +237,23 @@ class CommandProcessor {
|
||||||
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
|
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
|
||||||
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
|
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
|
||||||
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
|
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
|
||||||
GLuint vertex_array_;
|
|
||||||
GLuint point_list_geometry_program_;
|
GLuint point_list_geometry_program_;
|
||||||
GLuint rect_list_geometry_program_;
|
GLuint rect_list_geometry_program_;
|
||||||
GLuint quad_list_geometry_program_;
|
GLuint quad_list_geometry_program_;
|
||||||
|
struct {
|
||||||
|
xenos::IndexFormat format;
|
||||||
|
xenos::Endian endianness;
|
||||||
|
uint32_t count;
|
||||||
|
uint32_t guest_base;
|
||||||
|
size_t length;
|
||||||
|
uint32_t max_index_found;
|
||||||
|
} index_buffer_info_;
|
||||||
|
uint32_t draw_index_count_;
|
||||||
|
|
||||||
TextureCache texture_cache_;
|
TextureCache texture_cache_;
|
||||||
|
|
||||||
|
DrawBatcher draw_batcher_;
|
||||||
CircularBuffer scratch_buffer_;
|
CircularBuffer scratch_buffer_;
|
||||||
struct ScratchBufferStats {
|
|
||||||
size_t total_state_data_size = 0;
|
|
||||||
size_t total_indices_size = 0;
|
|
||||||
size_t total_vertices_size = 0;
|
|
||||||
} scratch_buffer_stats_;
|
|
||||||
|
|
||||||
DrawCommand draw_command_;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool SetShadowRegister(uint32_t& dest, uint32_t register_name);
|
bool SetShadowRegister(uint32_t& dest, uint32_t register_name);
|
||||||
|
@ -341,7 +274,6 @@ class CommandProcessor {
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
} update_render_targets_regs_;
|
} update_render_targets_regs_;
|
||||||
struct UpdateViewportStateRegisters {
|
struct UpdateViewportStateRegisters {
|
||||||
//
|
|
||||||
UpdateViewportStateRegisters() { Reset(); }
|
UpdateViewportStateRegisters() { Reset(); }
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
} update_viewport_state_regs_;
|
} update_viewport_state_regs_;
|
||||||
|
@ -367,7 +299,6 @@ class CommandProcessor {
|
||||||
UpdateDepthStencilStateRegisters() { Reset(); }
|
UpdateDepthStencilStateRegisters() { Reset(); }
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
} update_depth_stencil_state_regs_;
|
} update_depth_stencil_state_regs_;
|
||||||
// TODO(benvanik): constant bitmask?
|
|
||||||
struct UpdateShadersRegisters {
|
struct UpdateShadersRegisters {
|
||||||
PrimitiveType prim_type;
|
PrimitiveType prim_type;
|
||||||
uint32_t sq_program_cntl;
|
uint32_t sq_program_cntl;
|
||||||
|
@ -380,9 +311,6 @@ class CommandProcessor {
|
||||||
vertex_shader = pixel_shader = nullptr;
|
vertex_shader = pixel_shader = nullptr;
|
||||||
}
|
}
|
||||||
} update_shaders_regs_;
|
} update_shaders_regs_;
|
||||||
// ib
|
|
||||||
// vb
|
|
||||||
// samplers
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gl4
|
} // namespace gl4
|
||||||
|
|
|
@ -0,0 +1,384 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <xenia/gpu/gl4/draw_batcher.h>
|
||||||
|
|
||||||
|
#include <poly/cxx_compat.h>
|
||||||
|
#include <poly/math.h>
|
||||||
|
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||||
|
#include <xenia/gpu/gpu-private.h>
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace gl4 {
|
||||||
|
|
||||||
|
using namespace xe::gpu::xenos;
|
||||||
|
|
||||||
|
extern "C" GLEWContext* glewGetContext();
|
||||||
|
|
||||||
|
// Capacity and allocation alignment handed to the two CircularBuffers owned
// by DrawBatcher: one holding the indirect draw commands and one holding the
// per-draw state blocks.
const size_t kCommandBufferAlignment = 4;
const size_t kCommandBufferCapacity = 16 * 1024 * 1024;
const size_t kStateBufferAlignment = 256;
const size_t kStateBufferCapacity = 64 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Constructs an idle batcher. The GL-backed buffers are only sized here;
// they are created later by Initialize().
DrawBatcher::DrawBatcher(RegisterFile* register_file)
    : register_file_(register_file),
      command_buffer_(kCommandBufferCapacity, kCommandBufferAlignment),
      state_buffer_(kStateBufferCapacity, kStateBufferAlignment),
      array_data_buffer_(nullptr),
      has_bindless_mdi_(false),
      draw_open_(false) {
  // No draw is in flight yet.
  std::memset(&active_draw_, 0, sizeof(active_draw_));

  // Start from an all-zero batch, then mark it as "nothing recorded":
  // UINTPTR_MAX in the range starts is the sentinel CommitDraw() checks to
  // detect the first draw of a batch.
  std::memset(&batch_state_, 0, sizeof(batch_state_));
  batch_state_.state_range_start = UINTPTR_MAX;
  batch_state_.command_range_start = UINTPTR_MAX;
  // Strides have not been computed; force BeginDraw() to compute them.
  batch_state_.needs_reconfigure = true;
}
|
||||||
|
|
||||||
|
// Creates the command and state buffers and binds the command buffer as the
// indirect draw source.
//
// array_data_buffer is borrowed (not owned); it is flushed together with the
// batcher's own buffers whenever a batch is issued.
// Returns false if either internal buffer fails to initialize.
bool DrawBatcher::Initialize(CircularBuffer* array_data_buffer) {
  array_data_buffer_ = array_data_buffer;

  // Short-circuits: the state buffer is not touched if the command buffer
  // could not be created.
  if (!command_buffer_.Initialize() || !state_buffer_.Initialize()) {
    return false;
  }

  // Indirect draws read their parameters from the buffer bound here.
  glBindBuffer(GL_DRAW_INDIRECT_BUFFER, command_buffer_.handle());

  // Use the NV bindless path only when vendor extensions are allowed by flag
  // and the extension is actually reported by GLEW.
  const bool bindless_available =
      FLAGS_vendor_gl_extensions && GLEW_NV_bindless_multi_draw_indirect;
  if (bindless_available) {
    has_bindless_mdi_ = true;
  }
  return true;
}
|
||||||
|
|
||||||
|
// Releases the buffers owned by the batcher. The borrowed array data buffer
// is left alone; it is not owned by this class.
void DrawBatcher::Shutdown() {
  // Tear down in the same order the buffers were brought up.
  command_buffer_.Shutdown();
  state_buffer_.Shutdown();
}
|
||||||
|
|
||||||
|
// Switches the batch to a new shader pipeline.
//
// If the pipeline handle is unchanged this does nothing. Otherwise any
// pending draws are flushed first (they were recorded against the previous
// pipeline) and the new shaders/pipeline are remembered for the next batch.
// Returns false only when that flush fails.
bool DrawBatcher::ReconfigurePipeline(GL4Shader* vertex_shader,
                                      GL4Shader* pixel_shader,
                                      GLuint pipeline) {
  const bool pipeline_changed = batch_state_.pipeline != pipeline;
  if (pipeline_changed) {
    if (!Flush(FlushMode::kReconfigure)) {
      return false;
    }
    batch_state_.pipeline = pipeline;
    batch_state_.pixel_shader = pixel_shader;
    batch_state_.vertex_shader = vertex_shader;
  }
  return true;
}
|
||||||
|
|
||||||
|
// Opens a non-indexed draw of index_count vertices.
//
// A batch can only hold draws of one primitive type and one indexing mode, so
// a mismatch with the current batch flushes it before the new draw is
// started. Returns false if the flush or the allocation step fails.
bool DrawBatcher::BeginDrawArrays(PrimitiveType prim_type,
                                  uint32_t index_count) {
  assert_false(draw_open_);

  const bool fits_current_batch =
      batch_state_.prim_type == prim_type && !batch_state_.indexed;
  if (!fits_current_batch && !Flush(FlushMode::kReconfigure)) {
    return false;
  }
  batch_state_.indexed = false;
  batch_state_.prim_type = prim_type;

  if (!BeginDraw()) {
    return false;
  }

  // Fill in the fixed part of the indirect command: a single instance
  // starting at vertex 0.
  auto arrays_cmd = active_draw_.draw_arrays_cmd;
  arrays_cmd->count = index_count;
  arrays_cmd->instance_count = 1;
  arrays_cmd->first_index = 0;
  arrays_cmd->base_instance = 0;
  return true;
}
|
||||||
|
|
||||||
|
// Opens an indexed draw of index_count indices in the given guest format.
//
// The current batch is flushed first when its primitive type, indexing mode
// or GL index type differs from what this draw needs. Returns false if the
// flush or the allocation step fails.
bool DrawBatcher::BeginDrawElements(PrimitiveType prim_type,
                                    uint32_t index_count,
                                    IndexFormat index_format) {
  assert_false(draw_open_);

  // Anything that is not 32-bit is submitted as 16-bit indices.
  GLenum index_type = GL_UNSIGNED_SHORT;
  if (index_format == IndexFormat::kInt32) {
    index_type = GL_UNSIGNED_INT;
  }

  const bool fits_current_batch = batch_state_.prim_type == prim_type &&
                                  batch_state_.indexed &&
                                  batch_state_.index_type == index_type;
  if (!fits_current_batch && !Flush(FlushMode::kReconfigure)) {
    return false;
  }
  batch_state_.index_type = index_type;
  batch_state_.indexed = true;
  batch_state_.prim_type = prim_type;

  if (!BeginDraw()) {
    return false;
  }

  // A non-zero guest index offset is not handled; the assert flags it.
  const uint32_t start_index =
      register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
  assert_zero(start_index);

  // Fixed part of the indirect command: one instance, no vertex rebasing.
  auto elements_cmd = active_draw_.draw_elements_cmd;
  elements_cmd->count = index_count;
  elements_cmd->instance_count = 1;
  elements_cmd->first_index = start_index;
  elements_cmd->base_vertex = 0;
  elements_cmd->base_instance = 0;

  if (has_bindless_mdi_) {
    // The bindless command carries an extra reserved word that must be zero.
    active_draw_.draw_elements_bindless_cmd->reserved_zero = 0;
  }
  return true;
}
|
||||||
|
|
||||||
|
bool DrawBatcher::BeginDraw() {
|
||||||
|
draw_open_ = true;
|
||||||
|
|
||||||
|
if (batch_state_.needs_reconfigure) {
|
||||||
|
batch_state_.needs_reconfigure = false;
|
||||||
|
// Have been reconfigured since last draw - need to compute state size.
|
||||||
|
// Layout:
|
||||||
|
// [draw command]
|
||||||
|
// [common header]
|
||||||
|
// [consts]
|
||||||
|
|
||||||
|
// Padded to max.
|
||||||
|
GLsizei command_size = 0;
|
||||||
|
if (has_bindless_mdi_) {
|
||||||
|
if (batch_state_.indexed) {
|
||||||
|
command_size = sizeof(DrawElementsIndirectBindlessCommandNV);
|
||||||
|
} else {
|
||||||
|
command_size = sizeof(DrawArraysIndirectBindlessCommandNV);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (batch_state_.indexed) {
|
||||||
|
command_size = sizeof(DrawElementsIndirectCommand);
|
||||||
|
} else {
|
||||||
|
command_size = sizeof(DrawArraysIndirectCommand);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
batch_state_.command_stride =
|
||||||
|
poly::round_up(command_size, GLsizei(kCommandBufferAlignment));
|
||||||
|
|
||||||
|
GLsizei header_size = sizeof(CommonHeader);
|
||||||
|
|
||||||
|
// TODO(benvanik); consts sizing.
|
||||||
|
// GLsizei float_consts_size = sizeof(float4) * 512;
|
||||||
|
// GLsizei bool_consts_size = sizeof(uint32_t) * 8;
|
||||||
|
// GLsizei loop_consts_size = sizeof(uint32_t) * 32;
|
||||||
|
// GLsizei consts_size =
|
||||||
|
// float_consts_size + bool_consts_size + loop_consts_size;
|
||||||
|
// batch_state_.float_consts_offset = batch_state_.header_offset +
|
||||||
|
// header_size;
|
||||||
|
// batch_state_.bool_consts_offset =
|
||||||
|
// batch_state_.float_consts_offset + float_consts_size;
|
||||||
|
// batch_state_.loop_consts_offset =
|
||||||
|
// batch_state_.bool_consts_offset + bool_consts_size;
|
||||||
|
GLsizei consts_size = 0;
|
||||||
|
|
||||||
|
batch_state_.state_stride = header_size + consts_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a command data block.
|
||||||
|
// We should treat it as write-only.
|
||||||
|
if (!command_buffer_.CanAcquire(batch_state_.command_stride)) {
|
||||||
|
Flush(FlushMode::kMakeCoherent);
|
||||||
|
}
|
||||||
|
active_draw_.command_allocation =
|
||||||
|
command_buffer_.Acquire(batch_state_.command_stride);
|
||||||
|
assert_not_null(active_draw_.command_allocation.host_ptr);
|
||||||
|
|
||||||
|
// Allocate a state data block.
|
||||||
|
// We should treat it as write-only.
|
||||||
|
if (!state_buffer_.CanAcquire(batch_state_.state_stride)) {
|
||||||
|
Flush(FlushMode::kMakeCoherent);
|
||||||
|
}
|
||||||
|
active_draw_.state_allocation =
|
||||||
|
state_buffer_.Acquire(batch_state_.state_stride);
|
||||||
|
assert_not_null(active_draw_.state_allocation.host_ptr);
|
||||||
|
|
||||||
|
active_draw_.command_address =
|
||||||
|
reinterpret_cast<uintptr_t>(active_draw_.command_allocation.host_ptr);
|
||||||
|
auto state_host_ptr =
|
||||||
|
reinterpret_cast<uintptr_t>(active_draw_.state_allocation.host_ptr);
|
||||||
|
active_draw_.header = reinterpret_cast<CommonHeader*>(state_host_ptr);
|
||||||
|
// active_draw_.float_consts =
|
||||||
|
// reinterpret_cast<float4*>(state_host_ptr +
|
||||||
|
// batch_state_.float_consts_offset);
|
||||||
|
// active_draw_.bool_consts =
|
||||||
|
// reinterpret_cast<uint32_t*>(state_host_ptr +
|
||||||
|
// batch_state_.bool_consts_offset);
|
||||||
|
// active_draw_.loop_consts =
|
||||||
|
// reinterpret_cast<uint32_t*>(state_host_ptr +
|
||||||
|
// batch_state_.loop_consts_offset);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Abandons the draw opened by BeginDrawArrays()/BeginDrawElements(), handing
// its command and state blocks back to their buffers. Safe to call when no
// draw is open, in which case it does nothing.
void DrawBatcher::DiscardDraw() {
  if (draw_open_) {
    draw_open_ = false;
    command_buffer_.Discard(std::move(active_draw_.command_allocation));
    state_buffer_.Discard(std::move(active_draw_.state_allocation));
  }
}
|
||||||
|
|
||||||
|
// Finalizes the open draw and appends it to the pending batch.
//
// The shader constants are snapshotted into the draw's state block, the
// batch's command/state ranges are grown to cover this draw, and both
// allocations are committed to their buffers. Always returns true.
bool DrawBatcher::CommitDraw() {
  assert_true(draw_open_);
  draw_open_ = false;

  // Copy over required constants.
  CopyConstants();

  auto& command_alloc = active_draw_.command_allocation;
  auto& state_alloc = active_draw_.state_allocation;

  // UINTPTR_MAX marks an empty batch: the first committed draw fixes where
  // the batch's ranges begin.
  const bool first_draw_in_batch =
      batch_state_.state_range_start == UINTPTR_MAX;
  if (first_draw_in_batch) {
    batch_state_.command_range_start = command_alloc.offset;
    batch_state_.state_range_start = state_alloc.offset;
  }
  batch_state_.command_range_length += command_alloc.aligned_length;
  batch_state_.state_range_length += state_alloc.aligned_length;

  // The allocation fields were read above, before ownership is handed over.
  command_buffer_.Commit(std::move(command_alloc));
  state_buffer_.Commit(std::move(state_alloc));

  ++batch_state_.draw_count;
  return true;
}
|
||||||
|
|
||||||
|
// Issues every draw recorded in the current batch with one multi-draw
// indirect call, then empties the batch.
//
// With an empty batch no GL work is done. In kReconfigure mode the batch is
// additionally marked so the next BeginDraw() recomputes its strides.
// Returns false (leaving the batch untouched) if the batch's primitive type
// cannot be mapped to a GL primitive.
bool DrawBatcher::Flush(FlushMode mode) {
  if (batch_state_.draw_count) {
    SCOPE_profile_cpu_f("gpu");

    assert_not_zero(batch_state_.command_stride);
    assert_not_zero(batch_state_.state_stride);

    // Flush pending buffer changes.
    command_buffer_.Flush();
    state_buffer_.Flush();
    array_data_buffer_->Flush();

    // State data is indexed by draw ID, so expose exactly the range written
    // for this batch.
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, state_buffer_.handle(),
                      batch_state_.state_range_start,
                      batch_state_.state_range_length);

    // Map the guest primitive type onto the GL primitive submitted for it.
    GLenum gl_prim_type = 0;
    switch (batch_state_.prim_type) {
      case PrimitiveType::kPointList:
        gl_prim_type = GL_POINTS;
        break;
      case PrimitiveType::kLineList:
        gl_prim_type = GL_LINES;
        break;
      case PrimitiveType::kLineStrip:
        gl_prim_type = GL_LINE_STRIP;
        break;
      case PrimitiveType::kLineLoop:
        gl_prim_type = GL_LINE_LOOP;
        break;
      case PrimitiveType::kTriangleList:
        gl_prim_type = GL_TRIANGLES;
        break;
      case PrimitiveType::kTriangleStrip:
        gl_prim_type = GL_TRIANGLE_STRIP;
        break;
      case PrimitiveType::kTriangleFan:
        gl_prim_type = GL_TRIANGLE_FAN;
        break;
      case PrimitiveType::kRectangleList:
        gl_prim_type = GL_TRIANGLE_STRIP;
        // Rect lists aren't culled. There may be other things they skip too.
        // assert_true((register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32
        // &
        //              0x3) == 0);
        break;
      case PrimitiveType::kQuadList:
        gl_prim_type = GL_LINES_ADJACENCY;
        break;
      case PrimitiveType::kUnknown0x07:
      default:
        gl_prim_type = GL_POINTS;
        XELOGE("unsupported primitive type %d", batch_state_.prim_type);
        assert_unhandled_case(batch_state_.prim_type);
        DiscardDraw();
        return false;
    }

    // With a buffer bound to GL_DRAW_INDIRECT_BUFFER the "pointer" argument
    // is a byte offset into that buffer.
    void* indirect_offset =
        reinterpret_cast<void*>(batch_state_.command_range_start);
    const GLsizei draw_count = batch_state_.draw_count;
    const GLsizei command_stride = batch_state_.command_stride;

    if (!has_bindless_mdi_) {
      if (batch_state_.indexed) {
        glMultiDrawElementsIndirect(gl_prim_type, batch_state_.index_type,
                                    indirect_offset, draw_count,
                                    command_stride);
      } else {
        glMultiDrawArraysIndirect(gl_prim_type, indirect_offset, draw_count,
                                  command_stride);
      }
    } else {
      // The bindless commands embed the vertex buffer pointers; GL needs to
      // know how many of them each command carries.
      int vertex_buffer_count =
          batch_state_.vertex_shader->buffer_inputs().total_elements_count;
      assert_true(vertex_buffer_count < 8);
      if (batch_state_.indexed) {
        glMultiDrawElementsIndirectBindlessNV(
            gl_prim_type, batch_state_.index_type, indirect_offset, draw_count,
            command_stride, vertex_buffer_count);
      } else {
        glMultiDrawArraysIndirectBindlessNV(gl_prim_type, indirect_offset,
                                            draw_count, command_stride,
                                            vertex_buffer_count);
      }
    }

    // Batch is now empty; restore the "nothing recorded" sentinels.
    batch_state_.draw_count = 0;
    batch_state_.command_range_start = UINTPTR_MAX;
    batch_state_.command_range_length = 0;
    batch_state_.state_range_start = UINTPTR_MAX;
    batch_state_.state_range_length = 0;
  }

  if (mode == FlushMode::kReconfigure) {
    // Reset - we'll update it as soon as we have all the information.
    batch_state_.needs_reconfigure = true;
  }

  return true;
}
|
||||||
|
|
||||||
|
void DrawBatcher::CopyConstants() {
|
||||||
|
// TODO(benvanik): partial updates, etc. We could use shader constant access
|
||||||
|
// knowledge that we get at compile time to only upload those constants
|
||||||
|
// required. If we did this as a variable length then we could really cut
|
||||||
|
// down on state block sizes.
|
||||||
|
|
||||||
|
std::memcpy(active_draw_.header->float_consts,
|
||||||
|
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
|
||||||
|
sizeof(active_draw_.header->float_consts));
|
||||||
|
std::memcpy(
|
||||||
|
active_draw_.header->bool_consts,
|
||||||
|
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].f32,
|
||||||
|
sizeof(active_draw_.header->bool_consts));
|
||||||
|
std::memcpy(active_draw_.header->loop_consts,
|
||||||
|
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].f32,
|
||||||
|
sizeof(active_draw_.header->loop_consts));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gl4
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,230 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
||||||
|
#define XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
||||||
|
|
||||||
|
#include <xenia/common.h>
|
||||||
|
#include <xenia/gpu/gl4/circular_buffer.h>
|
||||||
|
#include <xenia/gpu/gl4/gl_context.h>
|
||||||
|
#include <xenia/gpu/gl4/gl4_shader.h>
|
||||||
|
#include <xenia/gpu/register_file.h>
|
||||||
|
#include <xenia/gpu/xenos.h>
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace gl4 {
|
||||||
|
|
||||||
|
// Four packed floats, addressable either as an array (v[0..3]) or by
// component name (x, y, z, w). Both views alias the same storage.
union float4 {
  float v[4];
  struct {
    float x;
    float y;
    float z;
    float w;
  };
};
|
||||||
|
|
||||||
|
#pragma pack(push, 4)
|
||||||
|
struct DrawArraysIndirectCommand {
|
||||||
|
GLuint count;
|
||||||
|
GLuint instance_count;
|
||||||
|
GLuint first_index;
|
||||||
|
GLuint base_instance;
|
||||||
|
};
|
||||||
|
struct DrawElementsIndirectCommand {
|
||||||
|
GLuint count;
|
||||||
|
GLuint instance_count;
|
||||||
|
GLuint first_index;
|
||||||
|
GLint base_vertex;
|
||||||
|
GLuint base_instance;
|
||||||
|
};
|
||||||
|
struct BindlessPtrNV {
|
||||||
|
GLuint index;
|
||||||
|
GLuint reserved_zero;
|
||||||
|
GLuint64 address;
|
||||||
|
GLuint64 length;
|
||||||
|
};
|
||||||
|
struct DrawArraysIndirectBindlessCommandNV {
|
||||||
|
DrawArraysIndirectCommand cmd;
|
||||||
|
// NOTE: the spec is wrong here. For fucks sake.
|
||||||
|
// GLuint reserved_zero;
|
||||||
|
BindlessPtrNV vertex_buffers[8];
|
||||||
|
};
|
||||||
|
struct DrawElementsIndirectBindlessCommandNV {
|
||||||
|
DrawElementsIndirectCommand cmd;
|
||||||
|
GLuint reserved_zero;
|
||||||
|
BindlessPtrNV index_buffer;
|
||||||
|
BindlessPtrNV vertex_buffers[8];
|
||||||
|
};
|
||||||
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
class DrawBatcher {
|
||||||
|
public:
|
||||||
|
enum class FlushMode {
|
||||||
|
kMakeCoherent,
|
||||||
|
kStateChange,
|
||||||
|
kReconfigure,
|
||||||
|
};
|
||||||
|
|
||||||
|
DrawBatcher(RegisterFile* register_file);
|
||||||
|
|
||||||
|
bool Initialize(CircularBuffer* array_data_buffer);
|
||||||
|
void Shutdown();
|
||||||
|
|
||||||
|
PrimitiveType prim_type() const { return batch_state_.prim_type; }
|
||||||
|
|
||||||
|
void set_window_offset(uint32_t x, uint32_t y) {
|
||||||
|
active_draw_.header->window_offset.x = float(x);
|
||||||
|
active_draw_.header->window_offset.y = float(y);
|
||||||
|
}
|
||||||
|
void set_window_scissor(uint32_t left, uint32_t top, uint32_t right,
|
||||||
|
uint32_t bottom) {
|
||||||
|
active_draw_.header->window_scissor.x = float(left);
|
||||||
|
active_draw_.header->window_scissor.y = float(top);
|
||||||
|
active_draw_.header->window_scissor.z = float(right);
|
||||||
|
active_draw_.header->window_scissor.w = float(bottom);
|
||||||
|
}
|
||||||
|
void set_window_scalar(float width_scalar, float height_scalar) {
|
||||||
|
active_draw_.header->window_offset.z = width_scalar;
|
||||||
|
active_draw_.header->window_offset.w = height_scalar;
|
||||||
|
}
|
||||||
|
void set_viewport_offset(float offset_x, float offset_y, float offset_z) {
|
||||||
|
active_draw_.header->viewport_offset.x = offset_x;
|
||||||
|
active_draw_.header->viewport_offset.y = offset_y;
|
||||||
|
active_draw_.header->viewport_offset.z = offset_z;
|
||||||
|
}
|
||||||
|
void set_viewport_scale(float scale_x, float scale_y, float scale_z) {
|
||||||
|
active_draw_.header->viewport_scale.x = scale_x;
|
||||||
|
active_draw_.header->viewport_scale.y = scale_y;
|
||||||
|
active_draw_.header->viewport_scale.z = scale_z;
|
||||||
|
}
|
||||||
|
void set_vtx_fmt(float xy, float z, float w) {
|
||||||
|
active_draw_.header->vtx_fmt.x = xy;
|
||||||
|
active_draw_.header->vtx_fmt.y = xy;
|
||||||
|
active_draw_.header->vtx_fmt.z = z;
|
||||||
|
active_draw_.header->vtx_fmt.w = w;
|
||||||
|
}
|
||||||
|
void set_alpha_test(bool enabled, uint32_t func, float ref) {
|
||||||
|
active_draw_.header->alpha_test.x = enabled ? 1.0f : 0.0f;
|
||||||
|
active_draw_.header->alpha_test.y = float(func);
|
||||||
|
active_draw_.header->alpha_test.z = ref;
|
||||||
|
}
|
||||||
|
void set_texture_sampler(int index, GLuint64 handle) {
|
||||||
|
active_draw_.header->texture_samplers[index] = handle;
|
||||||
|
}
|
||||||
|
void set_index_buffer(const CircularBuffer::Allocation& allocation) {
|
||||||
|
if (has_bindless_mdi_) {
|
||||||
|
auto& ptr = active_draw_.draw_elements_bindless_cmd->index_buffer;
|
||||||
|
ptr.reserved_zero = 0;
|
||||||
|
ptr.index = 0;
|
||||||
|
ptr.address = allocation.gpu_ptr;
|
||||||
|
ptr.length = allocation.length;
|
||||||
|
} else {
|
||||||
|
// Offset is used in glDrawElements.
|
||||||
|
auto& cmd = active_draw_.draw_elements_cmd;
|
||||||
|
size_t index_size = batch_state_.index_type == GL_UNSIGNED_SHORT ? 2 : 4;
|
||||||
|
cmd->first_index = GLuint(allocation.offset / index_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void set_vertex_buffer(int index, GLsizei offset, GLsizei stride,
|
||||||
|
const CircularBuffer::Allocation& allocation) {
|
||||||
|
if (has_bindless_mdi_) {
|
||||||
|
BindlessPtrNV* ptr;
|
||||||
|
if (batch_state_.indexed) {
|
||||||
|
ptr = &active_draw_.draw_elements_bindless_cmd->vertex_buffers[index];
|
||||||
|
} else {
|
||||||
|
ptr = &active_draw_.draw_arrays_bindless_cmd->vertex_buffers[index];
|
||||||
|
}
|
||||||
|
ptr->reserved_zero = 0;
|
||||||
|
ptr->index = index;
|
||||||
|
ptr->address = allocation.gpu_ptr + offset;
|
||||||
|
ptr->length = allocation.length - offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReconfigurePipeline(GL4Shader* vertex_shader, GL4Shader* pixel_shader,
|
||||||
|
GLuint pipeline);
|
||||||
|
|
||||||
|
bool BeginDrawArrays(PrimitiveType prim_type, uint32_t index_count);
|
||||||
|
bool BeginDrawElements(PrimitiveType prim_type, uint32_t index_count,
|
||||||
|
xenos::IndexFormat index_format);
|
||||||
|
void DiscardDraw();
|
||||||
|
bool CommitDraw();
|
||||||
|
bool Flush(FlushMode mode);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool BeginDraw();
|
||||||
|
void CopyConstants();
|
||||||
|
|
||||||
|
RegisterFile* register_file_;
|
||||||
|
CircularBuffer command_buffer_;
|
||||||
|
CircularBuffer state_buffer_;
|
||||||
|
CircularBuffer* array_data_buffer_;
|
||||||
|
|
||||||
|
bool has_bindless_mdi_;
|
||||||
|
|
||||||
|
struct BatchState {
|
||||||
|
bool needs_reconfigure;
|
||||||
|
PrimitiveType prim_type;
|
||||||
|
bool indexed;
|
||||||
|
GLenum index_type;
|
||||||
|
|
||||||
|
GL4Shader* vertex_shader;
|
||||||
|
GL4Shader* pixel_shader;
|
||||||
|
GLuint pipeline;
|
||||||
|
|
||||||
|
GLsizei command_stride;
|
||||||
|
GLsizei state_stride;
|
||||||
|
GLsizei float_consts_offset;
|
||||||
|
GLsizei bool_consts_offset;
|
||||||
|
GLsizei loop_consts_offset;
|
||||||
|
|
||||||
|
uintptr_t command_range_start;
|
||||||
|
uintptr_t command_range_length;
|
||||||
|
uintptr_t state_range_start;
|
||||||
|
uintptr_t state_range_length;
|
||||||
|
GLsizei draw_count;
|
||||||
|
} batch_state_;
|
||||||
|
|
||||||
|
// This must match GL4Shader's header.
|
||||||
|
struct CommonHeader {
|
||||||
|
float4 window_offset; // tx,ty,sx,sy
|
||||||
|
float4 window_scissor; // x0,y0,x1,y1
|
||||||
|
float4 viewport_offset; // tx,ty,tz,?
|
||||||
|
float4 viewport_scale; // sx,sy,sz,?
|
||||||
|
float4 vtx_fmt; //
|
||||||
|
float4 alpha_test; // alpha test enable, func, ref, ?
|
||||||
|
|
||||||
|
// TODO(benvanik): pack tightly
|
||||||
|
GLuint64 texture_samplers[32];
|
||||||
|
|
||||||
|
float4 float_consts[512];
|
||||||
|
uint32_t bool_consts[8];
|
||||||
|
uint32_t loop_consts[32];
|
||||||
|
};
|
||||||
|
// Bookkeeping for the single draw that is currently being recorded.
// NOTE(review): presumably only meaningful while draw_open_ is true - confirm
// in draw_batcher.cc.
struct {
|
||||||
|
// Circular-buffer allocations backing this draw's indirect command and its
// state block.
CircularBuffer::Allocation command_allocation;
|
||||||
|
CircularBuffer::Allocation state_allocation;
|
||||||
|
|
||||||
|
// All members of this union alias the same pointer value: they are typed
// views of one command address. Which view is valid depends on the kind of
// draw being recorded (e.g. the bindless vertex buffer setter in this header
// picks draw_elements_bindless_cmd or draw_arrays_bindless_cmd based on
// batch_state_.indexed).
union {
|
||||||
|
DrawArraysIndirectCommand* draw_arrays_cmd;
|
||||||
|
DrawElementsIndirectCommand* draw_elements_cmd;
|
||||||
|
DrawArraysIndirectBindlessCommandNV* draw_arrays_bindless_cmd;
|
||||||
|
DrawElementsIndirectBindlessCommandNV* draw_elements_bindless_cmd;
|
||||||
|
// The same pointer viewed as an integer address.
uintptr_t command_address;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Pointer to this draw's CommonHeader; presumably located inside
// state_allocation - TODO confirm.
CommonHeader* header;
|
||||||
|
} active_draw_;
|
||||||
|
bool draw_open_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace gl4
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <poly/cxx_compat.h>
|
#include <poly/cxx_compat.h>
|
||||||
#include <poly/math.h>
|
#include <poly/math.h>
|
||||||
|
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||||
#include <xenia/gpu/gl4/gl4_shader_translator.h>
|
#include <xenia/gpu/gl4/gl4_shader_translator.h>
|
||||||
#include <xenia/gpu/gpu-private.h>
|
#include <xenia/gpu/gpu-private.h>
|
||||||
|
|
||||||
|
@ -18,6 +19,8 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace gl4 {
|
namespace gl4 {
|
||||||
|
|
||||||
|
using namespace xe::gpu::xenos;
|
||||||
|
|
||||||
extern "C" GLEWContext* glewGetContext();
|
extern "C" GLEWContext* glewGetContext();
|
||||||
|
|
||||||
// Stateful, but minimally.
|
// Stateful, but minimally.
|
||||||
|
@ -25,41 +28,147 @@ thread_local GL4ShaderTranslator shader_translator_;
|
||||||
|
|
||||||
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||||
: Shader(shader_type, data_hash, dword_ptr, dword_count), program_(0) {}
|
: Shader(shader_type, data_hash, dword_ptr, dword_count),
|
||||||
|
program_(0),
|
||||||
|
vao_(0) {}
|
||||||
|
|
||||||
GL4Shader::~GL4Shader() { glDeleteProgram(program_); }
|
GL4Shader::~GL4Shader() {
|
||||||
|
glDeleteProgram(program_);
|
||||||
|
glDeleteVertexArrays(1, &vao_);
|
||||||
|
}
|
||||||
|
|
||||||
const std::string header =
|
std::string GL4Shader::GetHeader() {
|
||||||
"#version 450\n"
|
static const std::string header =
|
||||||
"#extension all : warn\n"
|
"#version 450\n"
|
||||||
"#extension GL_ARB_bindless_texture : require\n"
|
"#extension all : warn\n"
|
||||||
"#extension GL_ARB_explicit_uniform_location : require\n"
|
"#extension GL_ARB_bindless_texture : require\n"
|
||||||
"#extension GL_ARB_shading_language_420pack : require\n"
|
"#extension GL_ARB_explicit_uniform_location : require\n"
|
||||||
"#extension GL_ARB_shader_storage_buffer_object : require\n"
|
"#extension GL_ARB_shader_draw_parameters : require\n"
|
||||||
"precision highp float;\n"
|
"#extension GL_ARB_shader_storage_buffer_object : require\n"
|
||||||
"precision highp int;\n"
|
"#extension GL_ARB_shading_language_420pack : require\n"
|
||||||
"layout(std140, column_major) uniform;\n"
|
"precision highp float;\n"
|
||||||
"layout(std430, column_major) buffer;\n"
|
"precision highp int;\n"
|
||||||
"struct StateData {\n"
|
"layout(std140, column_major) uniform;\n"
|
||||||
" vec4 window_offset;\n"
|
"layout(std430, column_major) buffer;\n"
|
||||||
" vec4 window_scissor;\n"
|
"\n"
|
||||||
" vec4 vtx_fmt;\n"
|
// This must match DrawBatcher::CommonHeader.
|
||||||
" vec4 viewport_offset;\n"
|
"struct StateData {\n"
|
||||||
" vec4 viewport_scale;\n"
|
" vec4 window_offset;\n"
|
||||||
" vec4 alpha_test;\n"
|
" vec4 window_scissor;\n"
|
||||||
" uvec2 texture_samplers[32];\n"
|
" vec4 viewport_offset;\n"
|
||||||
" vec4 float_consts[512];\n"
|
" vec4 viewport_scale;\n"
|
||||||
" uint fetch_consts[32 * 6];\n"
|
" vec4 vtx_fmt;\n"
|
||||||
" int bool_consts[8];\n"
|
" vec4 alpha_test;\n"
|
||||||
" int loop_consts[32];\n"
|
// TODO(benvanik): variable length.
|
||||||
"};\n"
|
" uvec2 texture_samplers[32];\n"
|
||||||
"struct VertexData {\n"
|
" vec4 float_consts[512];\n"
|
||||||
" vec4 o[16];\n"
|
" int bool_consts[8];\n"
|
||||||
"};\n"
|
" int loop_consts[32];\n"
|
||||||
"\n"
|
"};\n"
|
||||||
"layout(binding = 0) buffer State {\n"
|
"layout(binding = 0) buffer State {\n"
|
||||||
" StateData state;\n"
|
" StateData states[];\n"
|
||||||
"};\n";
|
"};\n"
|
||||||
|
"\n"
|
||||||
|
"struct VertexData {\n"
|
||||||
|
" vec4 o[16];\n"
|
||||||
|
"};\n";
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GL4Shader::PrepareVertexArrayObject() {
|
||||||
|
glCreateVertexArrays(1, &vao_);
|
||||||
|
|
||||||
|
bool has_bindless_vbos = false;
|
||||||
|
if (FLAGS_vendor_gl_extensions && GLEW_NV_vertex_buffer_unified_memory) {
|
||||||
|
has_bindless_vbos = true;
|
||||||
|
// Nasty, but no DSA for this.
|
||||||
|
glBindVertexArray(vao_);
|
||||||
|
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||||
|
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t el_index = 0;
|
||||||
|
for (uint32_t buffer_index = 0; buffer_index < buffer_inputs_.count;
|
||||||
|
++buffer_index) {
|
||||||
|
const auto& desc = buffer_inputs_.descs[buffer_index];
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < desc.element_count; ++i, ++el_index) {
|
||||||
|
const auto& el = desc.elements[i];
|
||||||
|
auto comp_count = GetVertexFormatComponentCount(el.format);
|
||||||
|
GLenum comp_type;
|
||||||
|
switch (el.format) {
|
||||||
|
case VertexFormat::k_8_8_8_8:
|
||||||
|
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_2_10_10_10:
|
||||||
|
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
|
||||||
|
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_10_11_11:
|
||||||
|
assert_false(el.is_signed);
|
||||||
|
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
||||||
|
break;
|
||||||
|
/*case VertexFormat::k_11_11_10:
|
||||||
|
break;*/
|
||||||
|
case VertexFormat::k_16_16:
|
||||||
|
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_FLOAT:
|
||||||
|
comp_type = GL_HALF_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_16_16:
|
||||||
|
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||||
|
comp_type = GL_HALF_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32:
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32:
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_32:
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_FLOAT:
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_FLOAT:
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_FLOAT:
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(el.format);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
glEnableVertexArrayAttrib(vao_, el_index);
|
||||||
|
if (has_bindless_vbos) {
|
||||||
|
// NOTE: MultiDrawIndirectBindlessMumble doesn't handle separate
|
||||||
|
// vertex bindings/formats.
|
||||||
|
glVertexAttribFormat(el_index, comp_count, comp_type, el.is_normalized,
|
||||||
|
el.offset_words * 4);
|
||||||
|
glVertexArrayVertexBuffer(vao_, el_index, 0, 0, desc.stride_words * 4);
|
||||||
|
} else {
|
||||||
|
glVertexArrayAttribBinding(vao_, el_index, buffer_index);
|
||||||
|
glVertexArrayAttribFormat(vao_, el_index, comp_count, comp_type,
|
||||||
|
el.is_normalized, el.offset_words * 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_bindless_vbos) {
|
||||||
|
glBindVertexArray(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool GL4Shader::PrepareVertexShader(
|
bool GL4Shader::PrepareVertexShader(
|
||||||
const xenos::xe_gpu_program_cntl_t& program_cntl) {
|
const xenos::xe_gpu_program_cntl_t& program_cntl) {
|
||||||
|
@ -68,8 +177,14 @@ bool GL4Shader::PrepareVertexShader(
|
||||||
}
|
}
|
||||||
has_prepared_ = true;
|
has_prepared_ = true;
|
||||||
|
|
||||||
|
// Build static vertex array descriptor.
|
||||||
|
if (!PrepareVertexArrayObject()) {
|
||||||
|
PLOGE("Unable to prepare vertex shader array object");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
std::string apply_transform =
|
std::string apply_transform =
|
||||||
"vec4 applyTransform(vec4 pos) {\n"
|
"vec4 applyTransform(const in StateData state, vec4 pos) {\n"
|
||||||
" // Clip->NDC with perspective divide.\n"
|
" // Clip->NDC with perspective divide.\n"
|
||||||
" // We do this here because it's programmable on the 360.\n"
|
" // We do this here because it's programmable on the 360.\n"
|
||||||
" float w = pos.w;\n"
|
" float w = pos.w;\n"
|
||||||
|
@ -107,14 +222,15 @@ bool GL4Shader::PrepareVertexShader(
|
||||||
" return pos;\n"
|
" return pos;\n"
|
||||||
"}\n";
|
"}\n";
|
||||||
std::string source =
|
std::string source =
|
||||||
header + apply_transform +
|
GetHeader() + apply_transform +
|
||||||
"out gl_PerVertex {\n"
|
"out gl_PerVertex {\n"
|
||||||
" vec4 gl_Position;\n"
|
" vec4 gl_Position;\n"
|
||||||
" float gl_PointSize;\n"
|
" float gl_PointSize;\n"
|
||||||
" float gl_ClipDistance[];\n"
|
" float gl_ClipDistance[];\n"
|
||||||
"};\n"
|
"};\n"
|
||||||
"layout(location = 0) out VertexData vtx;\n"
|
"layout(location = 0) flat out uint draw_id;\n"
|
||||||
"void processVertex();\n"
|
"layout(location = 1) out VertexData vtx;\n"
|
||||||
|
"void processVertex(const in StateData state);\n"
|
||||||
"void main() {\n" +
|
"void main() {\n" +
|
||||||
(alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
(alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
||||||
: "") +
|
: "") +
|
||||||
|
@ -122,8 +238,10 @@ bool GL4Shader::PrepareVertexShader(
|
||||||
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
||||||
" vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
" vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" processVertex();\n"
|
" const StateData state = states[gl_DrawIDARB];\n"
|
||||||
" gl_Position = applyTransform(gl_Position);\n"
|
" processVertex(state);\n"
|
||||||
|
" gl_Position = applyTransform(state, gl_Position);\n"
|
||||||
|
" draw_id = gl_DrawIDARB;\n"
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
std::string translated_source =
|
std::string translated_source =
|
||||||
|
@ -149,12 +267,14 @@ bool GL4Shader::PreparePixelShader(
|
||||||
}
|
}
|
||||||
has_prepared_ = true;
|
has_prepared_ = true;
|
||||||
|
|
||||||
std::string source = header +
|
std::string source = GetHeader() +
|
||||||
"layout(location = 0) in VertexData vtx;\n"
|
"layout(location = 0) flat in uint draw_id;\n"
|
||||||
|
"layout(location = 1) in VertexData vtx;\n"
|
||||||
"layout(location = 0) out vec4 oC[4];\n"
|
"layout(location = 0) out vec4 oC[4];\n"
|
||||||
"void processFragment();\n"
|
"void processFragment(const in StateData state);\n"
|
||||||
"void main() {\n" +
|
"void main() {\n" +
|
||||||
" processFragment();\n"
|
" const StateData state = states[draw_id];\n"
|
||||||
|
" processFragment(state);\n"
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
std::string translated_source =
|
std::string translated_source =
|
||||||
|
|
|
@ -10,6 +10,8 @@
|
||||||
#ifndef XENIA_GPU_GL4_GL4_SHADER_H_
|
#ifndef XENIA_GPU_GL4_GL4_SHADER_H_
|
||||||
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#include <xenia/common.h>
|
#include <xenia/common.h>
|
||||||
#include <xenia/gpu/gl4/gl_context.h>
|
#include <xenia/gpu/gl4/gl_context.h>
|
||||||
#include <xenia/gpu/shader.h>
|
#include <xenia/gpu/shader.h>
|
||||||
|
@ -25,14 +27,18 @@ class GL4Shader : public Shader {
|
||||||
~GL4Shader() override;
|
~GL4Shader() override;
|
||||||
|
|
||||||
GLuint program() const { return program_; }
|
GLuint program() const { return program_; }
|
||||||
|
GLuint vao() const { return vao_; }
|
||||||
|
|
||||||
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||||
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
std::string GetHeader();
|
||||||
|
bool PrepareVertexArrayObject();
|
||||||
bool CompileProgram(std::string source);
|
bool CompileProgram(std::string source);
|
||||||
|
|
||||||
GLuint program_;
|
GLuint program_;
|
||||||
|
GLuint vao_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gl4
|
} // namespace gl4
|
||||||
|
|
|
@ -91,7 +91,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
|
||||||
const auto& alloc_counts = vertex_shader->alloc_counts();
|
const auto& alloc_counts = vertex_shader->alloc_counts();
|
||||||
|
|
||||||
// Vertex shader main() header.
|
// Vertex shader main() header.
|
||||||
Append("void processVertex() {\n");
|
Append("void processVertex(const in StateData state) {\n");
|
||||||
|
|
||||||
// Add temporaries for any registers we may use.
|
// Add temporaries for any registers we may use.
|
||||||
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
||||||
|
@ -126,7 +126,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
|
||||||
// (and less than the number of required registers), things may die.
|
// (and less than the number of required registers), things may die.
|
||||||
|
|
||||||
// Pixel shader main() header.
|
// Pixel shader main() header.
|
||||||
Append("void processFragment() {\n");
|
Append("void processFragment(const in StateData state) {\n");
|
||||||
|
|
||||||
// Add temporary registers.
|
// Add temporary registers.
|
||||||
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
||||||
|
|
|
@ -132,12 +132,15 @@ std::unique_ptr<GLContext> GLContext::CreateShared() {
|
||||||
GLContextLock context_lock(this);
|
GLContextLock context_lock(this);
|
||||||
|
|
||||||
int context_flags = 0;
|
int context_flags = 0;
|
||||||
|
//int profile = WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB;
|
||||||
|
int profile = WGL_CONTEXT_CORE_PROFILE_BIT_ARB;
|
||||||
#if DEBUG
|
#if DEBUG
|
||||||
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
|
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
|
||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, //
|
int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, //
|
||||||
WGL_CONTEXT_MINOR_VERSION_ARB, 5, //
|
WGL_CONTEXT_MINOR_VERSION_ARB, 5, //
|
||||||
WGL_CONTEXT_FLAGS_ARB, context_flags, //
|
WGL_CONTEXT_FLAGS_ARB, context_flags, //
|
||||||
|
WGL_CONTEXT_PROFILE_MASK_ARB, profile, //
|
||||||
0};
|
0};
|
||||||
new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list);
|
new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list);
|
||||||
if (!new_glrc) {
|
if (!new_glrc) {
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
'circular_buffer.h',
|
'circular_buffer.h',
|
||||||
'command_processor.cc',
|
'command_processor.cc',
|
||||||
'command_processor.h',
|
'command_processor.h',
|
||||||
|
'draw_batcher.cc',
|
||||||
|
'draw_batcher.h',
|
||||||
'gl4_gpu-private.h',
|
'gl4_gpu-private.h',
|
||||||
'gl4_gpu.cc',
|
'gl4_gpu.cc',
|
||||||
'gl4_gpu.h',
|
'gl4_gpu.h',
|
||||||
|
|
Loading…
Reference in New Issue