MultiDrawIndirect draw batching - now down to <20us per draw.
This commit is contained in:
parent
eda38a7428
commit
4fcf9c6229
|
@ -35,6 +35,8 @@ class CircularBuffer {
|
|||
void Shutdown();
|
||||
|
||||
// Returns the stored buffer_ value (presumably the GL buffer object name
// backing this circular buffer - confirm against Initialize()).
GLuint handle() const { return buffer_; }
|
||||
// Returns gpu_base_ (presumably the GPU-visible base address of the buffer,
// as used for bindless access - confirm against Initialize()).
GLuint64 gpu_handle() const { return gpu_base_; }
|
||||
// Returns capacity_ (presumably the total size of the buffer in bytes -
// confirm against the constructor).
size_t capacity() const { return capacity_; }
|
||||
|
||||
bool CanAcquire(size_t length);
|
||||
Allocation Acquire(size_t length);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -18,6 +18,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include <xenia/gpu/gl4/circular_buffer.h>
|
||||
#include <xenia/gpu/gl4/draw_batcher.h>
|
||||
#include <xenia/gpu/gl4/gl_context.h>
|
||||
#include <xenia/gpu/gl4/gl4_shader.h>
|
||||
#include <xenia/gpu/gl4/texture_cache.h>
|
||||
|
@ -41,73 +42,6 @@ struct SwapParameters {
|
|||
GLenum attachment;
|
||||
};
|
||||
|
||||
// This must match the layout in gl4_shader.cc.
|
||||
// CPU-side image of the per-draw state block read by the shaders.
// Field order and sizes define the binary layout; the header comment on this
// struct requires it to match the declaration in gl4_shader.cc.
// NOTE(review): the GLSL StateData in gl4_shader.cc declares
// `uint fetch_consts[32 * 6]` between float_consts and bool_consts, which this
// struct omits - confirm the bool/loop constant offsets actually line up.
struct UniformDataBlock {
  // Four floats, addressable either as an array or by component name.
  union float4 {
    float v[4];
    struct {
      float x;
      float y;
      float z;
      float w;
    };
  };

  float4 window_offset;    // tx,ty,sx,sy
  float4 window_scissor;   // x0,y0,x1,y1
  float4 vtx_fmt;          // TODO(benvanik): vertex format xyzw?
  float4 viewport_offset;  // tx,ty,tz,?
  float4 viewport_scale;   // sx,sy,sz,?

  float4 alpha_test;  // alpha test enable, func, ref, ?

  // One 64-bit handle per sampler slot.
  // TODO(benvanik): pack tightly
  uint64_t texture_samplers[32];

  // Register data from 0x4000 to 0x4927.
  // UpdateConstants relies on the packing of these, so the members below must
  // stay contiguous and in this order.
  struct {
    // SHADER_CONSTANT_000_X...
    float4 float_consts[512];
    // SHADER_CONSTANT_FETCH_00_0 is omitted
    // SHADER_CONSTANT_BOOL_000_031...
    int32_t bool_consts[8];
    // SHADER_CONSTANT_LOOP_00...
    int32_t loop_consts[32];
  };
};
|
||||
static_assert(sizeof(UniformDataBlock) <= 16 * 1024, "Need <=16k uniform data");
|
||||
|
||||
// TODO(benvanik): move more of the enums in here?
|
||||
struct DrawCommand {
|
||||
PrimitiveType prim_type;
|
||||
uint32_t start_index;
|
||||
uint32_t min_index;
|
||||
uint32_t max_index;
|
||||
uint32_t index_count;
|
||||
uint32_t base_vertex;
|
||||
|
||||
// Index buffer, if present.
|
||||
// If index_count > 0 but buffer is nullptr then auto draw.
|
||||
struct {
|
||||
const uint8_t* address;
|
||||
size_t size;
|
||||
xenos::Endian endianness;
|
||||
xenos::IndexFormat format;
|
||||
size_t buffer_offset;
|
||||
} index_buffer;
|
||||
|
||||
// Texture samplers.
|
||||
struct SamplerInput {
|
||||
uint32_t input_index;
|
||||
// TextureResource* texture;
|
||||
// SamplerStateResource* sampler_state;
|
||||
};
|
||||
SamplerInput vertex_shader_samplers[32];
|
||||
SamplerInput pixel_shader_samplers[32];
|
||||
|
||||
// NOTE: do not read from this - the mapped memory is likely write combined.
|
||||
UniformDataBlock* state_data;
|
||||
};
|
||||
|
||||
class CommandProcessor {
|
||||
public:
|
||||
CommandProcessor(GL4GraphicsSystem* graphics_system);
|
||||
|
@ -241,22 +175,19 @@ class CommandProcessor {
|
|||
bool LoadShader(ShaderType shader_type, const uint32_t* address,
|
||||
uint32_t dword_count);
|
||||
|
||||
void PrepareDraw(DrawCommand* draw_command);
|
||||
bool IssueDraw(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateRenderTargets(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateState(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateViewportState(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateRasterizerState(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateBlendState(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateDepthStencilState(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateConstants(DrawCommand* draw_command);
|
||||
UpdateStatus UpdateShaders(DrawCommand* draw_command);
|
||||
UpdateStatus PopulateIndexBuffer(DrawCommand* draw_command);
|
||||
UpdateStatus PopulateVertexBuffers(DrawCommand* draw_command);
|
||||
UpdateStatus PopulateSamplers(DrawCommand* draw_command);
|
||||
UpdateStatus PopulateSampler(DrawCommand* draw_command,
|
||||
const Shader::SamplerDesc& desc);
|
||||
bool IssueCopy(DrawCommand* draw_command);
|
||||
bool IssueDraw();
|
||||
UpdateStatus UpdateShaders(PrimitiveType prim_type);
|
||||
UpdateStatus UpdateRenderTargets();
|
||||
UpdateStatus UpdateState();
|
||||
UpdateStatus UpdateViewportState();
|
||||
UpdateStatus UpdateRasterizerState();
|
||||
UpdateStatus UpdateBlendState();
|
||||
UpdateStatus UpdateDepthStencilState();
|
||||
UpdateStatus PopulateIndexBuffer();
|
||||
UpdateStatus PopulateVertexBuffers();
|
||||
UpdateStatus PopulateSamplers();
|
||||
UpdateStatus PopulateSampler(const Shader::SamplerDesc& desc);
|
||||
bool IssueCopy();
|
||||
|
||||
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
|
||||
GLuint depth_target);
|
||||
|
@ -306,21 +237,23 @@ class CommandProcessor {
|
|||
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
|
||||
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
|
||||
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
|
||||
GLuint vertex_array_;
|
||||
GLuint point_list_geometry_program_;
|
||||
GLuint rect_list_geometry_program_;
|
||||
GLuint quad_list_geometry_program_;
|
||||
struct {
|
||||
xenos::IndexFormat format;
|
||||
xenos::Endian endianness;
|
||||
uint32_t count;
|
||||
uint32_t guest_base;
|
||||
size_t length;
|
||||
uint32_t max_index_found;
|
||||
} index_buffer_info_;
|
||||
uint32_t draw_index_count_;
|
||||
|
||||
TextureCache texture_cache_;
|
||||
|
||||
DrawBatcher draw_batcher_;
|
||||
CircularBuffer scratch_buffer_;
|
||||
struct ScratchBufferStats {
|
||||
size_t total_state_data_size = 0;
|
||||
size_t total_indices_size = 0;
|
||||
size_t total_vertices_size = 0;
|
||||
} scratch_buffer_stats_;
|
||||
|
||||
DrawCommand draw_command_;
|
||||
|
||||
private:
|
||||
bool SetShadowRegister(uint32_t& dest, uint32_t register_name);
|
||||
|
@ -341,7 +274,6 @@ class CommandProcessor {
|
|||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_render_targets_regs_;
|
||||
struct UpdateViewportStateRegisters {
|
||||
//
|
||||
UpdateViewportStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_viewport_state_regs_;
|
||||
|
@ -367,7 +299,6 @@ class CommandProcessor {
|
|||
UpdateDepthStencilStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_depth_stencil_state_regs_;
|
||||
// TODO(benvanik): constant bitmask?
|
||||
struct UpdateShadersRegisters {
|
||||
PrimitiveType prim_type;
|
||||
uint32_t sq_program_cntl;
|
||||
|
@ -380,9 +311,6 @@ class CommandProcessor {
|
|||
vertex_shader = pixel_shader = nullptr;
|
||||
}
|
||||
} update_shaders_regs_;
|
||||
// ib
|
||||
// vb
|
||||
// samplers
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -0,0 +1,384 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/gl4/draw_batcher.h>
|
||||
|
||||
#include <poly/cxx_compat.h>
|
||||
#include <poly/math.h>
|
||||
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
extern "C" GLEWContext* glewGetContext();
|
||||
|
||||
// Sizing of the two circular buffers owned by DrawBatcher.
// Indirect draw command storage: 16MiB, allocations aligned to 4 bytes.
constexpr size_t kCommandBufferCapacity = size_t(16) * 1024 * 1024;
constexpr size_t kCommandBufferAlignment = 4;
// Per-draw state storage: 64MiB, allocations aligned to 256 bytes.
constexpr size_t kStateBufferCapacity = size_t(64) * 1024 * 1024;
constexpr size_t kStateBufferAlignment = 256;
|
||||
|
||||
// Constructs an idle batcher. No GL resources are touched here; the buffers
// are only sized, and Initialize() must be called before use.
DrawBatcher::DrawBatcher(RegisterFile* register_file)
    : register_file_(register_file),
      command_buffer_(kCommandBufferCapacity, kCommandBufferAlignment),
      state_buffer_(kStateBufferCapacity, kStateBufferAlignment),
      array_data_buffer_(nullptr),
      has_bindless_mdi_(false),
      draw_open_(false) {
  std::memset(&active_draw_, 0, sizeof(active_draw_));

  std::memset(&batch_state_, 0, sizeof(batch_state_));
  // UINTPTR_MAX marks "no range recorded yet"; CommitDraw() tests for it.
  batch_state_.state_range_start = UINTPTR_MAX;
  batch_state_.command_range_start = UINTPTR_MAX;
  // Forces BeginDraw() to compute the strides on the first draw.
  batch_state_.needs_reconfigure = true;
}
|
||||
|
||||
// Initializes the command and state buffers, binds the command buffer as the
// indirect draw buffer and detects bindless multi-draw-indirect support.
// array_data_buffer is stored (not owned) and flushed together with the batch.
// Returns false if either buffer fails to initialize.
bool DrawBatcher::Initialize(CircularBuffer* array_data_buffer) {
  array_data_buffer_ = array_data_buffer;

  // Short-circuiting keeps the original order: the state buffer is only
  // initialized once the command buffer succeeded.
  if (!command_buffer_.Initialize() || !state_buffer_.Initialize()) {
    return false;
  }

  glBindBuffer(GL_DRAW_INDIRECT_BUFFER, command_buffer_.handle());

  const bool bindless_available =
      FLAGS_vendor_gl_extensions && GLEW_NV_bindless_multi_draw_indirect;
  if (bindless_available) {
    has_bindless_mdi_ = true;
  }
  return true;
}
|
||||
|
||||
// Shuts down the two buffers owned by the batcher (command first, then
// state). The array data buffer is not owned and is left untouched.
void DrawBatcher::Shutdown() {
  command_buffer_.Shutdown();
  state_buffer_.Shutdown();
}
|
||||
|
||||
// Switches the batch to a new pipeline. Pending draws are flushed first
// because a batch is issued with a single pipeline. Only the pipeline handle
// is compared; if it is unchanged the shaders are not updated.
// Returns false if the flush fails, in which case nothing is changed.
bool DrawBatcher::ReconfigurePipeline(GL4Shader* vertex_shader,
                                      GL4Shader* pixel_shader,
                                      GLuint pipeline) {
  const bool pipeline_changed = batch_state_.pipeline != pipeline;
  if (pipeline_changed) {
    if (!Flush(FlushMode::kReconfigure)) {
      return false;
    }
    batch_state_.pipeline = pipeline;
    batch_state_.pixel_shader = pixel_shader;
    batch_state_.vertex_shader = vertex_shader;
  }
  return true;
}
|
||||
|
||||
// Opens a non-indexed draw of index_count vertices. The current batch is
// flushed first if it uses a different primitive type or is indexed.
// Returns false if the flush or the per-draw allocation fails.
bool DrawBatcher::BeginDrawArrays(PrimitiveType prim_type,
                                  uint32_t index_count) {
  assert_false(draw_open_);

  const bool batch_compatible =
      batch_state_.prim_type == prim_type && !batch_state_.indexed;
  if (!batch_compatible && !Flush(FlushMode::kReconfigure)) {
    return false;
  }
  batch_state_.indexed = false;
  batch_state_.prim_type = prim_type;

  if (!BeginDraw()) {
    return false;
  }

  // Fill in the indirect command: a single instance starting at vertex 0.
  auto arrays_cmd = active_draw_.draw_arrays_cmd;
  arrays_cmd->count = index_count;
  arrays_cmd->instance_count = 1;
  arrays_cmd->first_index = 0;
  arrays_cmd->base_instance = 0;
  return true;
}
|
||||
|
||||
// Opens an indexed draw of index_count indices. The current batch is flushed
// first if it differs in primitive type, is non-indexed, or uses another
// index type. Returns false if the flush or the per-draw allocation fails.
bool DrawBatcher::BeginDrawElements(PrimitiveType prim_type,
                                    uint32_t index_count,
                                    IndexFormat index_format) {
  assert_false(draw_open_);

  // Anything that is not kInt32 is treated as 16-bit indices.
  GLenum gl_index_type = GL_UNSIGNED_SHORT;
  if (index_format == IndexFormat::kInt32) {
    gl_index_type = GL_UNSIGNED_INT;
  }

  const bool batch_compatible = batch_state_.prim_type == prim_type &&
                                batch_state_.indexed &&
                                batch_state_.index_type == gl_index_type;
  if (!batch_compatible && !Flush(FlushMode::kReconfigure)) {
    return false;
  }
  batch_state_.index_type = gl_index_type;
  batch_state_.indexed = true;
  batch_state_.prim_type = prim_type;

  if (!BeginDraw()) {
    return false;
  }

  // A non-zero guest index offset is not handled yet; assert on it.
  uint32_t index_offset = register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
  assert_zero(index_offset);

  auto elements_cmd = active_draw_.draw_elements_cmd;
  elements_cmd->count = index_count;
  elements_cmd->instance_count = 1;
  elements_cmd->first_index = index_offset;
  elements_cmd->base_vertex = 0;
  elements_cmd->base_instance = 0;

  if (has_bindless_mdi_) {
    // The bindless command carries an extra reserved field after the base
    // command that has to be zero.
    active_draw_.draw_elements_bindless_cmd->reserved_zero = 0;
  }
  return true;
}
|
||||
|
||||
bool DrawBatcher::BeginDraw() {
|
||||
draw_open_ = true;
|
||||
|
||||
if (batch_state_.needs_reconfigure) {
|
||||
batch_state_.needs_reconfigure = false;
|
||||
// Have been reconfigured since last draw - need to compute state size.
|
||||
// Layout:
|
||||
// [draw command]
|
||||
// [common header]
|
||||
// [consts]
|
||||
|
||||
// Padded to max.
|
||||
GLsizei command_size = 0;
|
||||
if (has_bindless_mdi_) {
|
||||
if (batch_state_.indexed) {
|
||||
command_size = sizeof(DrawElementsIndirectBindlessCommandNV);
|
||||
} else {
|
||||
command_size = sizeof(DrawArraysIndirectBindlessCommandNV);
|
||||
}
|
||||
} else {
|
||||
if (batch_state_.indexed) {
|
||||
command_size = sizeof(DrawElementsIndirectCommand);
|
||||
} else {
|
||||
command_size = sizeof(DrawArraysIndirectCommand);
|
||||
}
|
||||
}
|
||||
batch_state_.command_stride =
|
||||
poly::round_up(command_size, GLsizei(kCommandBufferAlignment));
|
||||
|
||||
GLsizei header_size = sizeof(CommonHeader);
|
||||
|
||||
// TODO(benvanik); consts sizing.
|
||||
// GLsizei float_consts_size = sizeof(float4) * 512;
|
||||
// GLsizei bool_consts_size = sizeof(uint32_t) * 8;
|
||||
// GLsizei loop_consts_size = sizeof(uint32_t) * 32;
|
||||
// GLsizei consts_size =
|
||||
// float_consts_size + bool_consts_size + loop_consts_size;
|
||||
// batch_state_.float_consts_offset = batch_state_.header_offset +
|
||||
// header_size;
|
||||
// batch_state_.bool_consts_offset =
|
||||
// batch_state_.float_consts_offset + float_consts_size;
|
||||
// batch_state_.loop_consts_offset =
|
||||
// batch_state_.bool_consts_offset + bool_consts_size;
|
||||
GLsizei consts_size = 0;
|
||||
|
||||
batch_state_.state_stride = header_size + consts_size;
|
||||
}
|
||||
|
||||
// Allocate a command data block.
|
||||
// We should treat it as write-only.
|
||||
if (!command_buffer_.CanAcquire(batch_state_.command_stride)) {
|
||||
Flush(FlushMode::kMakeCoherent);
|
||||
}
|
||||
active_draw_.command_allocation =
|
||||
command_buffer_.Acquire(batch_state_.command_stride);
|
||||
assert_not_null(active_draw_.command_allocation.host_ptr);
|
||||
|
||||
// Allocate a state data block.
|
||||
// We should treat it as write-only.
|
||||
if (!state_buffer_.CanAcquire(batch_state_.state_stride)) {
|
||||
Flush(FlushMode::kMakeCoherent);
|
||||
}
|
||||
active_draw_.state_allocation =
|
||||
state_buffer_.Acquire(batch_state_.state_stride);
|
||||
assert_not_null(active_draw_.state_allocation.host_ptr);
|
||||
|
||||
active_draw_.command_address =
|
||||
reinterpret_cast<uintptr_t>(active_draw_.command_allocation.host_ptr);
|
||||
auto state_host_ptr =
|
||||
reinterpret_cast<uintptr_t>(active_draw_.state_allocation.host_ptr);
|
||||
active_draw_.header = reinterpret_cast<CommonHeader*>(state_host_ptr);
|
||||
// active_draw_.float_consts =
|
||||
// reinterpret_cast<float4*>(state_host_ptr +
|
||||
// batch_state_.float_consts_offset);
|
||||
// active_draw_.bool_consts =
|
||||
// reinterpret_cast<uint32_t*>(state_host_ptr +
|
||||
// batch_state_.bool_consts_offset);
|
||||
// active_draw_.loop_consts =
|
||||
// reinterpret_cast<uint32_t*>(state_host_ptr +
|
||||
// batch_state_.loop_consts_offset);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Abandons the currently open draw, handing both of its allocations back to
// their buffers. Does nothing when no draw is open.
void DrawBatcher::DiscardDraw() {
  if (draw_open_) {
    draw_open_ = false;
    command_buffer_.Discard(std::move(active_draw_.command_allocation));
    state_buffer_.Discard(std::move(active_draw_.state_allocation));
  }
}
|
||||
|
||||
// Finalizes the open draw: snapshots the shader constants into its state
// block, extends the batch's command/state ranges to cover it and commits
// both allocations. Always returns true.
bool DrawBatcher::CommitDraw() {
  assert_true(draw_open_);
  draw_open_ = false;

  // Copy over required constants.
  CopyConstants();

  auto& command_alloc = active_draw_.command_allocation;
  auto& state_alloc = active_draw_.state_allocation;

  // First draw of a batch: remember where the ranges begin.
  const bool first_in_batch = batch_state_.state_range_start == UINTPTR_MAX;
  if (first_in_batch) {
    batch_state_.command_range_start = command_alloc.offset;
    batch_state_.state_range_start = state_alloc.offset;
  }
  batch_state_.command_range_length += command_alloc.aligned_length;
  batch_state_.state_range_length += state_alloc.aligned_length;

  command_buffer_.Commit(std::move(command_alloc));
  state_buffer_.Commit(std::move(state_alloc));

  ++batch_state_.draw_count;
  return true;
}
|
||||
|
||||
// Issues all committed draws of the current batch as one multi-draw-indirect
// call and resets the batch bookkeeping.
//
// mode == FlushMode::kReconfigure additionally marks the batch so the next
// BeginDraw() recomputes the command/state strides.
//
// Returns false if the batch uses an unsupported primitive type. The pending
// batch is dropped in that case; otherwise draw_count would stay non-zero and
// every later Flush() would fail on the same batch again.
bool DrawBatcher::Flush(FlushMode mode) {
  if (batch_state_.draw_count) {
    SCOPE_profile_cpu_f("gpu");

    assert_not_zero(batch_state_.command_stride);
    assert_not_zero(batch_state_.state_stride);

    // Flush pending buffer changes.
    command_buffer_.Flush();
    state_buffer_.Flush();
    array_data_buffer_->Flush();

    // State data is indexed by draw ID.
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, state_buffer_.handle(),
                      batch_state_.state_range_start,
                      batch_state_.state_range_length);

    // Forgets the pending batch; UINTPTR_MAX marks "no range recorded".
    auto reset_batch = [this]() {
      batch_state_.command_range_start = UINTPTR_MAX;
      batch_state_.command_range_length = 0;
      batch_state_.state_range_start = UINTPTR_MAX;
      batch_state_.state_range_length = 0;
      batch_state_.draw_count = 0;
    };

    GLenum prim_type = 0;
    switch (batch_state_.prim_type) {
      case PrimitiveType::kPointList:
        prim_type = GL_POINTS;
        break;
      case PrimitiveType::kLineList:
        prim_type = GL_LINES;
        break;
      case PrimitiveType::kLineStrip:
        prim_type = GL_LINE_STRIP;
        break;
      case PrimitiveType::kLineLoop:
        prim_type = GL_LINE_LOOP;
        break;
      case PrimitiveType::kTriangleList:
        prim_type = GL_TRIANGLES;
        break;
      case PrimitiveType::kTriangleStrip:
        prim_type = GL_TRIANGLE_STRIP;
        break;
      case PrimitiveType::kTriangleFan:
        prim_type = GL_TRIANGLE_FAN;
        break;
      case PrimitiveType::kRectangleList:
        prim_type = GL_TRIANGLE_STRIP;
        // Rect lists aren't culled. There may be other things they skip too.
        // assert_true(
        //     (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 &
        //      0x3) == 0);
        break;
      case PrimitiveType::kQuadList:
        prim_type = GL_LINES_ADJACENCY;
        break;
      default:
      case PrimitiveType::kUnknown0x07:
        // The enum is scoped, so convert explicitly for the %d argument.
        XELOGE("unsupported primitive type %d",
               static_cast<int>(batch_state_.prim_type));
        assert_unhandled_case(batch_state_.prim_type);
        DiscardDraw();
        reset_batch();
        return false;
    }

    // The indirect "pointer" is a byte offset into the bound
    // GL_DRAW_INDIRECT_BUFFER (bound in Initialize()).
    void* indirect_offset =
        reinterpret_cast<void*>(batch_state_.command_range_start);

    if (has_bindless_mdi_) {
      int vertex_buffer_count =
          batch_state_.vertex_shader->buffer_inputs().total_elements_count;
      assert_true(vertex_buffer_count < 8);
      if (batch_state_.indexed) {
        glMultiDrawElementsIndirectBindlessNV(
            prim_type, batch_state_.index_type, indirect_offset,
            batch_state_.draw_count, batch_state_.command_stride,
            vertex_buffer_count);
      } else {
        glMultiDrawArraysIndirectBindlessNV(
            prim_type, indirect_offset, batch_state_.draw_count,
            batch_state_.command_stride, vertex_buffer_count);
      }
    } else {
      if (batch_state_.indexed) {
        glMultiDrawElementsIndirect(prim_type, batch_state_.index_type,
                                    indirect_offset, batch_state_.draw_count,
                                    batch_state_.command_stride);
      } else {
        glMultiDrawArraysIndirect(prim_type, indirect_offset,
                                  batch_state_.draw_count,
                                  batch_state_.command_stride);
      }
    }

    reset_batch();
  }

  if (mode == FlushMode::kReconfigure) {
    // Reset - we'll update it as soon as we have all the information.
    batch_state_.needs_reconfigure = true;
  }

  return true;
}
|
||||
|
||||
void DrawBatcher::CopyConstants() {
|
||||
// TODO(benvanik): partial updates, etc. We could use shader constant access
|
||||
// knowledge that we get at compile time to only upload those constants
|
||||
// required. If we did this as a variable length then we could really cut
|
||||
// down on state block sizes.
|
||||
|
||||
std::memcpy(active_draw_.header->float_consts,
|
||||
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
|
||||
sizeof(active_draw_.header->float_consts));
|
||||
std::memcpy(
|
||||
active_draw_.header->bool_consts,
|
||||
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].f32,
|
||||
sizeof(active_draw_.header->bool_consts));
|
||||
std::memcpy(active_draw_.header->loop_consts,
|
||||
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].f32,
|
||||
sizeof(active_draw_.header->loop_consts));
|
||||
}
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,230 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
||||
#define XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
||||
|
||||
#include <xenia/common.h>
|
||||
#include <xenia/gpu/gl4/circular_buffer.h>
|
||||
#include <xenia/gpu/gl4/gl_context.h>
|
||||
#include <xenia/gpu/gl4/gl4_shader.h>
|
||||
#include <xenia/gpu/register_file.h>
|
||||
#include <xenia/gpu/xenos.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
// Four floats, addressable either as the array v[] or by component name.
union float4 {
  float v[4];
  struct {
    float x;
    float y;
    float z;
    float w;
  };
};
|
||||
|
||||
#pragma pack(push, 4)
|
||||
struct DrawArraysIndirectCommand {
|
||||
GLuint count;
|
||||
GLuint instance_count;
|
||||
GLuint first_index;
|
||||
GLuint base_instance;
|
||||
};
|
||||
struct DrawElementsIndirectCommand {
|
||||
GLuint count;
|
||||
GLuint instance_count;
|
||||
GLuint first_index;
|
||||
GLint base_vertex;
|
||||
GLuint base_instance;
|
||||
};
|
||||
struct BindlessPtrNV {
|
||||
GLuint index;
|
||||
GLuint reserved_zero;
|
||||
GLuint64 address;
|
||||
GLuint64 length;
|
||||
};
|
||||
struct DrawArraysIndirectBindlessCommandNV {
|
||||
DrawArraysIndirectCommand cmd;
|
||||
// NOTE: the spec is wrong here. For fucks sake.
|
||||
// GLuint reserved_zero;
|
||||
BindlessPtrNV vertex_buffers[8];
|
||||
};
|
||||
struct DrawElementsIndirectBindlessCommandNV {
|
||||
DrawElementsIndirectCommand cmd;
|
||||
GLuint reserved_zero;
|
||||
BindlessPtrNV index_buffer;
|
||||
BindlessPtrNV vertex_buffers[8];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
class DrawBatcher {
|
||||
public:
|
||||
enum class FlushMode {
|
||||
kMakeCoherent,
|
||||
kStateChange,
|
||||
kReconfigure,
|
||||
};
|
||||
|
||||
DrawBatcher(RegisterFile* register_file);
|
||||
|
||||
bool Initialize(CircularBuffer* array_data_buffer);
|
||||
void Shutdown();
|
||||
|
||||
PrimitiveType prim_type() const { return batch_state_.prim_type; }
|
||||
|
||||
void set_window_offset(uint32_t x, uint32_t y) {
|
||||
active_draw_.header->window_offset.x = float(x);
|
||||
active_draw_.header->window_offset.y = float(y);
|
||||
}
|
||||
void set_window_scissor(uint32_t left, uint32_t top, uint32_t right,
|
||||
uint32_t bottom) {
|
||||
active_draw_.header->window_scissor.x = float(left);
|
||||
active_draw_.header->window_scissor.y = float(top);
|
||||
active_draw_.header->window_scissor.z = float(right);
|
||||
active_draw_.header->window_scissor.w = float(bottom);
|
||||
}
|
||||
void set_window_scalar(float width_scalar, float height_scalar) {
|
||||
active_draw_.header->window_offset.z = width_scalar;
|
||||
active_draw_.header->window_offset.w = height_scalar;
|
||||
}
|
||||
void set_viewport_offset(float offset_x, float offset_y, float offset_z) {
|
||||
active_draw_.header->viewport_offset.x = offset_x;
|
||||
active_draw_.header->viewport_offset.y = offset_y;
|
||||
active_draw_.header->viewport_offset.z = offset_z;
|
||||
}
|
||||
void set_viewport_scale(float scale_x, float scale_y, float scale_z) {
|
||||
active_draw_.header->viewport_scale.x = scale_x;
|
||||
active_draw_.header->viewport_scale.y = scale_y;
|
||||
active_draw_.header->viewport_scale.z = scale_z;
|
||||
}
|
||||
void set_vtx_fmt(float xy, float z, float w) {
|
||||
active_draw_.header->vtx_fmt.x = xy;
|
||||
active_draw_.header->vtx_fmt.y = xy;
|
||||
active_draw_.header->vtx_fmt.z = z;
|
||||
active_draw_.header->vtx_fmt.w = w;
|
||||
}
|
||||
void set_alpha_test(bool enabled, uint32_t func, float ref) {
|
||||
active_draw_.header->alpha_test.x = enabled ? 1.0f : 0.0f;
|
||||
active_draw_.header->alpha_test.y = float(func);
|
||||
active_draw_.header->alpha_test.z = ref;
|
||||
}
|
||||
void set_texture_sampler(int index, GLuint64 handle) {
|
||||
active_draw_.header->texture_samplers[index] = handle;
|
||||
}
|
||||
void set_index_buffer(const CircularBuffer::Allocation& allocation) {
|
||||
if (has_bindless_mdi_) {
|
||||
auto& ptr = active_draw_.draw_elements_bindless_cmd->index_buffer;
|
||||
ptr.reserved_zero = 0;
|
||||
ptr.index = 0;
|
||||
ptr.address = allocation.gpu_ptr;
|
||||
ptr.length = allocation.length;
|
||||
} else {
|
||||
// Offset is used in glDrawElements.
|
||||
auto& cmd = active_draw_.draw_elements_cmd;
|
||||
size_t index_size = batch_state_.index_type == GL_UNSIGNED_SHORT ? 2 : 4;
|
||||
cmd->first_index = GLuint(allocation.offset / index_size);
|
||||
}
|
||||
}
|
||||
void set_vertex_buffer(int index, GLsizei offset, GLsizei stride,
|
||||
const CircularBuffer::Allocation& allocation) {
|
||||
if (has_bindless_mdi_) {
|
||||
BindlessPtrNV* ptr;
|
||||
if (batch_state_.indexed) {
|
||||
ptr = &active_draw_.draw_elements_bindless_cmd->vertex_buffers[index];
|
||||
} else {
|
||||
ptr = &active_draw_.draw_arrays_bindless_cmd->vertex_buffers[index];
|
||||
}
|
||||
ptr->reserved_zero = 0;
|
||||
ptr->index = index;
|
||||
ptr->address = allocation.gpu_ptr + offset;
|
||||
ptr->length = allocation.length - offset;
|
||||
}
|
||||
}
|
||||
|
||||
bool ReconfigurePipeline(GL4Shader* vertex_shader, GL4Shader* pixel_shader,
|
||||
GLuint pipeline);
|
||||
|
||||
bool BeginDrawArrays(PrimitiveType prim_type, uint32_t index_count);
|
||||
bool BeginDrawElements(PrimitiveType prim_type, uint32_t index_count,
|
||||
xenos::IndexFormat index_format);
|
||||
void DiscardDraw();
|
||||
bool CommitDraw();
|
||||
bool Flush(FlushMode mode);
|
||||
|
||||
private:
|
||||
bool BeginDraw();
|
||||
void CopyConstants();
|
||||
|
||||
RegisterFile* register_file_;
|
||||
CircularBuffer command_buffer_;
|
||||
CircularBuffer state_buffer_;
|
||||
CircularBuffer* array_data_buffer_;
|
||||
|
||||
bool has_bindless_mdi_;
|
||||
|
||||
struct BatchState {
|
||||
bool needs_reconfigure;
|
||||
PrimitiveType prim_type;
|
||||
bool indexed;
|
||||
GLenum index_type;
|
||||
|
||||
GL4Shader* vertex_shader;
|
||||
GL4Shader* pixel_shader;
|
||||
GLuint pipeline;
|
||||
|
||||
GLsizei command_stride;
|
||||
GLsizei state_stride;
|
||||
GLsizei float_consts_offset;
|
||||
GLsizei bool_consts_offset;
|
||||
GLsizei loop_consts_offset;
|
||||
|
||||
uintptr_t command_range_start;
|
||||
uintptr_t command_range_length;
|
||||
uintptr_t state_range_start;
|
||||
uintptr_t state_range_length;
|
||||
GLsizei draw_count;
|
||||
} batch_state_;
|
||||
|
||||
// This must match GL4Shader's header.
|
||||
struct CommonHeader {
|
||||
float4 window_offset; // tx,ty,sx,sy
|
||||
float4 window_scissor; // x0,y0,x1,y1
|
||||
float4 viewport_offset; // tx,ty,tz,?
|
||||
float4 viewport_scale; // sx,sy,sz,?
|
||||
float4 vtx_fmt; //
|
||||
float4 alpha_test; // alpha test enable, func, ref, ?
|
||||
|
||||
// TODO(benvanik): pack tightly
|
||||
GLuint64 texture_samplers[32];
|
||||
|
||||
float4 float_consts[512];
|
||||
uint32_t bool_consts[8];
|
||||
uint32_t loop_consts[32];
|
||||
};
|
||||
struct {
|
||||
CircularBuffer::Allocation command_allocation;
|
||||
CircularBuffer::Allocation state_allocation;
|
||||
|
||||
union {
|
||||
DrawArraysIndirectCommand* draw_arrays_cmd;
|
||||
DrawElementsIndirectCommand* draw_elements_cmd;
|
||||
DrawArraysIndirectBindlessCommandNV* draw_arrays_bindless_cmd;
|
||||
DrawElementsIndirectBindlessCommandNV* draw_elements_bindless_cmd;
|
||||
uintptr_t command_address;
|
||||
};
|
||||
|
||||
CommonHeader* header;
|
||||
} active_draw_;
|
||||
bool draw_open_;
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_GL4_GL4_STATE_DATA_BUILDER_H_
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include <poly/cxx_compat.h>
|
||||
#include <poly/math.h>
|
||||
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||
#include <xenia/gpu/gl4/gl4_shader_translator.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
|
||||
|
@ -18,6 +19,8 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
extern "C" GLEWContext* glewGetContext();
|
||||
|
||||
// Stateful, but minimally.
|
||||
|
@ -25,41 +28,147 @@ thread_local GL4ShaderTranslator shader_translator_;
|
|||
|
||||
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count), program_(0) {}
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count),
|
||||
program_(0),
|
||||
vao_(0) {}
|
||||
|
||||
GL4Shader::~GL4Shader() { glDeleteProgram(program_); }
|
||||
GL4Shader::~GL4Shader() {
|
||||
glDeleteProgram(program_);
|
||||
glDeleteVertexArrays(1, &vao_);
|
||||
}
|
||||
|
||||
// GLSL preamble prepended to translated shaders: version/extension
// directives, default layouts, the StateData/VertexData structs and the
// storage block bound at binding 0.
const std::string header =
    // Version and required extensions.
    "#version 450\n"
    "#extension all : warn\n"
    "#extension GL_ARB_bindless_texture : require\n"
    "#extension GL_ARB_explicit_uniform_location : require\n"
    "#extension GL_ARB_shading_language_420pack : require\n"
    "#extension GL_ARB_shader_storage_buffer_object : require\n"
    // Default precision and block layouts.
    "precision highp float;\n"
    "precision highp int;\n"
    "layout(std140, column_major) uniform;\n"
    "layout(std430, column_major) buffer;\n"
    // Per-draw state.
    "struct StateData {\n"
    " vec4 window_offset;\n"
    " vec4 window_scissor;\n"
    " vec4 vtx_fmt;\n"
    " vec4 viewport_offset;\n"
    " vec4 viewport_scale;\n"
    " vec4 alpha_test;\n"
    " uvec2 texture_samplers[32];\n"
    " vec4 float_consts[512];\n"
    " uint fetch_consts[32 * 6];\n"
    " int bool_consts[8];\n"
    " int loop_consts[32];\n"
    "};\n"
    // Inter-stage vertex outputs.
    "struct VertexData {\n"
    " vec4 o[16];\n"
    "};\n"
    "\n"
    // A single state block per draw.
    "layout(binding = 0) buffer State {\n"
    " StateData state;\n"
    "};\n";
|
||||
// Returns a copy of the GLSL preamble shared by all translated shaders:
// version/extension directives, default layouts, the StateData struct, the
// storage block holding one StateData per draw, and the VertexData struct.
std::string GL4Shader::GetHeader() {
  static const std::string kCommonPreamble =
      // Version and required extensions.
      "#version 450\n"
      "#extension all : warn\n"
      "#extension GL_ARB_bindless_texture : require\n"
      "#extension GL_ARB_explicit_uniform_location : require\n"
      "#extension GL_ARB_shader_draw_parameters : require\n"
      "#extension GL_ARB_shader_storage_buffer_object : require\n"
      "#extension GL_ARB_shading_language_420pack : require\n"
      // Default precision and block layouts.
      "precision highp float;\n"
      "precision highp int;\n"
      "layout(std140, column_major) uniform;\n"
      "layout(std430, column_major) buffer;\n"
      "\n"
      // Field order here must match DrawBatcher::CommonHeader.
      "struct StateData {\n"
      " vec4 window_offset;\n"
      " vec4 window_scissor;\n"
      " vec4 viewport_offset;\n"
      " vec4 viewport_scale;\n"
      " vec4 vtx_fmt;\n"
      " vec4 alpha_test;\n"
      // TODO(benvanik): variable length.
      " uvec2 texture_samplers[32];\n"
      " vec4 float_consts[512];\n"
      " int bool_consts[8];\n"
      " int loop_consts[32];\n"
      "};\n"
      // One StateData entry per draw in the batch.
      "layout(binding = 0) buffer State {\n"
      " StateData states[];\n"
      "};\n"
      "\n"
      // Inter-stage vertex outputs.
      "struct VertexData {\n"
      " vec4 o[16];\n"
      "};\n";
  return kCommonPreamble;
}
|
||||
|
||||
bool GL4Shader::PrepareVertexArrayObject() {
|
||||
glCreateVertexArrays(1, &vao_);
|
||||
|
||||
bool has_bindless_vbos = false;
|
||||
if (FLAGS_vendor_gl_extensions && GLEW_NV_vertex_buffer_unified_memory) {
|
||||
has_bindless_vbos = true;
|
||||
// Nasty, but no DSA for this.
|
||||
glBindVertexArray(vao_);
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
}
|
||||
|
||||
uint32_t el_index = 0;
|
||||
for (uint32_t buffer_index = 0; buffer_index < buffer_inputs_.count;
|
||||
++buffer_index) {
|
||||
const auto& desc = buffer_inputs_.descs[buffer_index];
|
||||
|
||||
for (uint32_t i = 0; i < desc.element_count; ++i, ++el_index) {
|
||||
const auto& el = desc.elements[i];
|
||||
auto comp_count = GetVertexFormatComponentCount(el.format);
|
||||
GLenum comp_type;
|
||||
switch (el.format) {
|
||||
case VertexFormat::k_8_8_8_8:
|
||||
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
||||
break;
|
||||
case VertexFormat::k_2_10_10_10:
|
||||
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
|
||||
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||
break;
|
||||
case VertexFormat::k_10_11_11:
|
||||
assert_false(el.is_signed);
|
||||
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
||||
break;
|
||||
/*case VertexFormat::k_11_11_10:
|
||||
break;*/
|
||||
case VertexFormat::k_16_16:
|
||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_FLOAT:
|
||||
comp_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_16_16:
|
||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||
comp_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32:
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_32:
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_32:
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_FLOAT:
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_FLOAT:
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_FLOAT:
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(el.format);
|
||||
return false;
|
||||
}
|
||||
|
||||
glEnableVertexArrayAttrib(vao_, el_index);
|
||||
if (has_bindless_vbos) {
|
||||
// NOTE: MultiDrawIndirectBindlessMumble doesn't handle separate
|
||||
// vertex bindings/formats.
|
||||
glVertexAttribFormat(el_index, comp_count, comp_type, el.is_normalized,
|
||||
el.offset_words * 4);
|
||||
glVertexArrayVertexBuffer(vao_, el_index, 0, 0, desc.stride_words * 4);
|
||||
} else {
|
||||
glVertexArrayAttribBinding(vao_, el_index, buffer_index);
|
||||
glVertexArrayAttribFormat(vao_, el_index, comp_count, comp_type,
|
||||
el.is_normalized, el.offset_words * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has_bindless_vbos) {
|
||||
glBindVertexArray(0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GL4Shader::PrepareVertexShader(
|
||||
const xenos::xe_gpu_program_cntl_t& program_cntl) {
|
||||
|
@ -68,8 +177,14 @@ bool GL4Shader::PrepareVertexShader(
|
|||
}
|
||||
has_prepared_ = true;
|
||||
|
||||
// Build static vertex array descriptor.
|
||||
if (!PrepareVertexArrayObject()) {
|
||||
PLOGE("Unable to prepare vertex shader array object");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string apply_transform =
|
||||
"vec4 applyTransform(vec4 pos) {\n"
|
||||
"vec4 applyTransform(const in StateData state, vec4 pos) {\n"
|
||||
" // Clip->NDC with perspective divide.\n"
|
||||
" // We do this here because it's programmable on the 360.\n"
|
||||
" float w = pos.w;\n"
|
||||
|
@ -107,14 +222,15 @@ bool GL4Shader::PrepareVertexShader(
|
|||
" return pos;\n"
|
||||
"}\n";
|
||||
std::string source =
|
||||
header + apply_transform +
|
||||
GetHeader() + apply_transform +
|
||||
"out gl_PerVertex {\n"
|
||||
" vec4 gl_Position;\n"
|
||||
" float gl_PointSize;\n"
|
||||
" float gl_ClipDistance[];\n"
|
||||
"};\n"
|
||||
"layout(location = 0) out VertexData vtx;\n"
|
||||
"void processVertex();\n"
|
||||
"layout(location = 0) flat out uint draw_id;\n"
|
||||
"layout(location = 1) out VertexData vtx;\n"
|
||||
"void processVertex(const in StateData state);\n"
|
||||
"void main() {\n" +
|
||||
(alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
||||
: "") +
|
||||
|
@ -122,8 +238,10 @@ bool GL4Shader::PrepareVertexShader(
|
|||
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
||||
" vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
||||
" }\n"
|
||||
" processVertex();\n"
|
||||
" gl_Position = applyTransform(gl_Position);\n"
|
||||
" const StateData state = states[gl_DrawIDARB];\n"
|
||||
" processVertex(state);\n"
|
||||
" gl_Position = applyTransform(state, gl_Position);\n"
|
||||
" draw_id = gl_DrawIDARB;\n"
|
||||
"}\n";
|
||||
|
||||
std::string translated_source =
|
||||
|
@ -149,12 +267,14 @@ bool GL4Shader::PreparePixelShader(
|
|||
}
|
||||
has_prepared_ = true;
|
||||
|
||||
std::string source = header +
|
||||
"layout(location = 0) in VertexData vtx;\n"
|
||||
std::string source = GetHeader() +
|
||||
"layout(location = 0) flat in uint draw_id;\n"
|
||||
"layout(location = 1) in VertexData vtx;\n"
|
||||
"layout(location = 0) out vec4 oC[4];\n"
|
||||
"void processFragment();\n"
|
||||
"void processFragment(const in StateData state);\n"
|
||||
"void main() {\n" +
|
||||
" processFragment();\n"
|
||||
" const StateData state = states[draw_id];\n"
|
||||
" processFragment(state);\n"
|
||||
"}\n";
|
||||
|
||||
std::string translated_source =
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#ifndef XENIA_GPU_GL4_GL4_SHADER_H_
|
||||
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <xenia/common.h>
|
||||
#include <xenia/gpu/gl4/gl_context.h>
|
||||
#include <xenia/gpu/shader.h>
|
||||
|
@ -25,14 +27,18 @@ class GL4Shader : public Shader {
|
|||
~GL4Shader() override;
|
||||
|
||||
GLuint program() const { return program_; }
|
||||
GLuint vao() const { return vao_; }
|
||||
|
||||
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||
|
||||
protected:
|
||||
std::string GetHeader();
|
||||
bool PrepareVertexArrayObject();
|
||||
bool CompileProgram(std::string source);
|
||||
|
||||
GLuint program_;
|
||||
GLuint vao_;
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -91,7 +91,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
|
|||
const auto& alloc_counts = vertex_shader->alloc_counts();
|
||||
|
||||
// Vertex shader main() header.
|
||||
Append("void processVertex() {\n");
|
||||
Append("void processVertex(const in StateData state) {\n");
|
||||
|
||||
// Add temporaries for any registers we may use.
|
||||
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
||||
|
@ -126,7 +126,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
|
|||
// (and less than the number of required registers), things may die.
|
||||
|
||||
// Pixel shader main() header.
|
||||
Append("void processFragment() {\n");
|
||||
Append("void processFragment(const in StateData state) {\n");
|
||||
|
||||
// Add temporary registers.
|
||||
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
|
||||
|
|
|
@ -132,12 +132,15 @@ std::unique_ptr<GLContext> GLContext::CreateShared() {
|
|||
GLContextLock context_lock(this);
|
||||
|
||||
int context_flags = 0;
|
||||
//int profile = WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB;
|
||||
int profile = WGL_CONTEXT_CORE_PROFILE_BIT_ARB;
|
||||
#if DEBUG
|
||||
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
|
||||
#endif // DEBUG
|
||||
int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, //
|
||||
WGL_CONTEXT_MINOR_VERSION_ARB, 5, //
|
||||
WGL_CONTEXT_FLAGS_ARB, context_flags, //
|
||||
#endif // DEBUG
|
||||
int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, //
|
||||
WGL_CONTEXT_MINOR_VERSION_ARB, 5, //
|
||||
WGL_CONTEXT_FLAGS_ARB, context_flags, //
|
||||
WGL_CONTEXT_PROFILE_MASK_ARB, profile, //
|
||||
0};
|
||||
new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list);
|
||||
if (!new_glrc) {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
'circular_buffer.h',
|
||||
'command_processor.cc',
|
||||
'command_processor.h',
|
||||
'draw_batcher.cc',
|
||||
'draw_batcher.h',
|
||||
'gl4_gpu-private.h',
|
||||
'gl4_gpu.cc',
|
||||
'gl4_gpu.h',
|
||||
|
|
Loading…
Reference in New Issue