Experimenting with vertex pipeline.

This commit is contained in:
Ben Vanik 2014-12-26 23:14:15 -08:00
parent d2a3cba4f3
commit 14ee211ea9
9 changed files with 541 additions and 133 deletions

View File

@ -31,6 +31,8 @@ class CircularBuffer {
bool Initialize();
GLuint handle() const { return buffer_; }
Allocation Acquire(size_t length);
void Commit(Allocation allocation);

View File

@ -151,13 +151,17 @@ bool CommandProcessor::SetupGL() {
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
// Circular buffer holding scratch vertex/index data.
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
if (!scratch_buffer_.Initialize()) {
PLOGE("Unable to initialize scratch buffer");
return false;
}
GLuint vertex_array;
glGenVertexArrays(1, &vertex_array);
glBindVertexArray(vertex_array);
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
return true;
}
@ -251,8 +255,7 @@ void CommandProcessor::PrepareForWait() {
// TODO(benvanik): fences and fancy stuff. We should figure out a way to
// make interrupt callbacks from the GPU so that we don't have to do a full
// synchronize here.
// glFlush();
glFinish();
glFlush();
if (FLAGS_thread_safe_gl) {
context_->ClearCurrent();
@ -1162,10 +1165,11 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
return false;
}
// if (!PopulateShaders(draw_command)) {
// XELOGE("Unable to prepare draw shaders");
// return false;
//}
if (!UpdateShaders(draw_command)) {
PLOGE("Unable to prepare draw shaders");
return false;
}
// if (!PopulateSamplers(draw_command)) {
// XELOGE("Unable to prepare draw samplers");
// return false;
@ -1176,25 +1180,77 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
return false;
}
if (!PopulateVertexBuffers(draw_command)) {
XELOGE("Unable to setup vertex buffers");
PLOGE("Unable to setup vertex buffers");
return false;
}
GLenum prim_type = 0;
switch (cmd.prim_type) {
case PrimitiveType::kPointList:
prim_type = GL_POINTS;
/*if (vs->DemandGeometryShader(
D3D11VertexShaderResource::POINT_SPRITE_SHADER, &geometry_shader)) {
return 1;
}*/
break;
case PrimitiveType::kLineList:
prim_type = GL_LINES;
break;
case PrimitiveType::kLineStrip:
prim_type = GL_LINE_STRIP;
break;
case PrimitiveType::kLineLoop:
prim_type = GL_LINE_LOOP;
break;
case PrimitiveType::kTriangleList:
prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kTriangleStrip:
prim_type = GL_TRIANGLE_STRIP;
break;
case PrimitiveType::kTriangleFan:
prim_type = GL_TRIANGLE_FAN;
break;
case PrimitiveType::kRectangleList:
prim_type = GL_TRIANGLE_STRIP;
/*if (vs->DemandGeometryShader(
D3D11VertexShaderResource::RECT_LIST_SHADER, &geometry_shader)) {
return 1;
}*/
break;
case PrimitiveType::kQuadList:
prim_type = GL_LINES_ADJACENCY;
/*if
(vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER,
&geometry_shader)) {
return 1;
}*/
break;
default:
case PrimitiveType::kUnknown0x07:
prim_type = GL_POINTS;
XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type);
break;
}
// HACK HACK HACK
glDisable(GL_DEPTH_TEST);
if (cmd.index_buffer.address) {
// Indexed draw.
// PopulateIndexBuffer has our element array setup.
//size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
// ? sizeof(uint32_t)
// : sizeof(uint16_t);
//glDrawElementsBaseVertex(
// prim_type, cmd.index_count,
// cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
// : GL_UNSIGNED_SHORT,
// reinterpret_cast<void*>(cmd.start_index * element_size),
// cmd.base_vertex);
size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
? sizeof(uint32_t)
: sizeof(uint16_t);
glDrawElementsBaseVertex(
prim_type, cmd.index_count,
cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
: GL_UNSIGNED_SHORT,
reinterpret_cast<void*>(cmd.start_index * element_size),
cmd.base_vertex);
} else {
// Auto draw.
//glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
}
return true;
@ -1215,10 +1271,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
};
};
struct UniformDataBlock {
float4 window_offset; // tx,ty,?,?
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
float4 window_offset; // tx,ty,rt_w,rt_h
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
// TODO(benvanik): vertex format xyzw?
float4 alpha_test; // alpha test enable, func, ref, ?
@ -1236,11 +1292,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
"Need <=16k uniform data");
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(
glMapNamedBufferRange(uniform_data_buffer_, 0, 16 * 1024,
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
auto allocation = scratch_buffer_.Acquire(16 * 1024);
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
if (!buffer_ptr) {
PLOGE("Unable to map uniform data buffer");
PLOGE("Unable to allocate uniform data buffer");
return false;
}
@ -1257,18 +1312,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
// Viewport scaling. Only enabled if the flags are all set.
buffer_ptr->viewport_scale.x =
regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640
buffer_ptr->viewport_offset.x =
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640
buffer_ptr->viewport_scale.y =
regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360
buffer_ptr->viewport_offset.y =
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360
buffer_ptr->viewport_scale.z = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1
buffer_ptr->viewport_offset.z =
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0
// HACK: no clue where to get these values.
buffer_ptr->window_offset.z = 1280;
buffer_ptr->window_offset.w = 720;
// Whether each of the viewport settings is enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
@ -1282,6 +1328,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
assert_true(vport_xscale_enable == vport_yscale_enable ==
vport_zscale_enable == vport_xoffset_enable ==
vport_yoffset_enable == vport_zoffset_enable);
// Viewport scaling. Only enabled if the flags are all set.
buffer_ptr->viewport_scale.x =
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640
buffer_ptr->viewport_offset.x = vport_xoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: 0; // 640
buffer_ptr->viewport_scale.y = vport_yscale_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1; // -360
buffer_ptr->viewport_offset.y = vport_yoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: 0; // 360
buffer_ptr->viewport_scale.z =
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1
buffer_ptr->viewport_offset.z =
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.
bool vtx_xy_fmt = (vte_control >> 8) & 0x1;
@ -1504,7 +1567,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
}
glUnmapNamedBuffer(uniform_data_buffer_);
// Stash - program setup will bind this to uniforms.
draw_command->state_data_gpu_ptr = allocation.gpu_ptr;
scratch_buffer_.Commit(std::move(allocation));
return true;
}
@ -1590,11 +1655,80 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
// Pretend we are drawing.
glEnable(GL_SCISSOR_TEST);
glScissor(100, 100, 100, 100);
float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, red);
glDisable(GL_SCISSOR_TEST);
// glEnable(GL_SCISSOR_TEST);
// glScissor(100, 100, 100, 100);
// float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
// glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
// red);
// glDisable(GL_SCISSOR_TEST);
return true;
}
// Makes sure the active vertex and pixel shaders have been prepared, builds a
// program pipeline out of their programs, points each stage's "state" uniform
// at the per-draw state data (draw_command->state_data_gpu_ptr), and binds the
// pipeline.
//
// Returns false if either shader fails to prepare, or was prepared earlier and
// is not valid; returns true once the pipeline has been bound.
bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
  SCOPE_profile_cpu_f("gpu");

  auto& regs = *register_file_;
  auto& cmd = *draw_command;

  xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;

  // Vertex stage: prepare on first use, otherwise just check the cached result.
  if (active_vertex_shader_->has_prepared()) {
    if (!active_vertex_shader_->is_valid()) {
      XELOGE("Vertex shader invalid");
      return false;
    }
  } else if (!active_vertex_shader_->PrepareVertexShader(program_cntl)) {
    XELOGE("Unable to prepare vertex shader");
    return false;
  }

  // Pixel stage: same policy; preparation is also handed the vertex shader.
  if (active_pixel_shader_->has_prepared()) {
    if (!active_pixel_shader_->is_valid()) {
      XELOGE("Pixel shader invalid");
      return false;
    }
  } else if (!active_pixel_shader_->PreparePixelShader(program_cntl,
                                                       active_vertex_shader_)) {
    XELOGE("Unable to prepare pixel shader");
    return false;
  }

  const GLuint vs_program = active_vertex_shader_->program();
  GLuint gs_program = 0;  // No geometry stage is attached yet.
  const GLuint fs_program = active_pixel_shader_->program();

  // NOTE: a new pipeline object is created on every call and is not deleted
  // here (the delete at the bottom is still disabled).
  GLuint pipeline;
  glCreateProgramPipelines(1, &pipeline);
  glUseProgramStages(pipeline, GL_VERTEX_SHADER_BIT, vs_program);
  glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, gs_program);
  glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fs_program);

  // HACK: layout(location=0) on a bindless uniform crashes nvidia driver.
  // Look the location up by name instead; it is expected to be 0 when present.
  const GLint vs_state_loc = glGetUniformLocation(vs_program, "state");
  assert_true(vs_state_loc == -1 || vs_state_loc == 0);
  GLint gs_state_loc = -1;
  if (gs_program) {
    gs_state_loc = glGetUniformLocation(gs_program, "state");
  }
  assert_true(gs_state_loc == -1 || gs_state_loc == 0);
  const GLint fs_state_loc = glGetUniformLocation(fs_program, "state");
  assert_true(fs_state_loc == -1 || fs_state_loc == 0);

  // TODO(benvanik): do we need to do this for all stages if the locations
  // match?
  if (vs_state_loc != -1) {
    glProgramUniformHandleui64ARB(vs_program, vs_state_loc,
                                  cmd.state_data_gpu_ptr);
  }
  // gs_state_loc can only be != -1 when a geometry program exists.
  if (gs_state_loc != -1) {
    glProgramUniformHandleui64ARB(gs_program, gs_state_loc,
                                  cmd.state_data_gpu_ptr);
  }
  if (fs_state_loc != -1) {
    glProgramUniformHandleui64ARB(fs_program, fs_state_loc,
                                  cmd.state_data_gpu_ptr);
  }

  glBindProgramPipeline(pipeline);
  // glDeleteProgramPipelines(1, &pipeline);

  return true;
}
@ -1641,15 +1775,9 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
SCOPE_profile_cpu_f("gpu");
auto& regs = *register_file_;
auto& cmd = *draw_command;
assert_not_null(active_vertex_shader_);
if (!cmd.vertex_shader) {
// No vertex shader, no-op.
return true;
}
const auto& buffer_inputs = cmd.vertex_shader->buffer_inputs();
// glBindVertexArray(vertex_array);
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
for (size_t n = 0; n < buffer_inputs.count; n++) {
const auto& desc = buffer_inputs.descs[n];
@ -1685,9 +1813,100 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
fetch->size);
/*glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV,
desc.input_index,
allocation.gpu_ptr, allocation.length);*/
uint32_t el_index = 0;
for (uint32_t i = 0; i < desc.element_count; ++i) {
const auto& el = desc.elements[i];
GLuint comp_count;
GLuint comp_size;
GLenum comp_type;
switch (el.format) {
case VertexFormat::k_8_8_8_8:
comp_count = 4;
comp_size = 1;
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
break;
case VertexFormat::k_2_10_10_10:
comp_count = 4;
comp_size = 4;
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
: GL_UNSIGNED_INT_2_10_10_10_REV;
break;
case VertexFormat::k_10_11_11:
comp_count = 3;
comp_size = 4;
assert_false(el.is_signed);
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
break;
/*case VertexFormat::k_11_11_10:
break;*/
case VertexFormat::k_16_16:
comp_count = 2;
comp_size = 2;
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
break;
case VertexFormat::k_16_16_FLOAT:
comp_count = 2;
comp_size = 2;
comp_type = GL_HALF_FLOAT;
break;
case VertexFormat::k_16_16_16_16:
comp_count = 4;
comp_size = 2;
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
break;
case VertexFormat::k_16_16_16_16_FLOAT:
comp_count = 4;
comp_size = 2;
comp_type = GL_HALF_FLOAT;
break;
case VertexFormat::k_32:
comp_count = 1;
comp_size = 4;
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_32:
comp_count = 2;
comp_size = 4;
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_32_32_32:
comp_count = 4;
comp_size = 4;
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_FLOAT:
comp_count = 1;
comp_size = 4;
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_FLOAT:
comp_count = 2;
comp_size = 4;
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_32_FLOAT:
comp_count = 3;
comp_size = 4;
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_32_32_FLOAT:
comp_count = 4;
comp_size = 4;
comp_type = GL_FLOAT;
break;
default:
assert_unhandled_case(el.format);
break;
}
size_t offset = el.offset_words * sizeof(uint32_t);
glEnableVertexAttribArray(el_index);
glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized,
desc.stride_words * sizeof(uint32_t));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
allocation.gpu_ptr + offset,
allocation.length - offset);
++el_index;
}
// Flush buffer before we draw.
scratch_buffer_.Commit(std::move(allocation));
@ -1782,7 +2001,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
GLenum read_format;
GLenum read_type;
switch (copy_dest_format) {
case ColorFormat::kColor_8_8_8_8:
case ColorFormat::k_8_8_8_8:
read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
read_type = GL_UNSIGNED_BYTE;
break;
@ -1832,10 +2051,10 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
// glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer)
glNamedFramebufferReadBuffer(source_framebuffer->framebuffer,
GL_COLOR_ATTACHMENT0 + copy_src_select);
glReadPixels(x, y, w, h, read_format, read_type, ptr);
//glReadPixels(x, y, w, h, read_format, read_type, ptr);
} else {
// Source from the bound depth/stencil target.
glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
//glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
}
break;
case CopyCommand::kRaw:
@ -1876,7 +2095,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL,
depth.float_value, stencil);
}
return true;
}
@ -1890,8 +2109,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
uint32_t height = 2560;
// NOTE: we strip gamma formats down to normal ones.
if (format == ColorRenderTargetFormat::k8888Gamma) {
format = ColorRenderTargetFormat::k8888;
if (format == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
format = ColorRenderTargetFormat::k_8_8_8_8;
}
for (auto& it = cached_color_render_targets_.begin();
@ -1910,8 +2129,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
GLenum internal_format;
switch (format) {
case ColorRenderTargetFormat::k8888:
case ColorRenderTargetFormat::k8888Gamma:
case ColorRenderTargetFormat::k_8_8_8_8:
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
internal_format = GL_RGBA8;
break;
default:

View File

@ -47,9 +47,6 @@ struct DrawCommand {
uint32_t index_count;
uint32_t base_vertex;
GL4Shader* vertex_shader;
GL4Shader* pixel_shader;
// Index buffer, if present.
// If index_count > 0 but buffer is nullptr then auto draw.
struct {
@ -69,6 +66,8 @@ struct DrawCommand {
size_t vertex_shader_sampler_count;
SamplerInput pixel_shader_samplers[32];
size_t pixel_shader_sampler_count;
GLuint64 state_data_gpu_ptr;
};
class CommandProcessor {
@ -188,6 +187,7 @@ class CommandProcessor {
bool IssueDraw(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateRenderTargets(DrawCommand* draw_command);
bool UpdateShaders(DrawCommand* draw_command);
bool PopulateIndexBuffer(DrawCommand* draw_command);
bool PopulateVertexBuffers(DrawCommand* draw_command);
bool IssueCopy(DrawCommand* draw_command);

View File

@ -11,6 +11,7 @@
#include <poly/threading.h>
#include <xenia/cpu/processor.h>
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/gpu/gpu-private.h>
namespace xe {
@ -42,11 +43,19 @@ X_STATUS GL4GraphicsSystem::Setup() {
control_ = std::make_unique<WGLControl>(loop);
emulator_->main_window()->AddChild(control_.get());
if (FLAGS_thread_safe_gl) {
control_->context()->MakeCurrent();
}
// Setup the GL context the command processor will do all its drawing in.
// It's shared with the control context so that we can resolve framebuffers
// from it.
processor_context = control_->context()->CreateShared();
if (FLAGS_thread_safe_gl) {
control_->context()->ClearCurrent();
}
control_ready_fence.Signal();
});
control_ready_fence.Wait();

View File

@ -15,7 +15,163 @@ namespace xe {
namespace gpu {
namespace gl4 {
bool GL4Shader::TranslateImpl() { return true; }
extern "C" GLEWContext* glewGetContext();
// Forwards the shader type, hash and ucode words to the Shader base class and
// starts out with no GL program object (program_ == 0).
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
                     const uint32_t* dword_ptr, uint32_t dword_count)
    : Shader(shader_type, data_hash, dword_ptr, dword_count),
      program_(0) {
}
// Deletes the GL program object held in program_ (which is 0 if no program
// was ever created).
GL4Shader::~GL4Shader() { glDeleteProgram(program_); }
// GLSL preamble prepended to every generated vertex and pixel shader source.
// It selects the GLSL version and required extensions, sets default
// precisions and block layouts, and declares:
//   - StateData:  per-draw state read through the `state` pointer uniform.
//                 NOTE(review): the leading vec4 fields appear to mirror the
//                 UniformDataBlock filled in by CommandProcessor::UpdateState;
//                 confirm the two layouts stay in sync when either changes.
//   - VertexData: the 16 vec4 values passed from the vertex to the pixel stage.
//   - state:      pointer uniform that the command processor sets per draw.
const std::string header =
"#version 450\n"
"#extension all : warn\n"
"#extension GL_ARB_bindless_texture : require\n"
"#extension GL_ARB_explicit_uniform_location : require\n"
"#extension GL_ARB_shading_language_420pack : require\n"
"#extension GL_ARB_shader_storage_buffer_object : require\n"
"#extension GL_NV_shader_buffer_load : require\n"
"precision highp float;\n"
"precision highp int;\n"
"layout(std140, column_major) uniform;\n"
"layout(std430, column_major) buffer;\n"
"struct StateData {\n"
" vec4 window_offset;\n"
" vec4 window_scissor;\n"
" vec4 viewport_offset;\n"
" vec4 viewport_scale;\n"
" vec4 alpha_test;\n"
" vec4 float_consts[512];\n"
" uint fetch_consts[32 * 6];\n"
" int bool_consts[8];\n"
" int loop_consts[32];\n"
"};\n"
"struct VertexData {\n"
" vec4 o[16];\n"
"};\n"
"\n"
"uniform StateData* state;\n";
// Builds and compiles the (currently hard-coded) GLSL vertex shader for this
// shader object.
//
// Preparation is attempted at most once: has_prepared_ is latched before the
// compile, so later calls just return the cached is_valid_ result, including
// after a failed compile.
//
// program_cntl is accepted for the eventual translator but is not used yet.
// Returns true if the program compiled and linked.
bool GL4Shader::PrepareVertexShader(
    const xenos::xe_gpu_program_cntl_t& program_cntl) {
  if (has_prepared_) {
    return is_valid_;
  }
  has_prepared_ = true;

  // Helper emitted into the shader: maps the position into clip space using a
  // fixed 1280x720 target, then applies the viewport scale/offset and window
  // offset from the state block.
  std::string apply_viewport =
      "vec4 applyViewport(vec4 pos) {\n"
      // TODO(benvanik): piecewise viewport_enable -> offset/scale logic.
      " if (false) {\n"
      " } else {\n"
      /*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, "
      "-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
      " pos.zw = vec2(0.0, 1.0);\n"*/
      " pos.xy = pos.xy / vec2(1280.0 / 2.0, "
      "-720.0 / 2.0) + vec2(-1.0, 1.0);\n"
      " //pos.zw = vec2(0.0, 1.0);\n"
      " }\n"
      " pos.x = pos.x * state->viewport_scale.x + \n"
      " state->viewport_offset.x;\n"
      " pos.y = pos.y * state->viewport_scale.y + \n"
      " state->viewport_offset.y;\n"
      " pos.z = pos.z * state->viewport_scale.z + \n"
      " state->viewport_offset.z;\n"
      " pos.xy += state->window_offset.xy;\n"
      " return pos;\n"
      "}\n";

  std::string source =
      header + apply_viewport +
      "out gl_PerVertex {\n"
      " vec4 gl_Position;\n"
      " float gl_PointSize;\n"
      " float gl_ClipDistance[];\n"
      "};\n"
      "layout(location = 0) in vec3 iF0;\n"
      "layout(location = 1) in vec4 iF1;\n"
      "layout(location = 0) out VertexData vtx;\n"
      "void main() {\n"
      //" vec4 oPos = vec4(iF0.xy, 0.0, 1.0);\n"
      " vec4 oPos = iF0.xxxx * state->float_consts[0];\n"
      " oPos = (iF0.yyyy * state->float_consts[1]) + oPos;\n"
      " oPos = (iF0.zzzz * state->float_consts[2]) + oPos;\n"
      " oPos = (vec4(1.0, 1.0, 1.0, 1.0) * state->float_consts[3]) + oPos;\n"
      //" gl_PointSize = 1.0;\n"
      // Zero every output slot. This has to index with i: writing o[0] on
      // each iteration would leave o[1..15] undefined for the pixel stage.
      " for (int i = 0; i < vtx.o.length(); ++i) {\n"
      " vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
      " }\n"
      " vtx.o[0] = iF1;\n"
      " gl_Position = applyViewport(oPos);\n"
      //" gl_Position = oPos;\n"
      "}\n";

  if (!CompileProgram(source)) {
    return false;
  }

  is_valid_ = true;
  return true;
}
// Builds and compiles the (currently hard-coded) GLSL pixel shader: every
// colour output is filled with opaque red and output 0 is then overwritten
// with the first interpolated vertex value.
//
// Like the vertex path, preparation runs at most once; subsequent calls return
// the cached is_valid_ flag. Neither program_cntl nor vertex_shader is used by
// the body yet.
bool GL4Shader::PreparePixelShader(
    const xenos::xe_gpu_program_cntl_t& program_cntl,
    GL4Shader* vertex_shader) {
  if (has_prepared_) {
    return is_valid_;
  }
  has_prepared_ = true;

  // Shared preamble followed by the stage-specific body.
  std::string source = header;
  source +=
      "layout(location = 0) in VertexData vtx;\n"
      "layout(location = 0) out vec4 oC[4];\n"
      "void main() {\n"
      " for (int i = 0; i < oC.length(); ++i) {\n"
      " oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
      " }\n"
      " oC[0] = vtx.o[0];\n"
      //" gl_FragDepth = 0.0;\n"
      "}\n";

  if (CompileProgram(source)) {
    is_valid_ = true;
    return true;
  }
  return false;
}
bool GL4Shader::CompileProgram(std::string source) {
assert_zero(program_);
translated_disassembly_ = std::move(source);
const char* source_str = translated_disassembly_.c_str();
program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex
? GL_VERTEX_SHADER
: GL_FRAGMENT_SHADER,
1, &source_str);
if (!program_) {
PLOGE("Unable to create shader program");
return false;
}
GLint link_status = 0;
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
if (!link_status) {
// log_length includes the null character.
GLint log_length = 0;
glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length);
std::string info_log;
info_log.resize(log_length - 1);
glGetProgramInfoLog(program_, log_length, &log_length,
const_cast<char*>(info_log.data()));
PLOGE("Unable to link program: %s", info_log.c_str());
error_log_ = std::move(info_log);
return false;
}
return true;
}
} // namespace gl4
} // namespace gpu

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_GL4_GL4_SHADER_H_
#include <xenia/common.h>
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/shader.h>
namespace xe {
@ -19,10 +20,20 @@ namespace gl4 {
// OpenGL 4 flavour of Shader: owns the GL program object built for one guest
// vertex or pixel shader.
class GL4Shader : public Shader {
public:
using Shader::Shader;
// Passes all arguments through to Shader; program_ starts at 0.
GL4Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
// Deletes program_.
~GL4Shader() override;
// GL program object name, or 0 if no program has been created.
GLuint program() const { return program_; }
// Build the GL program for this shader on first call; later calls return the
// cached validity flag without rebuilding.
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl,
GL4Shader* vertex_shader);
protected:
bool TranslateImpl() override;
// Compiles/links `source` into program_ for this shader's stage; returns
// false (after logging) on failure.
bool CompileProgram(std::string source);
// GL program object created by CompileProgram and deleted by the destructor.
GLuint program_;
};
} // namespace gl4

View File

@ -16,10 +16,14 @@ namespace xe {
namespace gpu {
using namespace xe::gpu::ucode;
using namespace xe::gpu::xenos;
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: shader_type_(shader_type), data_hash_(data_hash), is_valid_(false) {
: shader_type_(shader_type),
data_hash_(data_hash),
has_prepared_(false),
is_valid_(false) {
data_.resize(dword_count);
poly::copy_and_swap(data_.data(), dword_ptr, dword_count);
std::memset(&alloc_counts_, 0, sizeof(alloc_counts_));
@ -35,18 +39,7 @@ Shader::Shader(ShaderType shader_type, uint64_t data_hash,
GatherIO();
}
// Runs the backend-specific translation and records the outcome in is_valid_.
// Asserts that the shader has not already been marked valid.
// Returns the new value of is_valid_.
bool Shader::Translate() {
  assert_false(is_valid_);

  // TODO(benvanik): disk cache/etc - lookup hash and load if found.
  // TODO(benvanik): dump to disk.
  // TODO(benvanik): parallelize? (allow two translations at once, etc).

  // Hand off to the implementation. This can take a while and is likely to
  // fail.
  const bool translated = TranslateImpl();
  is_valid_ = translated;
  return translated;
}
// Out-of-line defaulted destructor; nothing beyond member destruction is done.
Shader::~Shader() = default;
void Shader::GatherIO() {
// Process all execution blocks.
@ -203,44 +196,43 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
}
el->vtx_fetch = *vtx;
el->format = vtx->format;
el->format = static_cast<VertexFormat>(vtx->format);
el->is_normalized = vtx->num_format_all == 0;
el->is_signed = vtx->format_comp_all == 1;
el->offset_words = vtx->offset;
el->size_words = 0;
switch (el->format) {
case FMT_8_8_8_8:
case FMT_2_10_10_10:
case FMT_10_11_11:
case FMT_11_11_10:
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
el->size_words = 1;
break;
case FMT_16_16:
case FMT_16_16_FLOAT:
case VertexFormat::k_16_16:
case VertexFormat::k_16_16_FLOAT:
el->size_words = 1;
break;
case FMT_16_16_16_16:
case FMT_16_16_16_16_FLOAT:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_16_16_16_16_FLOAT:
el->size_words = 2;
break;
case FMT_32:
case FMT_32_FLOAT:
case VertexFormat::k_32:
case VertexFormat::k_32_FLOAT:
el->size_words = 1;
break;
case FMT_32_32:
case FMT_32_32_FLOAT:
case VertexFormat::k_32_32:
case VertexFormat::k_32_32_FLOAT:
el->size_words = 2;
break;
case FMT_32_32_32_FLOAT:
case VertexFormat::k_32_32_32_FLOAT:
el->size_words = 3;
break;
case FMT_32_32_32_32:
case FMT_32_32_32_32_FLOAT:
case VertexFormat::k_32_32_32_32:
case VertexFormat::k_32_32_32_32_FLOAT:
el->size_words = 4;
break;
default:
XELOGE("Unknown vertex format: %d", el->format);
assert_always();
assert_unhandled_case(el->format);
break;
}
}

View File

@ -20,21 +20,19 @@ namespace gpu {
class Shader {
public:
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
uint32_t dword_count);
virtual ~Shader();
ShaderType type() const { return shader_type_; }
bool has_prepared() const { return has_prepared_; }
bool is_valid() const { return is_valid_; }
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
const std::string& translated_disassembly() const {
return translated_disassembly_;
}
bool Translate();
struct BufferDescElement {
ucode::instr_fetch_vtx_t vtx_fetch;
uint32_t format;
xenos::VertexFormat format;
uint32_t offset_words;
uint32_t size_words;
bool is_signed;
@ -76,7 +74,8 @@ class Shader {
const std::vector<ucode::instr_cf_alloc_t>& allocs() const { return allocs_; }
protected:
virtual bool TranslateImpl() = 0;
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
uint32_t dword_count);
void GatherIO();
void GatherAlloc(const ucode::instr_cf_alloc_t* cf);
@ -87,10 +86,12 @@ class Shader {
ShaderType shader_type_;
uint64_t data_hash_;
std::vector<uint32_t> data_;
bool has_prepared_;
bool is_valid_;
std::string ucode_disassembly_;
std::string translated_disassembly_;
std::string error_log_;
AllocCounts alloc_counts_;
std::vector<ucode::instr_cf_exec_t> execs_;

View File

@ -72,9 +72,9 @@ enum class MsaaSamples : uint32_t {
};
enum class ColorRenderTargetFormat : uint32_t {
k8888 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
k8888Gamma = 1, // D3DFMT_A8R8G8B8 with gamma correction
// ...
k_8_8_8_8 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
k_8_8_8_8_GAMMA = 1, // D3DFMT_A8R8G8B8 with gamma correction
// ...
};
enum class DepthRenderTargetFormat : uint32_t {
@ -98,29 +98,47 @@ enum class CopyCommand : uint32_t {
// Subset of a2xx_sq_surfaceformat.
enum class ColorFormat : uint32_t {
kColor_8 = 2,
kColor_1_5_5_5 = 3,
kColor_5_6_5 = 4,
kColor_6_5_5 = 5,
kColor_8_8_8_8 = 6,
kColor_2_10_10_10 = 7,
kColor_8_A = 8,
kColor_8_B = 9,
kColor_8_8 = 10,
kColor_8_8_8_8_A = 14,
kColor_4_4_4_4 = 15,
kColor_10_11_11 = 16,
kColor_11_11_10 = 17,
kColor_16 = 24,
kColor_16_16 = 25,
kColor_16_16_16_16 = 26,
kColor_16_FLOAT = 30,
kColor_16_16_FLOAT = 31,
kColor_16_16_16_16_FLOAT = 32,
kColor_32_FLOAT = 36,
kColor_32_32_FLOAT = 37,
kColor_32_32_32_32_FLOAT = 38,
kColor_2_10_10_10_FLOAT = 62,
k_8 = 2,
k_1_5_5_5 = 3,
k_5_6_5 = 4,
k_6_5_5 = 5,
k_8_8_8_8 = 6,
k_2_10_10_10 = 7,
k_8_A = 8,
k_8_B = 9,
k_8_8 = 10,
k_8_8_8_8_A = 14,
k_4_4_4_4 = 15,
k_10_11_11 = 16,
k_11_11_10 = 17,
k_16 = 24,
k_16_16 = 25,
k_16_16_16_16 = 26,
k_16_FLOAT = 30,
k_16_16_FLOAT = 31,
k_16_16_16_16_FLOAT = 32,
k_32_FLOAT = 36,
k_32_32_FLOAT = 37,
k_32_32_32_32_FLOAT = 38,
k_2_10_10_10_FLOAT = 62,
};
// Formats a vertex fetch element can use. The raw format field of a vertex
// fetch instruction is cast directly to this type, so the numeric values are
// significant. Enumerators that share a name with ColorFormat use the same
// value there.
// NOTE(review): presumably these are also taken from a2xx_sq_surfaceformat,
// like ColorFormat - confirm against the hardware headers.
enum class VertexFormat : uint32_t {
// Packed 32-bit formats.
k_8_8_8_8 = 6,
k_2_10_10_10 = 7,
k_10_11_11 = 16,
k_11_11_10 = 17,
// 16-bit components (integer, then half-float variants).
k_16_16 = 25,
k_16_16_16_16 = 26,
k_16_16_FLOAT = 31,
k_16_16_16_16_FLOAT = 32,
// 32-bit integer components.
k_32 = 33,
k_32_32 = 34,
k_32_32_32_32 = 35,
// 32-bit float components. Note that the three-component variant is out of
// sequence at 57.
k_32_FLOAT = 36,
k_32_32_FLOAT = 37,
k_32_32_32_32_FLOAT = 38,
k_32_32_32_FLOAT = 57,
};
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \