Experimenting with vertex pipeline.
This commit is contained in:
parent
d2a3cba4f3
commit
14ee211ea9
|
@ -31,6 +31,8 @@ class CircularBuffer {
|
|||
|
||||
bool Initialize();
|
||||
|
||||
GLuint handle() const { return buffer_; }
|
||||
|
||||
Allocation Acquire(size_t length);
|
||||
void Commit(Allocation allocation);
|
||||
|
||||
|
|
|
@ -151,13 +151,17 @@ bool CommandProcessor::SetupGL() {
|
|||
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
|
||||
|
||||
// Circular buffer holding scratch vertex/index data.
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
if (!scratch_buffer_.Initialize()) {
|
||||
PLOGE("Unable to initialize scratch buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
GLuint vertex_array;
|
||||
glGenVertexArrays(1, &vertex_array);
|
||||
glBindVertexArray(vertex_array);
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -251,8 +255,7 @@ void CommandProcessor::PrepareForWait() {
|
|||
// TODO(benvanik): fences and fancy stuff. We should figure out a way to
|
||||
// make interrupt callbacks from the GPU so that we don't have to do a full
|
||||
// synchronize here.
|
||||
// glFlush();
|
||||
glFinish();
|
||||
glFlush();
|
||||
|
||||
if (FLAGS_thread_safe_gl) {
|
||||
context_->ClearCurrent();
|
||||
|
@ -1162,10 +1165,11 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// if (!PopulateShaders(draw_command)) {
|
||||
// XELOGE("Unable to prepare draw shaders");
|
||||
// return false;
|
||||
//}
|
||||
if (!UpdateShaders(draw_command)) {
|
||||
PLOGE("Unable to prepare draw shaders");
|
||||
return false;
|
||||
}
|
||||
|
||||
// if (!PopulateSamplers(draw_command)) {
|
||||
// XELOGE("Unable to prepare draw samplers");
|
||||
// return false;
|
||||
|
@ -1176,25 +1180,77 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
|||
return false;
|
||||
}
|
||||
if (!PopulateVertexBuffers(draw_command)) {
|
||||
XELOGE("Unable to setup vertex buffers");
|
||||
PLOGE("Unable to setup vertex buffers");
|
||||
return false;
|
||||
}
|
||||
|
||||
GLenum prim_type = 0;
|
||||
switch (cmd.prim_type) {
|
||||
case PrimitiveType::kPointList:
|
||||
prim_type = GL_POINTS;
|
||||
/*if (vs->DemandGeometryShader(
|
||||
D3D11VertexShaderResource::POINT_SPRITE_SHADER, &geometry_shader)) {
|
||||
return 1;
|
||||
}*/
|
||||
break;
|
||||
case PrimitiveType::kLineList:
|
||||
prim_type = GL_LINES;
|
||||
break;
|
||||
case PrimitiveType::kLineStrip:
|
||||
prim_type = GL_LINE_STRIP;
|
||||
break;
|
||||
case PrimitiveType::kLineLoop:
|
||||
prim_type = GL_LINE_LOOP;
|
||||
break;
|
||||
case PrimitiveType::kTriangleList:
|
||||
prim_type = GL_TRIANGLES;
|
||||
break;
|
||||
case PrimitiveType::kTriangleStrip:
|
||||
prim_type = GL_TRIANGLE_STRIP;
|
||||
break;
|
||||
case PrimitiveType::kTriangleFan:
|
||||
prim_type = GL_TRIANGLE_FAN;
|
||||
break;
|
||||
case PrimitiveType::kRectangleList:
|
||||
prim_type = GL_TRIANGLE_STRIP;
|
||||
/*if (vs->DemandGeometryShader(
|
||||
D3D11VertexShaderResource::RECT_LIST_SHADER, &geometry_shader)) {
|
||||
return 1;
|
||||
}*/
|
||||
break;
|
||||
case PrimitiveType::kQuadList:
|
||||
prim_type = GL_LINES_ADJACENCY;
|
||||
/*if
|
||||
(vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER,
|
||||
&geometry_shader)) {
|
||||
return 1;
|
||||
}*/
|
||||
break;
|
||||
default:
|
||||
case PrimitiveType::kUnknown0x07:
|
||||
prim_type = GL_POINTS;
|
||||
XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type);
|
||||
break;
|
||||
}
|
||||
|
||||
// HACK HACK HACK
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
|
||||
if (cmd.index_buffer.address) {
|
||||
// Indexed draw.
|
||||
// PopulateIndexBuffer has our element array setup.
|
||||
//size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
|
||||
// ? sizeof(uint32_t)
|
||||
// : sizeof(uint16_t);
|
||||
//glDrawElementsBaseVertex(
|
||||
// prim_type, cmd.index_count,
|
||||
// cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
|
||||
// : GL_UNSIGNED_SHORT,
|
||||
// reinterpret_cast<void*>(cmd.start_index * element_size),
|
||||
// cmd.base_vertex);
|
||||
size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
|
||||
? sizeof(uint32_t)
|
||||
: sizeof(uint16_t);
|
||||
glDrawElementsBaseVertex(
|
||||
prim_type, cmd.index_count,
|
||||
cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
|
||||
: GL_UNSIGNED_SHORT,
|
||||
reinterpret_cast<void*>(cmd.start_index * element_size),
|
||||
cmd.base_vertex);
|
||||
} else {
|
||||
// Auto draw.
|
||||
//glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
|
||||
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -1215,10 +1271,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
|||
};
|
||||
};
|
||||
struct UniformDataBlock {
|
||||
float4 window_offset; // tx,ty,?,?
|
||||
float4 window_scissor; // x0,y0,x1,y1
|
||||
float4 viewport_offset; // tx,ty,tz,?
|
||||
float4 viewport_scale; // sx,sy,sz,?
|
||||
float4 window_offset; // tx,ty,rt_w,rt_h
|
||||
float4 window_scissor; // x0,y0,x1,y1
|
||||
float4 viewport_offset; // tx,ty,tz,?
|
||||
float4 viewport_scale; // sx,sy,sz,?
|
||||
// TODO(benvanik): vertex format xyzw?
|
||||
|
||||
float4 alpha_test; // alpha test enable, func, ref, ?
|
||||
|
@ -1236,11 +1292,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
|||
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
|
||||
"Need <=16k uniform data");
|
||||
|
||||
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(
|
||||
glMapNamedBufferRange(uniform_data_buffer_, 0, 16 * 1024,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
|
||||
auto allocation = scratch_buffer_.Acquire(16 * 1024);
|
||||
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
|
||||
if (!buffer_ptr) {
|
||||
PLOGE("Unable to map uniform data buffer");
|
||||
PLOGE("Unable to allocate uniform data buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1257,18 +1312,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
|||
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
|
||||
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
|
||||
|
||||
// Viewport scaling. Only enabled if the flags are all set.
|
||||
buffer_ptr->viewport_scale.x =
|
||||
regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640
|
||||
buffer_ptr->viewport_offset.x =
|
||||
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640
|
||||
buffer_ptr->viewport_scale.y =
|
||||
regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360
|
||||
buffer_ptr->viewport_offset.y =
|
||||
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360
|
||||
buffer_ptr->viewport_scale.z = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1
|
||||
buffer_ptr->viewport_offset.z =
|
||||
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0
|
||||
// HACK: no clue where to get these values.
|
||||
buffer_ptr->window_offset.z = 1280;
|
||||
buffer_ptr->window_offset.w = 720;
|
||||
|
||||
// Whether each of the viewport settings is enabled.
|
||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||
|
@ -1282,6 +1328,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
|||
assert_true(vport_xscale_enable == vport_yscale_enable ==
|
||||
vport_zscale_enable == vport_xoffset_enable ==
|
||||
vport_yoffset_enable == vport_zoffset_enable);
|
||||
|
||||
// Viewport scaling. Only enabled if the flags are all set.
|
||||
buffer_ptr->viewport_scale.x =
|
||||
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640
|
||||
buffer_ptr->viewport_offset.x = vport_xoffset_enable
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||
: 0; // 640
|
||||
buffer_ptr->viewport_scale.y = vport_yscale_enable
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||
: 1; // -360
|
||||
buffer_ptr->viewport_offset.y = vport_yoffset_enable
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: 0; // 360
|
||||
buffer_ptr->viewport_scale.z =
|
||||
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1
|
||||
buffer_ptr->viewport_offset.z =
|
||||
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0
|
||||
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
|
||||
// = false: multiply the X, Y coordinates by 1/W0.
|
||||
bool vtx_xy_fmt = (vte_control >> 8) & 0x1;
|
||||
|
@ -1504,7 +1567,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
|||
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
|
||||
}
|
||||
|
||||
glUnmapNamedBuffer(uniform_data_buffer_);
|
||||
// Stash - program setup will bind this to uniforms.
|
||||
draw_command->state_data_gpu_ptr = allocation.gpu_ptr;
|
||||
scratch_buffer_.Commit(std::move(allocation));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -1590,11 +1655,80 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
|
|||
|
||||
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
|
||||
// Pretend we are drawing.
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glScissor(100, 100, 100, 100);
|
||||
float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
|
||||
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, red);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
// glEnable(GL_SCISSOR_TEST);
|
||||
// glScissor(100, 100, 100, 100);
|
||||
// float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
|
||||
// glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
|
||||
// red);
|
||||
// glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Ensures the active vertex and pixel shaders have been compiled, combines
// their programs into a program pipeline, points each stage's "state" uniform
// at draw_command->state_data_gpu_ptr, and binds the pipeline for drawing.
//
// Returns false if either shader cannot be prepared or was already found to
// be invalid by an earlier preparation attempt.
bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
  SCOPE_profile_cpu_f("gpu");

  xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = (*register_file_)[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;

  // Vertex stage: compile on first use, otherwise reject a known-bad shader.
  if (active_vertex_shader_->has_prepared()) {
    if (!active_vertex_shader_->is_valid()) {
      XELOGE("Vertex shader invalid");
      return false;
    }
  } else if (!active_vertex_shader_->PrepareVertexShader(program_cntl)) {
    XELOGE("Unable to prepare vertex shader");
    return false;
  }

  // Pixel stage: same policy; preparation is handed the vertex shader.
  if (active_pixel_shader_->has_prepared()) {
    if (!active_pixel_shader_->is_valid()) {
      XELOGE("Pixel shader invalid");
      return false;
    }
  } else if (!active_pixel_shader_->PreparePixelShader(
                 program_cntl, active_vertex_shader_)) {
    XELOGE("Unable to prepare pixel shader");
    return false;
  }

  const GLuint vs_program = active_vertex_shader_->program();
  GLuint gs_program = 0;  // No geometry stage is attached yet.
  const GLuint fs_program = active_pixel_shader_->program();

  // NOTE(review): a new pipeline object is created on every call and the
  // matching delete at the bottom is commented out, so these objects are
  // never released here - confirm whether that is intentional.
  GLuint pipeline;
  glCreateProgramPipelines(1, &pipeline);
  glUseProgramStages(pipeline, GL_VERTEX_SHADER_BIT, vs_program);
  glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, gs_program);
  glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fs_program);

  // HACK: layout(location=0) on a bindless uniform crashes nvidia driver.
  // The location is therefore queried by name; it is expected to be either
  // absent (-1) or 0.
  const GLint vs_state_loc = glGetUniformLocation(vs_program, "state");
  assert_true(vs_state_loc == -1 || vs_state_loc == 0);
  GLint gs_state_loc = -1;
  if (gs_program) {
    gs_state_loc = glGetUniformLocation(gs_program, "state");
  }
  assert_true(gs_state_loc == -1 || gs_state_loc == 0);
  const GLint fs_state_loc = glGetUniformLocation(fs_program, "state");
  assert_true(fs_state_loc == -1 || fs_state_loc == 0);

  // TODO(benvanik): do we need to do this for all stages if the locations
  //     match?
  const auto state_gpu_ptr = draw_command->state_data_gpu_ptr;
  if (vs_state_loc != -1) {
    glProgramUniformHandleui64ARB(vs_program, vs_state_loc, state_gpu_ptr);
  }
  if (gs_program && gs_state_loc != -1) {
    glProgramUniformHandleui64ARB(gs_program, gs_state_loc, state_gpu_ptr);
  }
  if (fs_state_loc != -1) {
    glProgramUniformHandleui64ARB(fs_program, fs_state_loc, state_gpu_ptr);
  }

  glBindProgramPipeline(pipeline);
  // glDeleteProgramPipelines(1, &pipeline);

  return true;
}
|
||||
|
@ -1641,15 +1775,9 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
auto& regs = *register_file_;
|
||||
auto& cmd = *draw_command;
|
||||
assert_not_null(active_vertex_shader_);
|
||||
|
||||
if (!cmd.vertex_shader) {
|
||||
// No vertex shader, no-op.
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto& buffer_inputs = cmd.vertex_shader->buffer_inputs();
|
||||
|
||||
// glBindVertexArray(vertex_array);
|
||||
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
|
||||
|
||||
for (size_t n = 0; n < buffer_inputs.count; n++) {
|
||||
const auto& desc = buffer_inputs.descs[n];
|
||||
|
@ -1685,9 +1813,100 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
|
|||
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
|
||||
fetch->size);
|
||||
|
||||
/*glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV,
|
||||
desc.input_index,
|
||||
allocation.gpu_ptr, allocation.length);*/
|
||||
uint32_t el_index = 0;
|
||||
for (uint32_t i = 0; i < desc.element_count; ++i) {
|
||||
const auto& el = desc.elements[i];
|
||||
GLuint comp_count;
|
||||
GLuint comp_size;
|
||||
GLenum comp_type;
|
||||
switch (el.format) {
|
||||
case VertexFormat::k_8_8_8_8:
|
||||
comp_count = 4;
|
||||
comp_size = 1;
|
||||
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
||||
break;
|
||||
case VertexFormat::k_2_10_10_10:
|
||||
comp_count = 4;
|
||||
comp_size = 4;
|
||||
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
|
||||
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||
break;
|
||||
case VertexFormat::k_10_11_11:
|
||||
comp_count = 3;
|
||||
comp_size = 4;
|
||||
assert_false(el.is_signed);
|
||||
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
||||
break;
|
||||
/*case VertexFormat::k_11_11_10:
|
||||
break;*/
|
||||
case VertexFormat::k_16_16:
|
||||
comp_count = 2;
|
||||
comp_size = 2;
|
||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_FLOAT:
|
||||
comp_count = 2;
|
||||
comp_size = 2;
|
||||
comp_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_16_16:
|
||||
comp_count = 4;
|
||||
comp_size = 2;
|
||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||
break;
|
||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||
comp_count = 4;
|
||||
comp_size = 2;
|
||||
comp_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32:
|
||||
comp_count = 1;
|
||||
comp_size = 4;
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_32:
|
||||
comp_count = 2;
|
||||
comp_size = 4;
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_32:
|
||||
comp_count = 4;
|
||||
comp_size = 4;
|
||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||
break;
|
||||
case VertexFormat::k_32_FLOAT:
|
||||
comp_count = 1;
|
||||
comp_size = 4;
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_FLOAT:
|
||||
comp_count = 2;
|
||||
comp_size = 4;
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_FLOAT:
|
||||
comp_count = 3;
|
||||
comp_size = 4;
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||
comp_count = 4;
|
||||
comp_size = 4;
|
||||
comp_type = GL_FLOAT;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(el.format);
|
||||
break;
|
||||
}
|
||||
size_t offset = el.offset_words * sizeof(uint32_t);
|
||||
glEnableVertexAttribArray(el_index);
|
||||
glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized,
|
||||
desc.stride_words * sizeof(uint32_t));
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
|
||||
allocation.gpu_ptr + offset,
|
||||
allocation.length - offset);
|
||||
++el_index;
|
||||
}
|
||||
|
||||
// Flush buffer before we draw.
|
||||
scratch_buffer_.Commit(std::move(allocation));
|
||||
|
@ -1782,7 +2001,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
|
|||
GLenum read_format;
|
||||
GLenum read_type;
|
||||
switch (copy_dest_format) {
|
||||
case ColorFormat::kColor_8_8_8_8:
|
||||
case ColorFormat::k_8_8_8_8:
|
||||
read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
|
||||
read_type = GL_UNSIGNED_BYTE;
|
||||
break;
|
||||
|
@ -1832,10 +2051,10 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
|
|||
// glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer)
|
||||
glNamedFramebufferReadBuffer(source_framebuffer->framebuffer,
|
||||
GL_COLOR_ATTACHMENT0 + copy_src_select);
|
||||
glReadPixels(x, y, w, h, read_format, read_type, ptr);
|
||||
//glReadPixels(x, y, w, h, read_format, read_type, ptr);
|
||||
} else {
|
||||
// Source from the bound depth/stencil target.
|
||||
glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
|
||||
//glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
|
||||
}
|
||||
break;
|
||||
case CopyCommand::kRaw:
|
||||
|
@ -1876,7 +2095,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
|
|||
glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL,
|
||||
depth.float_value, stencil);
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1890,8 +2109,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
|
|||
uint32_t height = 2560;
|
||||
|
||||
// NOTE: we strip gamma formats down to normal ones.
|
||||
if (format == ColorRenderTargetFormat::k8888Gamma) {
|
||||
format = ColorRenderTargetFormat::k8888;
|
||||
if (format == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||
format = ColorRenderTargetFormat::k_8_8_8_8;
|
||||
}
|
||||
|
||||
for (auto& it = cached_color_render_targets_.begin();
|
||||
|
@ -1910,8 +2129,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
|
|||
|
||||
GLenum internal_format;
|
||||
switch (format) {
|
||||
case ColorRenderTargetFormat::k8888:
|
||||
case ColorRenderTargetFormat::k8888Gamma:
|
||||
case ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
internal_format = GL_RGBA8;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -47,9 +47,6 @@ struct DrawCommand {
|
|||
uint32_t index_count;
|
||||
uint32_t base_vertex;
|
||||
|
||||
GL4Shader* vertex_shader;
|
||||
GL4Shader* pixel_shader;
|
||||
|
||||
// Index buffer, if present.
|
||||
// If index_count > 0 but buffer is nullptr then auto draw.
|
||||
struct {
|
||||
|
@ -69,6 +66,8 @@ struct DrawCommand {
|
|||
size_t vertex_shader_sampler_count;
|
||||
SamplerInput pixel_shader_samplers[32];
|
||||
size_t pixel_shader_sampler_count;
|
||||
|
||||
GLuint64 state_data_gpu_ptr;
|
||||
};
|
||||
|
||||
class CommandProcessor {
|
||||
|
@ -188,6 +187,7 @@ class CommandProcessor {
|
|||
bool IssueDraw(DrawCommand* draw_command);
|
||||
bool UpdateState(DrawCommand* draw_command);
|
||||
bool UpdateRenderTargets(DrawCommand* draw_command);
|
||||
bool UpdateShaders(DrawCommand* draw_command);
|
||||
bool PopulateIndexBuffer(DrawCommand* draw_command);
|
||||
bool PopulateVertexBuffers(DrawCommand* draw_command);
|
||||
bool IssueCopy(DrawCommand* draw_command);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include <poly/threading.h>
|
||||
#include <xenia/cpu/processor.h>
|
||||
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
|
||||
namespace xe {
|
||||
|
@ -42,11 +43,19 @@ X_STATUS GL4GraphicsSystem::Setup() {
|
|||
control_ = std::make_unique<WGLControl>(loop);
|
||||
emulator_->main_window()->AddChild(control_.get());
|
||||
|
||||
if (FLAGS_thread_safe_gl) {
|
||||
control_->context()->MakeCurrent();
|
||||
}
|
||||
|
||||
// Setup the GL context the command processor will do all its drawing in.
|
||||
// It's shared with the control context so that we can resolve framebuffers
|
||||
// from it.
|
||||
processor_context = control_->context()->CreateShared();
|
||||
|
||||
if (FLAGS_thread_safe_gl) {
|
||||
control_->context()->ClearCurrent();
|
||||
}
|
||||
|
||||
control_ready_fence.Signal();
|
||||
});
|
||||
control_ready_fence.Wait();
|
||||
|
|
|
@ -15,7 +15,163 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
bool GL4Shader::TranslateImpl() { return true; }
|
||||
extern "C" GLEWContext* glewGetContext();
|
||||
|
||||
// Forwards the raw shader description to the Shader base class and starts
// with no GL program object (program_ == 0).
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
                     const uint32_t* dword_ptr, uint32_t dword_count)
    : Shader(shader_type, data_hash, dword_ptr, dword_count),
      program_(0) {
}
|
||||
|
||||
// Releases the GL program object held in program_. program_ is still 0 when
// no program was ever created; glDeleteProgram silently ignores that value.
GL4Shader::~GL4Shader() {
  glDeleteProgram(program_);
}
|
||||
|
||||
// GLSL preamble prepended to every shader source built in this file: version
// and extension directives, default precision and layout qualifiers, the
// StateData / VertexData struct declarations, and the "state" uniform pointer.
const std::string header =
    // Version and extensions.
    "#version 450\n"
    "#extension all : warn\n"
    "#extension GL_ARB_bindless_texture : require\n"
    "#extension GL_ARB_explicit_uniform_location : require\n"
    "#extension GL_ARB_shading_language_420pack : require\n"
    "#extension GL_ARB_shader_storage_buffer_object : require\n"
    "#extension GL_NV_shader_buffer_load : require\n"
    // Default precision and block layouts.
    "precision highp float;\n"
    "precision highp int;\n"
    "layout(std140, column_major) uniform;\n"
    "layout(std430, column_major) buffer;\n"
    // Per-draw state block read through the "state" pointer.
    "struct StateData {\n"
    " vec4 window_offset;\n"
    " vec4 window_scissor;\n"
    " vec4 viewport_offset;\n"
    " vec4 viewport_scale;\n"
    " vec4 alpha_test;\n"
    " vec4 float_consts[512];\n"
    " uint fetch_consts[32 * 6];\n"
    " int bool_consts[8];\n"
    " int loop_consts[32];\n"
    "};\n"
    // Interface block contents passed from the vertex to the pixel stage.
    "struct VertexData {\n"
    " vec4 o[16];\n"
    "};\n"
    "\n"
    "uniform StateData* state;\n";
|
||||
|
||||
bool GL4Shader::PrepareVertexShader(
|
||||
const xenos::xe_gpu_program_cntl_t& program_cntl) {
|
||||
if (has_prepared_) {
|
||||
return is_valid_;
|
||||
}
|
||||
has_prepared_ = true;
|
||||
|
||||
std::string apply_viewport =
|
||||
"vec4 applyViewport(vec4 pos) {\n"
|
||||
// TODO(benvanik): piecewise viewport_enable -> offset/scale logic.
|
||||
" if (false) {\n"
|
||||
" } else {\n"
|
||||
/*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, "
|
||||
"-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
|
||||
" pos.zw = vec2(0.0, 1.0);\n"*/
|
||||
" pos.xy = pos.xy / vec2(1280.0 / 2.0, "
|
||||
"-720.0 / 2.0) + vec2(-1.0, 1.0);\n"
|
||||
" //pos.zw = vec2(0.0, 1.0);\n"
|
||||
" }\n"
|
||||
" pos.x = pos.x * state->viewport_scale.x + \n"
|
||||
" state->viewport_offset.x;\n"
|
||||
" pos.y = pos.y * state->viewport_scale.y + \n"
|
||||
" state->viewport_offset.y;\n"
|
||||
" pos.z = pos.z * state->viewport_scale.z + \n"
|
||||
" state->viewport_offset.z;\n"
|
||||
" pos.xy += state->window_offset.xy;\n"
|
||||
" return pos;\n"
|
||||
"}\n";
|
||||
std::string source =
|
||||
header + apply_viewport +
|
||||
"out gl_PerVertex {\n"
|
||||
" vec4 gl_Position;\n"
|
||||
" float gl_PointSize;\n"
|
||||
" float gl_ClipDistance[];\n"
|
||||
"};\n"
|
||||
"layout(location = 0) in vec3 iF0;\n"
|
||||
"layout(location = 1) in vec4 iF1;\n"
|
||||
"layout(location = 0) out VertexData vtx;\n"
|
||||
"void main() {\n"
|
||||
//" vec4 oPos = vec4(iF0.xy, 0.0, 1.0);\n"
|
||||
" vec4 oPos = iF0.xxxx * state->float_consts[0];\n"
|
||||
" oPos = (iF0.yyyy * state->float_consts[1]) + oPos;\n"
|
||||
" oPos = (iF0.zzzz * state->float_consts[2]) + oPos;\n"
|
||||
" oPos = (vec4(1.0, 1.0, 1.0, 1.0) * state->float_consts[3]) + oPos;\n"
|
||||
//" gl_PointSize = 1.0;\n"
|
||||
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
||||
" vtx.o[0] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
||||
" }\n"
|
||||
" vtx.o[0] = iF1;\n"
|
||||
" gl_Position = applyViewport(oPos);\n"
|
||||
//" gl_Position = oPos;\n"
|
||||
"}\n";
|
||||
|
||||
if (!CompileProgram(source)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
is_valid_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GL4Shader::PreparePixelShader(
|
||||
const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||
GL4Shader* vertex_shader) {
|
||||
if (has_prepared_) {
|
||||
return is_valid_;
|
||||
}
|
||||
has_prepared_ = true;
|
||||
|
||||
std::string source = header +
|
||||
"layout(location = 0) in VertexData vtx;\n"
|
||||
"layout(location = 0) out vec4 oC[4];\n"
|
||||
"void main() {\n"
|
||||
" for (int i = 0; i < oC.length(); ++i) {\n"
|
||||
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
|
||||
" }\n"
|
||||
" oC[0] = vtx.o[0];\n"
|
||||
//" gl_FragDepth = 0.0;\n"
|
||||
"}\n";
|
||||
|
||||
if (!CompileProgram(source)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
is_valid_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GL4Shader::CompileProgram(std::string source) {
|
||||
assert_zero(program_);
|
||||
|
||||
translated_disassembly_ = std::move(source);
|
||||
const char* source_str = translated_disassembly_.c_str();
|
||||
|
||||
program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex
|
||||
? GL_VERTEX_SHADER
|
||||
: GL_FRAGMENT_SHADER,
|
||||
1, &source_str);
|
||||
if (!program_) {
|
||||
PLOGE("Unable to create shader program");
|
||||
return false;
|
||||
}
|
||||
|
||||
GLint link_status = 0;
|
||||
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
|
||||
if (!link_status) {
|
||||
// log_length includes the null character.
|
||||
GLint log_length = 0;
|
||||
glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length);
|
||||
std::string info_log;
|
||||
info_log.resize(log_length - 1);
|
||||
glGetProgramInfoLog(program_, log_length, &log_length,
|
||||
const_cast<char*>(info_log.data()));
|
||||
PLOGE("Unable to link program: %s", info_log.c_str());
|
||||
error_log_ = std::move(info_log);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
||||
|
||||
#include <xenia/common.h>
|
||||
#include <xenia/gpu/gl4/gl_context.h>
|
||||
#include <xenia/gpu/shader.h>
|
||||
|
||||
namespace xe {
|
||||
|
@ -19,10 +20,20 @@ namespace gl4 {
|
|||
|
||||
class GL4Shader : public Shader {
|
||||
public:
|
||||
using Shader::Shader;
|
||||
GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||
~GL4Shader() override;
|
||||
|
||||
GLuint program() const { return program_; }
|
||||
|
||||
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||
GL4Shader* vertex_shader);
|
||||
|
||||
protected:
|
||||
bool TranslateImpl() override;
|
||||
bool CompileProgram(std::string source);
|
||||
|
||||
GLuint program_;
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -16,10 +16,14 @@ namespace xe {
|
|||
namespace gpu {
|
||||
|
||||
using namespace xe::gpu::ucode;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||
: shader_type_(shader_type), data_hash_(data_hash), is_valid_(false) {
|
||||
: shader_type_(shader_type),
|
||||
data_hash_(data_hash),
|
||||
has_prepared_(false),
|
||||
is_valid_(false) {
|
||||
data_.resize(dword_count);
|
||||
poly::copy_and_swap(data_.data(), dword_ptr, dword_count);
|
||||
std::memset(&alloc_counts_, 0, sizeof(alloc_counts_));
|
||||
|
@ -35,18 +39,7 @@ Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
|||
GatherIO();
|
||||
}
|
||||
|
||||
bool Shader::Translate() {
|
||||
assert_false(is_valid_);
|
||||
|
||||
// TODO(benvanik): disk cache/etc - lookup hash and load if found.
|
||||
// TODO(benvanik): dump to disk.
|
||||
|
||||
// Attempt implementation-specific translation.
|
||||
// This may take awhile, and probably will fail.
|
||||
// TODO(benvanik): parallelize? (allow two translations at once, etc).
|
||||
is_valid_ = TranslateImpl();
|
||||
return is_valid_;
|
||||
}
|
||||
Shader::~Shader() = default;
|
||||
|
||||
void Shader::GatherIO() {
|
||||
// Process all execution blocks.
|
||||
|
@ -203,44 +196,43 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
|
|||
}
|
||||
|
||||
el->vtx_fetch = *vtx;
|
||||
el->format = vtx->format;
|
||||
el->format = static_cast<VertexFormat>(vtx->format);
|
||||
el->is_normalized = vtx->num_format_all == 0;
|
||||
el->is_signed = vtx->format_comp_all == 1;
|
||||
el->offset_words = vtx->offset;
|
||||
el->size_words = 0;
|
||||
switch (el->format) {
|
||||
case FMT_8_8_8_8:
|
||||
case FMT_2_10_10_10:
|
||||
case FMT_10_11_11:
|
||||
case FMT_11_11_10:
|
||||
case VertexFormat::k_8_8_8_8:
|
||||
case VertexFormat::k_2_10_10_10:
|
||||
case VertexFormat::k_10_11_11:
|
||||
case VertexFormat::k_11_11_10:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16:
|
||||
case FMT_16_16_FLOAT:
|
||||
case VertexFormat::k_16_16:
|
||||
case VertexFormat::k_16_16_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16_16_16:
|
||||
case FMT_16_16_16_16_FLOAT:
|
||||
case VertexFormat::k_16_16_16_16:
|
||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32:
|
||||
case FMT_32_FLOAT:
|
||||
case VertexFormat::k_32:
|
||||
case VertexFormat::k_32_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_32_32:
|
||||
case FMT_32_32_FLOAT:
|
||||
case VertexFormat::k_32_32:
|
||||
case VertexFormat::k_32_32_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32_32_32_FLOAT:
|
||||
case VertexFormat::k_32_32_32_FLOAT:
|
||||
el->size_words = 3;
|
||||
break;
|
||||
case FMT_32_32_32_32:
|
||||
case FMT_32_32_32_32_FLOAT:
|
||||
case VertexFormat::k_32_32_32_32:
|
||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||
el->size_words = 4;
|
||||
break;
|
||||
default:
|
||||
XELOGE("Unknown vertex format: %d", el->format);
|
||||
assert_always();
|
||||
assert_unhandled_case(el->format);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,21 +20,19 @@ namespace gpu {
|
|||
|
||||
class Shader {
|
||||
public:
|
||||
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
||||
uint32_t dword_count);
|
||||
virtual ~Shader();
|
||||
|
||||
ShaderType type() const { return shader_type_; }
|
||||
bool has_prepared() const { return has_prepared_; }
|
||||
bool is_valid() const { return is_valid_; }
|
||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||
const std::string& translated_disassembly() const {
|
||||
return translated_disassembly_;
|
||||
}
|
||||
|
||||
bool Translate();
|
||||
|
||||
struct BufferDescElement {
|
||||
ucode::instr_fetch_vtx_t vtx_fetch;
|
||||
uint32_t format;
|
||||
xenos::VertexFormat format;
|
||||
uint32_t offset_words;
|
||||
uint32_t size_words;
|
||||
bool is_signed;
|
||||
|
@ -76,7 +74,8 @@ class Shader {
|
|||
const std::vector<ucode::instr_cf_alloc_t>& allocs() const { return allocs_; }
|
||||
|
||||
protected:
|
||||
virtual bool TranslateImpl() = 0;
|
||||
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
||||
uint32_t dword_count);
|
||||
|
||||
void GatherIO();
|
||||
void GatherAlloc(const ucode::instr_cf_alloc_t* cf);
|
||||
|
@ -87,10 +86,12 @@ class Shader {
|
|||
ShaderType shader_type_;
|
||||
uint64_t data_hash_;
|
||||
std::vector<uint32_t> data_;
|
||||
bool has_prepared_;
|
||||
bool is_valid_;
|
||||
|
||||
std::string ucode_disassembly_;
|
||||
std::string translated_disassembly_;
|
||||
std::string error_log_;
|
||||
|
||||
AllocCounts alloc_counts_;
|
||||
std::vector<ucode::instr_cf_exec_t> execs_;
|
||||
|
|
|
@ -72,9 +72,9 @@ enum class MsaaSamples : uint32_t {
|
|||
};
|
||||
|
||||
enum class ColorRenderTargetFormat : uint32_t {
|
||||
k8888 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
|
||||
k8888Gamma = 1, // D3DFMT_A8R8G8B8 with gamma correction
|
||||
// ...
|
||||
k_8_8_8_8 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
|
||||
k_8_8_8_8_GAMMA = 1, // D3DFMT_A8R8G8B8 with gamma correction
|
||||
// ...
|
||||
};
|
||||
|
||||
enum class DepthRenderTargetFormat : uint32_t {
|
||||
|
@ -98,29 +98,47 @@ enum class CopyCommand : uint32_t {
|
|||
|
||||
// Subset of a2xx_sq_surfaceformat.
|
||||
enum class ColorFormat : uint32_t {
|
||||
kColor_8 = 2,
|
||||
kColor_1_5_5_5 = 3,
|
||||
kColor_5_6_5 = 4,
|
||||
kColor_6_5_5 = 5,
|
||||
kColor_8_8_8_8 = 6,
|
||||
kColor_2_10_10_10 = 7,
|
||||
kColor_8_A = 8,
|
||||
kColor_8_B = 9,
|
||||
kColor_8_8 = 10,
|
||||
kColor_8_8_8_8_A = 14,
|
||||
kColor_4_4_4_4 = 15,
|
||||
kColor_10_11_11 = 16,
|
||||
kColor_11_11_10 = 17,
|
||||
kColor_16 = 24,
|
||||
kColor_16_16 = 25,
|
||||
kColor_16_16_16_16 = 26,
|
||||
kColor_16_FLOAT = 30,
|
||||
kColor_16_16_FLOAT = 31,
|
||||
kColor_16_16_16_16_FLOAT = 32,
|
||||
kColor_32_FLOAT = 36,
|
||||
kColor_32_32_FLOAT = 37,
|
||||
kColor_32_32_32_32_FLOAT = 38,
|
||||
kColor_2_10_10_10_FLOAT = 62,
|
||||
k_8 = 2,
|
||||
k_1_5_5_5 = 3,
|
||||
k_5_6_5 = 4,
|
||||
k_6_5_5 = 5,
|
||||
k_8_8_8_8 = 6,
|
||||
k_2_10_10_10 = 7,
|
||||
k_8_A = 8,
|
||||
k_8_B = 9,
|
||||
k_8_8 = 10,
|
||||
k_8_8_8_8_A = 14,
|
||||
k_4_4_4_4 = 15,
|
||||
k_10_11_11 = 16,
|
||||
k_11_11_10 = 17,
|
||||
k_16 = 24,
|
||||
k_16_16 = 25,
|
||||
k_16_16_16_16 = 26,
|
||||
k_16_FLOAT = 30,
|
||||
k_16_16_FLOAT = 31,
|
||||
k_16_16_16_16_FLOAT = 32,
|
||||
k_32_FLOAT = 36,
|
||||
k_32_32_FLOAT = 37,
|
||||
k_32_32_32_32_FLOAT = 38,
|
||||
k_2_10_10_10_FLOAT = 62,
|
||||
};
|
||||
|
||||
// Vertex element formats. The numeric values are the raw format codes carried
// by a vertex fetch instruction (Shader::GatherVertexFetch casts the fetch's
// format field straight to this enum), so they must not be renumbered.
// Enumerators are grouped by the element size, in 32-bit words, that
// Shader::GatherVertexFetch assigns to them.
enum class VertexFormat : uint32_t {
  // One word per element.
  k_8_8_8_8 = 6,
  k_2_10_10_10 = 7,
  k_10_11_11 = 16,
  k_11_11_10 = 17,
  k_16_16 = 25,
  k_16_16_FLOAT = 31,
  k_32 = 33,
  k_32_FLOAT = 36,

  // Two words per element.
  k_16_16_16_16 = 26,
  k_16_16_16_16_FLOAT = 32,
  k_32_32 = 34,
  k_32_32_FLOAT = 37,

  // Three words per element.
  k_32_32_32_FLOAT = 57,

  // Four words per element.
  k_32_32_32_32 = 35,
  k_32_32_32_32_FLOAT = 38,
};
|
||||
|
||||
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
||||
|
|
Loading…
Reference in New Issue