Experimenting with vertex pipeline.
This commit is contained in:
parent
d2a3cba4f3
commit
14ee211ea9
|
@ -31,6 +31,8 @@ class CircularBuffer {
|
||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
|
|
||||||
|
GLuint handle() const { return buffer_; }
|
||||||
|
|
||||||
Allocation Acquire(size_t length);
|
Allocation Acquire(size_t length);
|
||||||
void Commit(Allocation allocation);
|
void Commit(Allocation allocation);
|
||||||
|
|
||||||
|
|
|
@ -151,13 +151,17 @@ bool CommandProcessor::SetupGL() {
|
||||||
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
|
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
|
||||||
|
|
||||||
// Circular buffer holding scratch vertex/index data.
|
// Circular buffer holding scratch vertex/index data.
|
||||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
|
||||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
|
||||||
if (!scratch_buffer_.Initialize()) {
|
if (!scratch_buffer_.Initialize()) {
|
||||||
PLOGE("Unable to initialize scratch buffer");
|
PLOGE("Unable to initialize scratch buffer");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLuint vertex_array;
|
||||||
|
glGenVertexArrays(1, &vertex_array);
|
||||||
|
glBindVertexArray(vertex_array);
|
||||||
|
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||||
|
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,8 +255,7 @@ void CommandProcessor::PrepareForWait() {
|
||||||
// TODO(benvanik): fences and fancy stuff. We should figure out a way to
|
// TODO(benvanik): fences and fancy stuff. We should figure out a way to
|
||||||
// make interrupt callbacks from the GPU so that we don't have to do a full
|
// make interrupt callbacks from the GPU so that we don't have to do a full
|
||||||
// synchronize here.
|
// synchronize here.
|
||||||
// glFlush();
|
glFlush();
|
||||||
glFinish();
|
|
||||||
|
|
||||||
if (FLAGS_thread_safe_gl) {
|
if (FLAGS_thread_safe_gl) {
|
||||||
context_->ClearCurrent();
|
context_->ClearCurrent();
|
||||||
|
@ -1162,10 +1165,11 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (!PopulateShaders(draw_command)) {
|
if (!UpdateShaders(draw_command)) {
|
||||||
// XELOGE("Unable to prepare draw shaders");
|
PLOGE("Unable to prepare draw shaders");
|
||||||
// return false;
|
return false;
|
||||||
//}
|
}
|
||||||
|
|
||||||
// if (!PopulateSamplers(draw_command)) {
|
// if (!PopulateSamplers(draw_command)) {
|
||||||
// XELOGE("Unable to prepare draw samplers");
|
// XELOGE("Unable to prepare draw samplers");
|
||||||
// return false;
|
// return false;
|
||||||
|
@ -1176,25 +1180,77 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!PopulateVertexBuffers(draw_command)) {
|
if (!PopulateVertexBuffers(draw_command)) {
|
||||||
XELOGE("Unable to setup vertex buffers");
|
PLOGE("Unable to setup vertex buffers");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLenum prim_type = 0;
|
||||||
|
switch (cmd.prim_type) {
|
||||||
|
case PrimitiveType::kPointList:
|
||||||
|
prim_type = GL_POINTS;
|
||||||
|
/*if (vs->DemandGeometryShader(
|
||||||
|
D3D11VertexShaderResource::POINT_SPRITE_SHADER, &geometry_shader)) {
|
||||||
|
return 1;
|
||||||
|
}*/
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kLineList:
|
||||||
|
prim_type = GL_LINES;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kLineStrip:
|
||||||
|
prim_type = GL_LINE_STRIP;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kLineLoop:
|
||||||
|
prim_type = GL_LINE_LOOP;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kTriangleList:
|
||||||
|
prim_type = GL_TRIANGLES;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kTriangleStrip:
|
||||||
|
prim_type = GL_TRIANGLE_STRIP;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kTriangleFan:
|
||||||
|
prim_type = GL_TRIANGLE_FAN;
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kRectangleList:
|
||||||
|
prim_type = GL_TRIANGLE_STRIP;
|
||||||
|
/*if (vs->DemandGeometryShader(
|
||||||
|
D3D11VertexShaderResource::RECT_LIST_SHADER, &geometry_shader)) {
|
||||||
|
return 1;
|
||||||
|
}*/
|
||||||
|
break;
|
||||||
|
case PrimitiveType::kQuadList:
|
||||||
|
prim_type = GL_LINES_ADJACENCY;
|
||||||
|
/*if
|
||||||
|
(vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER,
|
||||||
|
&geometry_shader)) {
|
||||||
|
return 1;
|
||||||
|
}*/
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
case PrimitiveType::kUnknown0x07:
|
||||||
|
prim_type = GL_POINTS;
|
||||||
|
XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// HACK HACK HACK
|
||||||
|
glDisable(GL_DEPTH_TEST);
|
||||||
|
|
||||||
if (cmd.index_buffer.address) {
|
if (cmd.index_buffer.address) {
|
||||||
// Indexed draw.
|
// Indexed draw.
|
||||||
// PopulateIndexBuffer has our element array setup.
|
// PopulateIndexBuffer has our element array setup.
|
||||||
//size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
|
size_t element_size = cmd.index_buffer.format == IndexFormat::kInt32
|
||||||
// ? sizeof(uint32_t)
|
? sizeof(uint32_t)
|
||||||
// : sizeof(uint16_t);
|
: sizeof(uint16_t);
|
||||||
//glDrawElementsBaseVertex(
|
glDrawElementsBaseVertex(
|
||||||
// prim_type, cmd.index_count,
|
prim_type, cmd.index_count,
|
||||||
// cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
|
cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
|
||||||
// : GL_UNSIGNED_SHORT,
|
: GL_UNSIGNED_SHORT,
|
||||||
// reinterpret_cast<void*>(cmd.start_index * element_size),
|
reinterpret_cast<void*>(cmd.start_index * element_size),
|
||||||
// cmd.base_vertex);
|
cmd.base_vertex);
|
||||||
} else {
|
} else {
|
||||||
// Auto draw.
|
// Auto draw.
|
||||||
//glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
|
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1215,7 +1271,7 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
struct UniformDataBlock {
|
struct UniformDataBlock {
|
||||||
float4 window_offset; // tx,ty,?,?
|
float4 window_offset; // tx,ty,rt_w,rt_h
|
||||||
float4 window_scissor; // x0,y0,x1,y1
|
float4 window_scissor; // x0,y0,x1,y1
|
||||||
float4 viewport_offset; // tx,ty,tz,?
|
float4 viewport_offset; // tx,ty,tz,?
|
||||||
float4 viewport_scale; // sx,sy,sz,?
|
float4 viewport_scale; // sx,sy,sz,?
|
||||||
|
@ -1236,11 +1292,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
||||||
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
|
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
|
||||||
"Need <=16k uniform data");
|
"Need <=16k uniform data");
|
||||||
|
|
||||||
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(
|
auto allocation = scratch_buffer_.Acquire(16 * 1024);
|
||||||
glMapNamedBufferRange(uniform_data_buffer_, 0, 16 * 1024,
|
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
|
||||||
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
|
|
||||||
if (!buffer_ptr) {
|
if (!buffer_ptr) {
|
||||||
PLOGE("Unable to map uniform data buffer");
|
PLOGE("Unable to allocate uniform data buffer");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1257,18 +1312,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
||||||
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
|
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
|
||||||
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
|
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
|
||||||
|
|
||||||
// Viewport scaling. Only enabled if the flags are all set.
|
// HACK: no clue where to get these values.
|
||||||
buffer_ptr->viewport_scale.x =
|
buffer_ptr->window_offset.z = 1280;
|
||||||
regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640
|
buffer_ptr->window_offset.w = 720;
|
||||||
buffer_ptr->viewport_offset.x =
|
|
||||||
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640
|
|
||||||
buffer_ptr->viewport_scale.y =
|
|
||||||
regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360
|
|
||||||
buffer_ptr->viewport_offset.y =
|
|
||||||
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360
|
|
||||||
buffer_ptr->viewport_scale.z = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1
|
|
||||||
buffer_ptr->viewport_offset.z =
|
|
||||||
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0
|
|
||||||
|
|
||||||
// Whether each of the viewport settings is enabled.
|
// Whether each of the viewport settings is enabled.
|
||||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||||
|
@ -1282,6 +1328,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
||||||
assert_true(vport_xscale_enable == vport_yscale_enable ==
|
assert_true(vport_xscale_enable == vport_yscale_enable ==
|
||||||
vport_zscale_enable == vport_xoffset_enable ==
|
vport_zscale_enable == vport_xoffset_enable ==
|
||||||
vport_yoffset_enable == vport_zoffset_enable);
|
vport_yoffset_enable == vport_zoffset_enable);
|
||||||
|
|
||||||
|
// Viewport scaling. Only enabled if the flags are all set.
|
||||||
|
buffer_ptr->viewport_scale.x =
|
||||||
|
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640
|
||||||
|
buffer_ptr->viewport_offset.x = vport_xoffset_enable
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||||
|
: 0; // 640
|
||||||
|
buffer_ptr->viewport_scale.y = vport_yscale_enable
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||||
|
: 1; // -360
|
||||||
|
buffer_ptr->viewport_offset.y = vport_yoffset_enable
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||||
|
: 0; // 360
|
||||||
|
buffer_ptr->viewport_scale.z =
|
||||||
|
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1
|
||||||
|
buffer_ptr->viewport_offset.z =
|
||||||
|
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0
|
||||||
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
|
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
|
||||||
// = false: multiply the X, Y coordinates by 1/W0.
|
// = false: multiply the X, Y coordinates by 1/W0.
|
||||||
bool vtx_xy_fmt = (vte_control >> 8) & 0x1;
|
bool vtx_xy_fmt = (vte_control >> 8) & 0x1;
|
||||||
|
@ -1504,7 +1567,9 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
|
||||||
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
|
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
|
||||||
}
|
}
|
||||||
|
|
||||||
glUnmapNamedBuffer(uniform_data_buffer_);
|
// Stash - program setup will bind this to uniforms.
|
||||||
|
draw_command->state_data_gpu_ptr = allocation.gpu_ptr;
|
||||||
|
scratch_buffer_.Commit(std::move(allocation));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1590,11 +1655,80 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
|
||||||
|
|
||||||
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
|
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
|
||||||
// Pretend we are drawing.
|
// Pretend we are drawing.
|
||||||
glEnable(GL_SCISSOR_TEST);
|
// glEnable(GL_SCISSOR_TEST);
|
||||||
glScissor(100, 100, 100, 100);
|
// glScissor(100, 100, 100, 100);
|
||||||
float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
|
// float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
|
||||||
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, red);
|
// glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
|
||||||
glDisable(GL_SCISSOR_TEST);
|
// red);
|
||||||
|
// glDisable(GL_SCISSOR_TEST);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Makes the active vertex and pixel shaders ready for the draw and binds a
// program pipeline that combines them.
//
// Each shader is prepared on first use; a shader that was already prepared but
// is not valid fails the draw. The GPU address stored in
// draw_command->state_data_gpu_ptr is written to the "state" uniform of every
// stage that declares it.
//
// Returns false if either shader cannot be prepared or is invalid.
bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
  SCOPE_profile_cpu_f("gpu");
  auto& regs = *register_file_;
  auto& cmd = *draw_command;

  // Both shaders are dereferenced unconditionally below; catch a missing one
  // here instead of crashing on the first member call.
  assert_not_null(active_vertex_shader_);
  assert_not_null(active_pixel_shader_);

  xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
  if (!active_vertex_shader_->has_prepared()) {
    if (!active_vertex_shader_->PrepareVertexShader(program_cntl)) {
      XELOGE("Unable to prepare vertex shader");
      return false;
    }
  } else if (!active_vertex_shader_->is_valid()) {
    XELOGE("Vertex shader invalid");
    return false;
  }

  if (!active_pixel_shader_->has_prepared()) {
    if (!active_pixel_shader_->PreparePixelShader(program_cntl,
                                                  active_vertex_shader_)) {
      XELOGE("Unable to prepare pixel shader");
      return false;
    }
  } else if (!active_pixel_shader_->is_valid()) {
    XELOGE("Pixel shader invalid");
    return false;
  }

  GLuint vertex_program = active_vertex_shader_->program();
  // No geometry stage yet; 0 leaves that pipeline stage empty.
  GLuint geometry_program = 0;
  GLuint fragment_program = active_pixel_shader_->program();

  // NOTE(review): a fresh pipeline object is created on every call and its
  // name is never stored or deleted (see the commented-out delete at the end),
  // so pipeline objects accumulate across draws. Deleting it right after
  // binding is presumably not an option because that would unbind it - confirm
  // against the GL spec and cache pipelines per shader pair instead.
  GLuint pipeline;
  glCreateProgramPipelines(1, &pipeline);
  glUseProgramStages(pipeline, GL_VERTEX_SHADER_BIT, vertex_program);
  glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, geometry_program);
  glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fragment_program);

  // HACK: layout(location=0) on a bindless uniform crashes nvidia driver.
  // The location is looked up at runtime instead; it must be either absent
  // (-1) or 0.
  GLint vertex_state_loc = glGetUniformLocation(vertex_program, "state");
  assert_true(vertex_state_loc == -1 || vertex_state_loc == 0);
  GLint geometry_state_loc =
      geometry_program ? glGetUniformLocation(geometry_program, "state") : -1;
  assert_true(geometry_state_loc == -1 || geometry_state_loc == 0);
  GLint fragment_state_loc = glGetUniformLocation(fragment_program, "state");
  assert_true(fragment_state_loc == -1 || fragment_state_loc == 0);

  // TODO(benvanik): do we need to do this for all stages if the locations
  // match?
  if (vertex_state_loc != -1) {
    glProgramUniformHandleui64ARB(vertex_program, vertex_state_loc,
                                  cmd.state_data_gpu_ptr);
  }
  if (geometry_program && geometry_state_loc != -1) {
    glProgramUniformHandleui64ARB(geometry_program, geometry_state_loc,
                                  cmd.state_data_gpu_ptr);
  }
  if (fragment_state_loc != -1) {
    glProgramUniformHandleui64ARB(fragment_program, fragment_state_loc,
                                  cmd.state_data_gpu_ptr);
  }

  glBindProgramPipeline(pipeline);
  // glDeleteProgramPipelines(1, &pipeline);

  return true;
}
|
||||||
|
@ -1641,15 +1775,9 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
auto& cmd = *draw_command;
|
auto& cmd = *draw_command;
|
||||||
|
assert_not_null(active_vertex_shader_);
|
||||||
|
|
||||||
if (!cmd.vertex_shader) {
|
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
|
||||||
// No vertex shader, no-op.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto& buffer_inputs = cmd.vertex_shader->buffer_inputs();
|
|
||||||
|
|
||||||
// glBindVertexArray(vertex_array);
|
|
||||||
|
|
||||||
for (size_t n = 0; n < buffer_inputs.count; n++) {
|
for (size_t n = 0; n < buffer_inputs.count; n++) {
|
||||||
const auto& desc = buffer_inputs.descs[n];
|
const auto& desc = buffer_inputs.descs[n];
|
||||||
|
@ -1685,9 +1813,100 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
|
||||||
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
|
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
|
||||||
fetch->size);
|
fetch->size);
|
||||||
|
|
||||||
/*glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV,
|
uint32_t el_index = 0;
|
||||||
desc.input_index,
|
for (uint32_t i = 0; i < desc.element_count; ++i) {
|
||||||
allocation.gpu_ptr, allocation.length);*/
|
const auto& el = desc.elements[i];
|
||||||
|
GLuint comp_count;
|
||||||
|
GLuint comp_size;
|
||||||
|
GLenum comp_type;
|
||||||
|
switch (el.format) {
|
||||||
|
case VertexFormat::k_8_8_8_8:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 1;
|
||||||
|
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_2_10_10_10:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
|
||||||
|
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_10_11_11:
|
||||||
|
comp_count = 3;
|
||||||
|
comp_size = 4;
|
||||||
|
assert_false(el.is_signed);
|
||||||
|
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
||||||
|
break;
|
||||||
|
/*case VertexFormat::k_11_11_10:
|
||||||
|
break;*/
|
||||||
|
case VertexFormat::k_16_16:
|
||||||
|
comp_count = 2;
|
||||||
|
comp_size = 2;
|
||||||
|
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_FLOAT:
|
||||||
|
comp_count = 2;
|
||||||
|
comp_size = 2;
|
||||||
|
comp_type = GL_HALF_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_16_16:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 2;
|
||||||
|
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 2;
|
||||||
|
comp_type = GL_HALF_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32:
|
||||||
|
comp_count = 1;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32:
|
||||||
|
comp_count = 2;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_32:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_FLOAT:
|
||||||
|
comp_count = 1;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_FLOAT:
|
||||||
|
comp_count = 2;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_FLOAT:
|
||||||
|
comp_count = 3;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||||
|
comp_count = 4;
|
||||||
|
comp_size = 4;
|
||||||
|
comp_type = GL_FLOAT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(el.format);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
size_t offset = el.offset_words * sizeof(uint32_t);
|
||||||
|
glEnableVertexAttribArray(el_index);
|
||||||
|
glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized,
|
||||||
|
desc.stride_words * sizeof(uint32_t));
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
|
||||||
|
allocation.gpu_ptr + offset,
|
||||||
|
allocation.length - offset);
|
||||||
|
++el_index;
|
||||||
|
}
|
||||||
|
|
||||||
// Flush buffer before we draw.
|
// Flush buffer before we draw.
|
||||||
scratch_buffer_.Commit(std::move(allocation));
|
scratch_buffer_.Commit(std::move(allocation));
|
||||||
|
@ -1782,7 +2001,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
|
||||||
GLenum read_format;
|
GLenum read_format;
|
||||||
GLenum read_type;
|
GLenum read_type;
|
||||||
switch (copy_dest_format) {
|
switch (copy_dest_format) {
|
||||||
case ColorFormat::kColor_8_8_8_8:
|
case ColorFormat::k_8_8_8_8:
|
||||||
read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
|
read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
|
||||||
read_type = GL_UNSIGNED_BYTE;
|
read_type = GL_UNSIGNED_BYTE;
|
||||||
break;
|
break;
|
||||||
|
@ -1832,10 +2051,10 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
|
||||||
// glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer)
|
// glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer)
|
||||||
glNamedFramebufferReadBuffer(source_framebuffer->framebuffer,
|
glNamedFramebufferReadBuffer(source_framebuffer->framebuffer,
|
||||||
GL_COLOR_ATTACHMENT0 + copy_src_select);
|
GL_COLOR_ATTACHMENT0 + copy_src_select);
|
||||||
glReadPixels(x, y, w, h, read_format, read_type, ptr);
|
//glReadPixels(x, y, w, h, read_format, read_type, ptr);
|
||||||
} else {
|
} else {
|
||||||
// Source from the bound depth/stencil target.
|
// Source from the bound depth/stencil target.
|
||||||
glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
|
//glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, read_type, ptr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CopyCommand::kRaw:
|
case CopyCommand::kRaw:
|
||||||
|
@ -1890,8 +2109,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
|
||||||
uint32_t height = 2560;
|
uint32_t height = 2560;
|
||||||
|
|
||||||
// NOTE: we strip gamma formats down to normal ones.
|
// NOTE: we strip gamma formats down to normal ones.
|
||||||
if (format == ColorRenderTargetFormat::k8888Gamma) {
|
if (format == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||||
format = ColorRenderTargetFormat::k8888;
|
format = ColorRenderTargetFormat::k_8_8_8_8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& it = cached_color_render_targets_.begin();
|
for (auto& it = cached_color_render_targets_.begin();
|
||||||
|
@ -1910,8 +2129,8 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
|
||||||
|
|
||||||
GLenum internal_format;
|
GLenum internal_format;
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case ColorRenderTargetFormat::k8888:
|
case ColorRenderTargetFormat::k_8_8_8_8:
|
||||||
case ColorRenderTargetFormat::k8888Gamma:
|
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||||
internal_format = GL_RGBA8;
|
internal_format = GL_RGBA8;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -47,9 +47,6 @@ struct DrawCommand {
|
||||||
uint32_t index_count;
|
uint32_t index_count;
|
||||||
uint32_t base_vertex;
|
uint32_t base_vertex;
|
||||||
|
|
||||||
GL4Shader* vertex_shader;
|
|
||||||
GL4Shader* pixel_shader;
|
|
||||||
|
|
||||||
// Index buffer, if present.
|
// Index buffer, if present.
|
||||||
// If index_count > 0 but buffer is nullptr then auto draw.
|
// If index_count > 0 but buffer is nullptr then auto draw.
|
||||||
struct {
|
struct {
|
||||||
|
@ -69,6 +66,8 @@ struct DrawCommand {
|
||||||
size_t vertex_shader_sampler_count;
|
size_t vertex_shader_sampler_count;
|
||||||
SamplerInput pixel_shader_samplers[32];
|
SamplerInput pixel_shader_samplers[32];
|
||||||
size_t pixel_shader_sampler_count;
|
size_t pixel_shader_sampler_count;
|
||||||
|
|
||||||
|
GLuint64 state_data_gpu_ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CommandProcessor {
|
class CommandProcessor {
|
||||||
|
@ -188,6 +187,7 @@ class CommandProcessor {
|
||||||
bool IssueDraw(DrawCommand* draw_command);
|
bool IssueDraw(DrawCommand* draw_command);
|
||||||
bool UpdateState(DrawCommand* draw_command);
|
bool UpdateState(DrawCommand* draw_command);
|
||||||
bool UpdateRenderTargets(DrawCommand* draw_command);
|
bool UpdateRenderTargets(DrawCommand* draw_command);
|
||||||
|
bool UpdateShaders(DrawCommand* draw_command);
|
||||||
bool PopulateIndexBuffer(DrawCommand* draw_command);
|
bool PopulateIndexBuffer(DrawCommand* draw_command);
|
||||||
bool PopulateVertexBuffers(DrawCommand* draw_command);
|
bool PopulateVertexBuffers(DrawCommand* draw_command);
|
||||||
bool IssueCopy(DrawCommand* draw_command);
|
bool IssueCopy(DrawCommand* draw_command);
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <poly/threading.h>
|
#include <poly/threading.h>
|
||||||
#include <xenia/cpu/processor.h>
|
#include <xenia/cpu/processor.h>
|
||||||
|
#include <xenia/gpu/gl4/gl4_gpu-private.h>
|
||||||
#include <xenia/gpu/gpu-private.h>
|
#include <xenia/gpu/gpu-private.h>
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -42,11 +43,19 @@ X_STATUS GL4GraphicsSystem::Setup() {
|
||||||
control_ = std::make_unique<WGLControl>(loop);
|
control_ = std::make_unique<WGLControl>(loop);
|
||||||
emulator_->main_window()->AddChild(control_.get());
|
emulator_->main_window()->AddChild(control_.get());
|
||||||
|
|
||||||
|
if (FLAGS_thread_safe_gl) {
|
||||||
|
control_->context()->MakeCurrent();
|
||||||
|
}
|
||||||
|
|
||||||
// Setup the GL context the command processor will do all its drawing in.
|
// Setup the GL context the command processor will do all its drawing in.
|
||||||
// It's shared with the control context so that we can resolve framebuffers
|
// It's shared with the control context so that we can resolve framebuffers
|
||||||
// from it.
|
// from it.
|
||||||
processor_context = control_->context()->CreateShared();
|
processor_context = control_->context()->CreateShared();
|
||||||
|
|
||||||
|
if (FLAGS_thread_safe_gl) {
|
||||||
|
control_->context()->ClearCurrent();
|
||||||
|
}
|
||||||
|
|
||||||
control_ready_fence.Signal();
|
control_ready_fence.Signal();
|
||||||
});
|
});
|
||||||
control_ready_fence.Wait();
|
control_ready_fence.Wait();
|
||||||
|
|
|
@ -15,7 +15,163 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace gl4 {
|
namespace gl4 {
|
||||||
|
|
||||||
bool GL4Shader::TranslateImpl() { return true; }
|
extern "C" GLEWContext* glewGetContext();
|
||||||
|
|
||||||
|
// Hands the shader description to the Shader base class. No GL program object
// exists yet: program_ stays 0 until CompileProgram creates one.
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
                     const uint32_t* dword_ptr, uint32_t dword_count)
    : Shader(shader_type, data_hash, dword_ptr, dword_count),
      program_(0) {
}
|
||||||
|
|
||||||
|
// Releases the GL program object held in program_. program_ is 0 when no
// program was ever created; glDeleteProgram is documented to ignore 0.
GL4Shader::~GL4Shader() { glDeleteProgram(program_); }
|
||||||
|
|
||||||
|
// GLSL preamble placed at the start of every generated shader source: version
// and extension directives, default precision and layout qualifiers, the
// StateData and VertexData struct declarations, and the bindless `state`
// uniform pointer.
const std::string header = R"(#version 450
#extension all : warn
#extension GL_ARB_bindless_texture : require
#extension GL_ARB_explicit_uniform_location : require
#extension GL_ARB_shading_language_420pack : require
#extension GL_ARB_shader_storage_buffer_object : require
#extension GL_NV_shader_buffer_load : require
precision highp float;
precision highp int;
layout(std140, column_major) uniform;
layout(std430, column_major) buffer;
struct StateData {
 vec4 window_offset;
 vec4 window_scissor;
 vec4 viewport_offset;
 vec4 viewport_scale;
 vec4 alpha_test;
 vec4 float_consts[512];
 uint fetch_consts[32 * 6];
 int bool_consts[8];
 int loop_consts[32];
};
struct VertexData {
 vec4 o[16];
};

uniform StateData* state;
)";
|
||||||
|
|
||||||
|
bool GL4Shader::PrepareVertexShader(
|
||||||
|
const xenos::xe_gpu_program_cntl_t& program_cntl) {
|
||||||
|
if (has_prepared_) {
|
||||||
|
return is_valid_;
|
||||||
|
}
|
||||||
|
has_prepared_ = true;
|
||||||
|
|
||||||
|
std::string apply_viewport =
|
||||||
|
"vec4 applyViewport(vec4 pos) {\n"
|
||||||
|
// TODO(benvanik): piecewise viewport_enable -> offset/scale logic.
|
||||||
|
" if (false) {\n"
|
||||||
|
" } else {\n"
|
||||||
|
/*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, "
|
||||||
|
"-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
|
||||||
|
" pos.zw = vec2(0.0, 1.0);\n"*/
|
||||||
|
" pos.xy = pos.xy / vec2(1280.0 / 2.0, "
|
||||||
|
"-720.0 / 2.0) + vec2(-1.0, 1.0);\n"
|
||||||
|
" //pos.zw = vec2(0.0, 1.0);\n"
|
||||||
|
" }\n"
|
||||||
|
" pos.x = pos.x * state->viewport_scale.x + \n"
|
||||||
|
" state->viewport_offset.x;\n"
|
||||||
|
" pos.y = pos.y * state->viewport_scale.y + \n"
|
||||||
|
" state->viewport_offset.y;\n"
|
||||||
|
" pos.z = pos.z * state->viewport_scale.z + \n"
|
||||||
|
" state->viewport_offset.z;\n"
|
||||||
|
" pos.xy += state->window_offset.xy;\n"
|
||||||
|
" return pos;\n"
|
||||||
|
"}\n";
|
||||||
|
std::string source =
|
||||||
|
header + apply_viewport +
|
||||||
|
"out gl_PerVertex {\n"
|
||||||
|
" vec4 gl_Position;\n"
|
||||||
|
" float gl_PointSize;\n"
|
||||||
|
" float gl_ClipDistance[];\n"
|
||||||
|
"};\n"
|
||||||
|
"layout(location = 0) in vec3 iF0;\n"
|
||||||
|
"layout(location = 1) in vec4 iF1;\n"
|
||||||
|
"layout(location = 0) out VertexData vtx;\n"
|
||||||
|
"void main() {\n"
|
||||||
|
//" vec4 oPos = vec4(iF0.xy, 0.0, 1.0);\n"
|
||||||
|
" vec4 oPos = iF0.xxxx * state->float_consts[0];\n"
|
||||||
|
" oPos = (iF0.yyyy * state->float_consts[1]) + oPos;\n"
|
||||||
|
" oPos = (iF0.zzzz * state->float_consts[2]) + oPos;\n"
|
||||||
|
" oPos = (vec4(1.0, 1.0, 1.0, 1.0) * state->float_consts[3]) + oPos;\n"
|
||||||
|
//" gl_PointSize = 1.0;\n"
|
||||||
|
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
||||||
|
" vtx.o[0] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
||||||
|
" }\n"
|
||||||
|
" vtx.o[0] = iF1;\n"
|
||||||
|
" gl_Position = applyViewport(oPos);\n"
|
||||||
|
//" gl_Position = oPos;\n"
|
||||||
|
"}\n";
|
||||||
|
|
||||||
|
if (!CompileProgram(source)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_valid_ = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GL4Shader::PreparePixelShader(
|
||||||
|
const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||||
|
GL4Shader* vertex_shader) {
|
||||||
|
if (has_prepared_) {
|
||||||
|
return is_valid_;
|
||||||
|
}
|
||||||
|
has_prepared_ = true;
|
||||||
|
|
||||||
|
std::string source = header +
|
||||||
|
"layout(location = 0) in VertexData vtx;\n"
|
||||||
|
"layout(location = 0) out vec4 oC[4];\n"
|
||||||
|
"void main() {\n"
|
||||||
|
" for (int i = 0; i < oC.length(); ++i) {\n"
|
||||||
|
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
|
||||||
|
" }\n"
|
||||||
|
" oC[0] = vtx.o[0];\n"
|
||||||
|
//" gl_FragDepth = 0.0;\n"
|
||||||
|
"}\n";
|
||||||
|
|
||||||
|
if (!CompileProgram(source)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_valid_ = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GL4Shader::CompileProgram(std::string source) {
|
||||||
|
assert_zero(program_);
|
||||||
|
|
||||||
|
translated_disassembly_ = std::move(source);
|
||||||
|
const char* source_str = translated_disassembly_.c_str();
|
||||||
|
|
||||||
|
program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex
|
||||||
|
? GL_VERTEX_SHADER
|
||||||
|
: GL_FRAGMENT_SHADER,
|
||||||
|
1, &source_str);
|
||||||
|
if (!program_) {
|
||||||
|
PLOGE("Unable to create shader program");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLint link_status = 0;
|
||||||
|
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
|
||||||
|
if (!link_status) {
|
||||||
|
// log_length includes the null character.
|
||||||
|
GLint log_length = 0;
|
||||||
|
glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length);
|
||||||
|
std::string info_log;
|
||||||
|
info_log.resize(log_length - 1);
|
||||||
|
glGetProgramInfoLog(program_, log_length, &log_length,
|
||||||
|
const_cast<char*>(info_log.data()));
|
||||||
|
PLOGE("Unable to link program: %s", info_log.c_str());
|
||||||
|
error_log_ = std::move(info_log);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace gl4
|
} // namespace gl4
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
#define XENIA_GPU_GL4_GL4_SHADER_H_
|
||||||
|
|
||||||
#include <xenia/common.h>
|
#include <xenia/common.h>
|
||||||
|
#include <xenia/gpu/gl4/gl_context.h>
|
||||||
#include <xenia/gpu/shader.h>
|
#include <xenia/gpu/shader.h>
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -19,10 +20,20 @@ namespace gl4 {
|
||||||
|
|
||||||
class GL4Shader : public Shader {
|
class GL4Shader : public Shader {
|
||||||
public:
|
public:
|
||||||
using Shader::Shader;
|
GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
|
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||||
|
~GL4Shader() override;
|
||||||
|
|
||||||
|
GLuint program() const { return program_; }
|
||||||
|
|
||||||
|
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||||
|
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||||
|
GL4Shader* vertex_shader);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool TranslateImpl() override;
|
bool CompileProgram(std::string source);
|
||||||
|
|
||||||
|
GLuint program_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gl4
|
} // namespace gl4
|
||||||
|
|
|
@ -16,10 +16,14 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
|
||||||
using namespace xe::gpu::ucode;
|
using namespace xe::gpu::ucode;
|
||||||
|
using namespace xe::gpu::xenos;
|
||||||
|
|
||||||
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||||
: shader_type_(shader_type), data_hash_(data_hash), is_valid_(false) {
|
: shader_type_(shader_type),
|
||||||
|
data_hash_(data_hash),
|
||||||
|
has_prepared_(false),
|
||||||
|
is_valid_(false) {
|
||||||
data_.resize(dword_count);
|
data_.resize(dword_count);
|
||||||
poly::copy_and_swap(data_.data(), dword_ptr, dword_count);
|
poly::copy_and_swap(data_.data(), dword_ptr, dword_count);
|
||||||
std::memset(&alloc_counts_, 0, sizeof(alloc_counts_));
|
std::memset(&alloc_counts_, 0, sizeof(alloc_counts_));
|
||||||
|
@ -35,18 +39,7 @@ Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
GatherIO();
|
GatherIO();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Shader::Translate() {
|
Shader::~Shader() = default;
|
||||||
assert_false(is_valid_);
|
|
||||||
|
|
||||||
// TODO(benvanik): disk cache/etc - lookup hash and load if found.
|
|
||||||
// TODO(benvanik): dump to disk.
|
|
||||||
|
|
||||||
// Attempt implementation-specific translation.
|
|
||||||
// This may take awhile, and probably will fail.
|
|
||||||
// TODO(benvanik): parallelize? (allow two translations at once, etc).
|
|
||||||
is_valid_ = TranslateImpl();
|
|
||||||
return is_valid_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Shader::GatherIO() {
|
void Shader::GatherIO() {
|
||||||
// Process all execution blocks.
|
// Process all execution blocks.
|
||||||
|
@ -203,44 +196,43 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
el->vtx_fetch = *vtx;
|
el->vtx_fetch = *vtx;
|
||||||
el->format = vtx->format;
|
el->format = static_cast<VertexFormat>(vtx->format);
|
||||||
el->is_normalized = vtx->num_format_all == 0;
|
el->is_normalized = vtx->num_format_all == 0;
|
||||||
el->is_signed = vtx->format_comp_all == 1;
|
el->is_signed = vtx->format_comp_all == 1;
|
||||||
el->offset_words = vtx->offset;
|
el->offset_words = vtx->offset;
|
||||||
el->size_words = 0;
|
el->size_words = 0;
|
||||||
switch (el->format) {
|
switch (el->format) {
|
||||||
case FMT_8_8_8_8:
|
case VertexFormat::k_8_8_8_8:
|
||||||
case FMT_2_10_10_10:
|
case VertexFormat::k_2_10_10_10:
|
||||||
case FMT_10_11_11:
|
case VertexFormat::k_10_11_11:
|
||||||
case FMT_11_11_10:
|
case VertexFormat::k_11_11_10:
|
||||||
el->size_words = 1;
|
el->size_words = 1;
|
||||||
break;
|
break;
|
||||||
case FMT_16_16:
|
case VertexFormat::k_16_16:
|
||||||
case FMT_16_16_FLOAT:
|
case VertexFormat::k_16_16_FLOAT:
|
||||||
el->size_words = 1;
|
el->size_words = 1;
|
||||||
break;
|
break;
|
||||||
case FMT_16_16_16_16:
|
case VertexFormat::k_16_16_16_16:
|
||||||
case FMT_16_16_16_16_FLOAT:
|
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||||
el->size_words = 2;
|
el->size_words = 2;
|
||||||
break;
|
break;
|
||||||
case FMT_32:
|
case VertexFormat::k_32:
|
||||||
case FMT_32_FLOAT:
|
case VertexFormat::k_32_FLOAT:
|
||||||
el->size_words = 1;
|
el->size_words = 1;
|
||||||
break;
|
break;
|
||||||
case FMT_32_32:
|
case VertexFormat::k_32_32:
|
||||||
case FMT_32_32_FLOAT:
|
case VertexFormat::k_32_32_FLOAT:
|
||||||
el->size_words = 2;
|
el->size_words = 2;
|
||||||
break;
|
break;
|
||||||
case FMT_32_32_32_FLOAT:
|
case VertexFormat::k_32_32_32_FLOAT:
|
||||||
el->size_words = 3;
|
el->size_words = 3;
|
||||||
break;
|
break;
|
||||||
case FMT_32_32_32_32:
|
case VertexFormat::k_32_32_32_32:
|
||||||
case FMT_32_32_32_32_FLOAT:
|
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||||
el->size_words = 4;
|
el->size_words = 4;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
XELOGE("Unknown vertex format: %d", el->format);
|
assert_unhandled_case(el->format);
|
||||||
assert_always();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,21 +20,19 @@ namespace gpu {
|
||||||
|
|
||||||
class Shader {
|
class Shader {
|
||||||
public:
|
public:
|
||||||
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
virtual ~Shader();
|
||||||
uint32_t dword_count);
|
|
||||||
|
|
||||||
ShaderType type() const { return shader_type_; }
|
ShaderType type() const { return shader_type_; }
|
||||||
|
bool has_prepared() const { return has_prepared_; }
|
||||||
bool is_valid() const { return is_valid_; }
|
bool is_valid() const { return is_valid_; }
|
||||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||||
const std::string& translated_disassembly() const {
|
const std::string& translated_disassembly() const {
|
||||||
return translated_disassembly_;
|
return translated_disassembly_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Translate();
|
|
||||||
|
|
||||||
struct BufferDescElement {
|
struct BufferDescElement {
|
||||||
ucode::instr_fetch_vtx_t vtx_fetch;
|
ucode::instr_fetch_vtx_t vtx_fetch;
|
||||||
uint32_t format;
|
xenos::VertexFormat format;
|
||||||
uint32_t offset_words;
|
uint32_t offset_words;
|
||||||
uint32_t size_words;
|
uint32_t size_words;
|
||||||
bool is_signed;
|
bool is_signed;
|
||||||
|
@ -76,7 +74,8 @@ class Shader {
|
||||||
const std::vector<ucode::instr_cf_alloc_t>& allocs() const { return allocs_; }
|
const std::vector<ucode::instr_cf_alloc_t>& allocs() const { return allocs_; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual bool TranslateImpl() = 0;
|
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
||||||
|
uint32_t dword_count);
|
||||||
|
|
||||||
void GatherIO();
|
void GatherIO();
|
||||||
void GatherAlloc(const ucode::instr_cf_alloc_t* cf);
|
void GatherAlloc(const ucode::instr_cf_alloc_t* cf);
|
||||||
|
@ -87,10 +86,12 @@ class Shader {
|
||||||
ShaderType shader_type_;
|
ShaderType shader_type_;
|
||||||
uint64_t data_hash_;
|
uint64_t data_hash_;
|
||||||
std::vector<uint32_t> data_;
|
std::vector<uint32_t> data_;
|
||||||
|
bool has_prepared_;
|
||||||
bool is_valid_;
|
bool is_valid_;
|
||||||
|
|
||||||
std::string ucode_disassembly_;
|
std::string ucode_disassembly_;
|
||||||
std::string translated_disassembly_;
|
std::string translated_disassembly_;
|
||||||
|
std::string error_log_;
|
||||||
|
|
||||||
AllocCounts alloc_counts_;
|
AllocCounts alloc_counts_;
|
||||||
std::vector<ucode::instr_cf_exec_t> execs_;
|
std::vector<ucode::instr_cf_exec_t> execs_;
|
||||||
|
|
|
@ -72,8 +72,8 @@ enum class MsaaSamples : uint32_t {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class ColorRenderTargetFormat : uint32_t {
|
enum class ColorRenderTargetFormat : uint32_t {
|
||||||
k8888 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
|
k_8_8_8_8 = 0, // D3DFMT_A8R8G8B8 (or ABGR?)
|
||||||
k8888Gamma = 1, // D3DFMT_A8R8G8B8 with gamma correction
|
k_8_8_8_8_GAMMA = 1, // D3DFMT_A8R8G8B8 with gamma correction
|
||||||
// ...
|
// ...
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -98,29 +98,47 @@ enum class CopyCommand : uint32_t {
|
||||||
|
|
||||||
// Subset of a2xx_sq_surfaceformat.
|
// Subset of a2xx_sq_surfaceformat.
|
||||||
enum class ColorFormat : uint32_t {
|
enum class ColorFormat : uint32_t {
|
||||||
kColor_8 = 2,
|
k_8 = 2,
|
||||||
kColor_1_5_5_5 = 3,
|
k_1_5_5_5 = 3,
|
||||||
kColor_5_6_5 = 4,
|
k_5_6_5 = 4,
|
||||||
kColor_6_5_5 = 5,
|
k_6_5_5 = 5,
|
||||||
kColor_8_8_8_8 = 6,
|
k_8_8_8_8 = 6,
|
||||||
kColor_2_10_10_10 = 7,
|
k_2_10_10_10 = 7,
|
||||||
kColor_8_A = 8,
|
k_8_A = 8,
|
||||||
kColor_8_B = 9,
|
k_8_B = 9,
|
||||||
kColor_8_8 = 10,
|
k_8_8 = 10,
|
||||||
kColor_8_8_8_8_A = 14,
|
k_8_8_8_8_A = 14,
|
||||||
kColor_4_4_4_4 = 15,
|
k_4_4_4_4 = 15,
|
||||||
kColor_10_11_11 = 16,
|
k_10_11_11 = 16,
|
||||||
kColor_11_11_10 = 17,
|
k_11_11_10 = 17,
|
||||||
kColor_16 = 24,
|
k_16 = 24,
|
||||||
kColor_16_16 = 25,
|
k_16_16 = 25,
|
||||||
kColor_16_16_16_16 = 26,
|
k_16_16_16_16 = 26,
|
||||||
kColor_16_FLOAT = 30,
|
k_16_FLOAT = 30,
|
||||||
kColor_16_16_FLOAT = 31,
|
k_16_16_FLOAT = 31,
|
||||||
kColor_16_16_16_16_FLOAT = 32,
|
k_16_16_16_16_FLOAT = 32,
|
||||||
kColor_32_FLOAT = 36,
|
k_32_FLOAT = 36,
|
||||||
kColor_32_32_FLOAT = 37,
|
k_32_32_FLOAT = 37,
|
||||||
kColor_32_32_32_32_FLOAT = 38,
|
k_32_32_32_32_FLOAT = 38,
|
||||||
kColor_2_10_10_10_FLOAT = 62,
|
k_2_10_10_10_FLOAT = 62,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class VertexFormat : uint32_t {
|
||||||
|
k_8_8_8_8 = 6,
|
||||||
|
k_2_10_10_10 = 7,
|
||||||
|
k_10_11_11 = 16,
|
||||||
|
k_11_11_10 = 17,
|
||||||
|
k_16_16 = 25,
|
||||||
|
k_16_16_16_16 = 26,
|
||||||
|
k_16_16_FLOAT = 31,
|
||||||
|
k_16_16_16_16_FLOAT = 32,
|
||||||
|
k_32 = 33,
|
||||||
|
k_32_32 = 34,
|
||||||
|
k_32_32_32_32 = 35,
|
||||||
|
k_32_FLOAT = 36,
|
||||||
|
k_32_32_FLOAT = 37,
|
||||||
|
k_32_32_32_32_FLOAT = 38,
|
||||||
|
k_32_32_32_FLOAT = 57,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
||||||
|
|
Loading…
Reference in New Issue