diff --git a/src/alloy/string_buffer.cc b/src/alloy/string_buffer.cc index bb9270def..d6cebc2c4 100644 --- a/src/alloy/string_buffer.cc +++ b/src/alloy/string_buffer.cc @@ -62,6 +62,8 @@ void StringBuffer::AppendBytes(const uint8_t* buffer, size_t length) { const char* StringBuffer::GetString() const { return buffer_.data(); } +std::string StringBuffer::to_string() { return std::string(buffer_.data()); } + char* StringBuffer::ToString() { return strdup(buffer_.data()); } } // namespace alloy diff --git a/src/alloy/string_buffer.h b/src/alloy/string_buffer.h index e2b294d64..549ddfe7a 100644 --- a/src/alloy/string_buffer.h +++ b/src/alloy/string_buffer.h @@ -31,6 +31,7 @@ class StringBuffer { void AppendBytes(const uint8_t* buffer, size_t length); const char* GetString() const; + std::string to_string(); char* ToString(); char* EncodeBase64(); diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index ba67ffe8c..426a0f3dc 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -1839,82 +1839,53 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { uint32_t el_index = 0; for (uint32_t i = 0; i < desc.element_count; ++i) { const auto& el = desc.elements[i]; - GLuint comp_count; - GLuint comp_size; + auto comp_count = GetVertexFormatComponentCount(el.format); GLenum comp_type; switch (el.format) { case VertexFormat::k_8_8_8_8: - comp_count = 4; - comp_size = 1; comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE; break; case VertexFormat::k_2_10_10_10: - comp_count = 4; - comp_size = 4; comp_type = el.is_signed ? GL_INT_2_10_10_10_REV : GL_UNSIGNED_INT_2_10_10_10_REV; break; case VertexFormat::k_10_11_11: - comp_count = 3; - comp_size = 4; assert_false(el.is_signed); comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV; break; /*case VertexFormat::k_11_11_10: break;*/ case VertexFormat::k_16_16: - comp_count = 2; - comp_size = 2; comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_FLOAT: - comp_count = 2; - comp_size = 2; comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_16_16_16_16: - comp_count = 4; - comp_size = 2; comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_16_16_FLOAT: - comp_count = 4; - comp_size = 2; comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_32: - comp_count = 1; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32: - comp_count = 2; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32_32_32: - comp_count = 4; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_FLOAT: - comp_count = 1; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_FLOAT: - comp_count = 2; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_32_FLOAT: - comp_count = 3; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_32_32_FLOAT: - comp_count = 4; - comp_size = 4; comp_type = GL_FLOAT; break; default: diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index d987f4a06..80702cf3a 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -10,6 +10,7 @@ #include #include +#include #include namespace xe { @@ -18,6 +19,9 @@ namespace gl4 { extern "C" GLEWContext* glewGetContext(); +// Stateful, but minimally. 
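
The `thread_local` translator declared just below is the whole concurrency story here: each thread that compiles shaders gets its own translator, so there is no locking, and the translator's large output buffer (see `kOutputCapacity` in the new header) is recycled across compiles instead of being reallocated. A minimal sketch of that reuse pattern, with `ScratchTranslator` as a hypothetical stand-in for the real class:

```cpp
#include <string>

// Hypothetical stand-in illustrating the reuse pattern: clearing the
// buffer rewinds it without releasing capacity, so repeated translations
// recycle one allocation.
class ScratchTranslator {
 public:
  std::string Translate(const char* source) {
    buffer_.clear();  // Drop contents, keep capacity.
    buffer_.append("// translated from: ");
    buffer_.append(source);
    return buffer_;
  }

 private:
  std::string buffer_;  // Grows once, then is reused on every call.
};

// One instance per thread: concurrent compiles never share a buffer.
thread_local ScratchTranslator scratch_translator;
```
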
+thread_local GL4ShaderTranslator shader_translator_; + GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count) : Shader(shader_type, data_hash, dword_ptr, dword_count), program_(0) {} @@ -106,6 +110,13 @@ bool GL4Shader::PrepareVertexShader( //" gl_Position = oPos;\n" "}\n"; + std::string translated_source = + shader_translator_.TranslateVertexShader(this, program_cntl); + if (translated_source.empty()) { + PLOGE("Vertex shader failed translation"); + return false; + } + if (!CompileProgram(source)) { return false; } @@ -133,6 +144,13 @@ bool GL4Shader::PreparePixelShader( //" gl_FragDepth = 0.0;\n" "}\n"; + std::string translated_source = shader_translator_.TranslatePixelShader( + this, program_cntl, vertex_shader->alloc_counts()); + if (translated_source.empty()) { + PLOGE("Pixel shader failed translation"); + return false; + } + if (!CompileProgram(source)) { return false; } diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc new file mode 100644 index 000000000..d61437d49 --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc @@ -0,0 +1,1662 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +using namespace xe::gpu::ucode; +using namespace xe::gpu::xenos; + +static const char chan_names[] = { + 'x', 'y', 'z', 'w', + // these only apply to FETCH dst's, and we shouldn't be using them: + '0', '1', '?', '_', +}; + +const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) { + switch (el.format) { + case VertexFormat::k_32: + return el.is_signed ? "int" : "uint"; + case VertexFormat::k_32_FLOAT: + return "float"; + case VertexFormat::k_16_16: + case VertexFormat::k_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float2" : "unorm float2"; + } else { + return el.is_signed ? "int2" : "uint2"; + } + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32_FLOAT: + return "float2"; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + return "int3"; // ? + case VertexFormat::k_32_32_32_FLOAT: + return "float3"; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_32_32_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float4" : "unorm float4"; + } else { + return el.is_signed ? 
"int4" : "uint4"; + } + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + return "float4"; + default: + XELOGE("Unknown vertex format: %d", el.format); + assert_always(); + return "float4"; + } +} + +GL4ShaderTranslator::GL4ShaderTranslator() + : output_(kOutputCapacity), tex_fetch_index_(0), dwords_(nullptr) {} + +GL4ShaderTranslator::~GL4ShaderTranslator() = default; + +void GL4ShaderTranslator::Reset(GL4Shader* shader) { + output_.Reset(); + shader_type_ = shader->type(); + tex_fetch_index_ = 0; + dwords_ = shader->data(); +} + +std::string GL4ShaderTranslator::TranslateVertexShader( + GL4Shader* vertex_shader, const xe_gpu_program_cntl_t& program_cntl) { + Reset(vertex_shader); + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + Append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(vertex_shader->sampler_inputs()); + + // Transform utilities. We adjust the output position in various ways + // as we can't do this via D3D11 APIs. + Append( + "cbuffer vs_consts : register(b3) {\n" + " float4 window;\n" // x,y,w,h + " float4 viewport_z_enable;\n" // min,(max - min),?,enabled + " float4 viewport_size;\n" // x,y,w,h + "};" + "float4 applyViewport(float4 pos) {\n" + " if (viewport_z_enable.w) {\n" + //" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n" + //" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n" + //" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n" + // w? + " } else {\n" + " pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + " + "float2(-1.0, 1.0);\n" + " pos.zw = float2(0.0, 1.0);\n" + " }\n" + " pos.xy += window.xy;\n" + " return pos;\n" + "}\n"); + + // Add vertex shader input. + Append("struct VS_INPUT {\n"); + uint32_t el_index = 0; + const auto& buffer_inputs = vertex_shader->buffer_inputs(); + for (uint32_t n = 0; n < buffer_inputs.count; n++) { + const auto& input = buffer_inputs.descs[n]; + for (uint32_t m = 0; m < input.element_count; m++) { + const auto& el = input.elements[m]; + const char* type_name = GetVertexFormatTypeName(el); + const auto& fetch = el.vtx_fetch; + uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel; + Append(" %s vf%u_%d : XE_VF%u;\n", type_name, fetch_slot, fetch.offset, + el_index); + el_index++; + } + } + Append("};\n"); + + // Add vertex shader output (pixel shader input). + const auto& alloc_counts = vertex_shader->alloc_counts(); + Append("struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + assert_true(alloc_counts.positions == 1); + Append(" float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators); + } + if (alloc_counts.point_size) { + Append(" float4 oPointSize : PSIZE;\n"); + } + Append("};\n"); + + // Vertex shader main() header. + Append( + "VS_OUTPUT main(VS_INPUT i) {\n" + " VS_OUTPUT o;\n"); + + // Always write position, as some shaders seem to only write certain values. 
+ if (alloc_counts.positions) { + Append(" o.oPos = float4(0.0, 0.0, 0.0, 1.0);\n"); + } + if (alloc_counts.point_size) { + Append(" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); + } + + // TODO(benvanik): remove this, if possible (though the compiler may be smart + // enough to do it for us). + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + Append(" o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n); + } + } + + // Add temporaries for any registers we may use. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= temp_regs; n++) { + Append(" float4 r%d = c[%d];\n", n, n); + } + Append(" float4 t;\n"); + + // Execute blocks. + const auto& execs = vertex_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (!TranslateExec(cf)) { + return ""; + } + } + + // main footer. + if (alloc_counts.positions) { + Append(" o.oPos = applyViewport(o.oPos);\n"); + } + Append( + " return o;\n" + "};\n"); + + return output_.to_string(); +} + +std::string GL4ShaderTranslator::TranslatePixelShader( + GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl, + const GL4Shader::AllocCounts& alloc_counts) { + Reset(pixel_shader); + + // We need an input VS to make decisions here. + // TODO(benvanik): do we need to pair VS/PS up and store the combination? + // If the same PS is used with different VS that output different amounts + // (and less than the number of required registers), things may die. + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + Append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(pixel_shader->sampler_inputs()); + + // Add vertex shader output (pixel shader input). + Append("struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + assert_true(alloc_counts.positions == 1); + Append(" float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators); + } + Append("};\n"); + + // Add pixel shader output. + Append("struct PS_OUTPUT {\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + Append(" float4 oC%d : SV_TARGET%d;\n", n, n); + if (program_cntl.ps_export_depth) { + // Is this per render-target? + Append(" float oD%d : SV_DEPTH%d;\n", n, n); + } + } + Append("};\n"); + + // Pixel shader main() header. + Append( + "PS_OUTPUT main(VS_OUTPUT i) {\n" + " PS_OUTPUT o;\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + Append(" o.oC%d = float4(1.0, 0.0, 0.0, 1.0);\n", n); + } + + // Add temporary registers. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) { + Append(" float4 r%d = c[%d];\n", n, n + 256); + } + Append(" float4 t;\n"); + Append(" float s;\n"); // scalar result (used for RETAIN_PREV) + + // Bring registers local. + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + Append(" r%d = i.o[%d];\n", n, n); + } + } + + // Execute blocks. 
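
Each exec record carries a packed `serialize` field that the loop below hands to `TranslateExec` (further down), which consumes it two bits at a time: bit 0 selects fetch versus ALU decoding for the slot, bit 1 marks a sync point. A self-contained sketch of that walk:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the clause walk TranslateExec performs: two bits per
// instruction slot, shifted out low to high. The real code also reads a
// 3-dword instruction record at (cf.address + i) for each slot.
void WalkClause(uint32_t serialize, uint32_t count) {
  uint32_t sequence = serialize;
  for (uint32_t i = 0; i < count; ++i) {
    bool is_fetch = (sequence & 0x1) != 0;
    bool sync = (sequence & 0x2) != 0;
    std::printf("slot %u: %s%s\n", i, sync ? "(S) " : "",
                is_fetch ? "FETCH" : "ALU");
    sequence >>= 2;
  }
}

int main() {
  WalkClause(0x1, 2);  // slot 0: FETCH, slot 1: ALU
  return 0;
}
```
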
+ const auto& execs = pixel_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (!TranslateExec(cf)) { + return ""; + } + } + + // main footer. + Append( + " return o;\n" + "}\n"); + + return output_.to_string(); +} + +void GL4ShaderTranslator::AppendTextureHeader( + const GL4Shader::SamplerInputs& sampler_inputs) { + bool fetch_setup[32] = {false}; + + // 1 texture per constant slot, 1 sampler per fetch. + for (uint32_t n = 0; n < sampler_inputs.count; n++) { + const auto& input = sampler_inputs.descs[n]; + const auto& fetch = input.tex_fetch; + + // Add texture, if needed. + if (!fetch_setup[fetch.const_idx]) { + fetch_setup[fetch.const_idx] = true; + const char* texture_type = nullptr; + switch (fetch.dimension) { + case DIMENSION_1D: + texture_type = "Texture1D"; + break; + default: + case DIMENSION_2D: + texture_type = "Texture2D"; + break; + case DIMENSION_3D: + texture_type = "Texture3D"; + break; + case DIMENSION_CUBE: + texture_type = "TextureCube"; + break; + } + Append("%s x_texture_%d;\n", texture_type, fetch.const_idx); + } + + // Add sampler. + Append("SamplerState x_sampler_%d;\n", n); + } +} + +void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs_constants) { + if (negate) { + Append("-"); + } + if (type) { + // Register. + if (num & 0x80) { + Append("abs("); + } + Append("r%u", num & 0x7F); + if (num & 0x80) { + Append(")"); + } + } else { + // Constant. + if (abs_constants) { + Append("abs("); + } + Append("c[%u]", is_pixel_shader() ? num + 256 : num); + if (abs_constants) { + Append(")"); + } + } + if (swiz) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } +} + +void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { + if (!dst_exp) { + // Register. + Append("r%u", num); + } else { + // Export. + switch (shader_type_) { + case ShaderType::kVertex: + switch (num) { + case 62: + Append("o.oPos"); + break; + case 63: + Append("o.oPointSize"); + break; + default: + // Varying. + Append("o.o[%u]", num); + ; + break; + } + break; + case ShaderType::kPixel: + switch (num) { + case 0: + Append("o.oC0"); + break; + default: + // TODO(benvanik): other render targets? + // TODO(benvanik): depth? + assert_always(); + break; + } + break; + } + } +} + +void GL4ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // If masking, store to a temporary variable and clean it up later. + Append("t"); + } else { + // Store directly to output. + AppendDestRegName(num, dst_exp); + } +} + +void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // Masking. + Append(" "); + AppendDestRegName(num, dst_exp); + Append(" = float4("); + for (int i = 0; i < 4; i++) { + // TODO(benvanik): mask out values? mix in old value as temp? + // Append("%c", (mask & 0x1) ? 
chan_names[i] : 'w'); + if (!(mask & 0x1)) { + AppendDestRegName(num, dst_exp); + } else { + Append("t"); + } + Append(".%c", chan_names[i]); + mask >>= 1; + if (i < 3) { + Append(", "); + } + } + Append(");\n"); + } +} + +void GL4ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs_constants) { + if (negate) { + Append("-"); + } + if (type) { + if (num & 0x80) { + Append("|"); + } + Append("R%u", num & 0x7F); + if (num & 0x80) { + Append("|"); + } + } else { + if (abs_constants) { + Append("|"); + } + num += is_pixel_shader() ? 256 : 0; + Append("C%u", num); + if (abs_constants) { + Append("|"); + } + } + if (swiz) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } +} + +void GL4ShaderTranslator::PrintDstReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + Append("%s%u", dst_exp ? "export" : "R", num); + if (mask != 0xf) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", (mask & 0x1) ? chan_names[i] : '_'); + mask >>= 1; + } + } +} + +void GL4ShaderTranslator::PrintExportComment(uint32_t num) { + const char* name = nullptr; + switch (shader_type_) { + case ShaderType::kVertex: + switch (num) { + case 62: + name = "gl_Position"; + break; + case 63: + name = "gl_PointSize"; + break; + } + break; + case ShaderType::kPixel: + switch (num) { + case 0: + name = "gl_FragColor"; + break; + } + break; + } + /* if we had a symbol table here, we could look + * up the name of the varying.. + */ + if (name) { + Append("\t; %s", name); + } +} + +bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(" + "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(" * "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && + alu.src1_swiz == alu.src2_swiz && + alu.src1_reg_negate == alu.src2_reg_negate) { + // This is a mov. 
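
Since `max(a, a) == a`, a MAXv whose two source descriptors agree in every field is the microcode idiom for a plain move, and the emitter collapses it to the single source reference appended by the continuation below. The equivalence test, pulled out as a standalone sketch (field names mirror `instr_alu_t`):

```cpp
#include <cstdint>

// Mirrors the condition above: two ALU source operands are identical only
// if register, select, swizzle, and negate all agree.
struct SrcOperand {
  uint32_t reg;
  uint32_t sel;
  uint32_t swiz;
  uint32_t negate;
};

inline bool IsAluMov(const SrcOperand& a, const SrcOperand& b) {
  return a.reg == b.reg && a.sel == b.sel && a.swiz == b.swiz &&
         a.negate == b.negate;
}
```
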
+ AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + } else { + Append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + } + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("min("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu, + const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").x %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").x ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").y %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").y ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").z %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").z ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").w %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").w ? 
1.0 : 0.0)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SETEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_SETGTv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">"); +} +bool GL4ShaderTranslator::TranslateALU_SETGTEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_SETNEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "!="); +} + +bool GL4ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("frac("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("trunc("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("floor("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("mad("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu, + const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as + // values. + Append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").x %s 0.0 ? 
(", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").x : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").x, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").y %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").y : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").y, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").z %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").z : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").z, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").w %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").w : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").w)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_CNDEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_CNDGTEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_CNDGTv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">"); +} + +bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").xyz, float4("); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").xyz)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").xy, float4("); + 
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").xy) + "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +// CUBEv + +bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("max("); + Append("max("); + Append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".y), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".z), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".w)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +// ... + +bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { + // This is a mov. + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + } else { + Append("max("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".y).xxxx"); + } + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("min("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".y).xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu, + const char* op) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("(("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x %s 0.0) ? 
1.0 : 0.0).xxxx", op); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SETEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_SETGTs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">"); +} +bool GL4ShaderTranslator::TranslateALU_SETGTEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "!="); +} + +bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("(1.0 / "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c * ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) { + return TranslateALU_MUL_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c + ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) { + return TranslateALU_ADD_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t 
swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c - ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) { + return TranslateALU_SUB_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_RETAIN_PREV(const instr_alu_t& alu) { + // TODO(benvanik): pull out prev value in s. + return false; +} + +typedef bool (GL4ShaderTranslator::*TranslateFn)(const instr_alu_t& alu); +typedef struct { + uint32_t num_srcs; + const char* name; + TranslateFn fn; +} TranslateInfo; +#define ALU_INSTR(opc, num_srcs) \ + { num_srcs, #opc, nullptr } +#define ALU_INSTR_IMPL(opc, num_srcs) \ + { num_srcs, #opc, &GL4ShaderTranslator::TranslateALU_##opc } + +bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { + static TranslateInfo vector_alu_instrs[0x20] = { + ALU_INSTR_IMPL(ADDv, 2), // 0 + ALU_INSTR_IMPL(MULv, 2), // 1 + ALU_INSTR_IMPL(MAXv, 2), // 2 + ALU_INSTR_IMPL(MINv, 2), // 3 + ALU_INSTR_IMPL(SETEv, 2), // 4 + ALU_INSTR_IMPL(SETGTv, 2), // 5 + ALU_INSTR_IMPL(SETGTEv, 2), // 6 + ALU_INSTR_IMPL(SETNEv, 2), // 7 + ALU_INSTR_IMPL(FRACv, 1), // 8 + ALU_INSTR_IMPL(TRUNCv, 1), // 9 + ALU_INSTR_IMPL(FLOORv, 1), // 10 + ALU_INSTR_IMPL(MULADDv, 3), // 11 + ALU_INSTR_IMPL(CNDEv, 3), // 12 + ALU_INSTR_IMPL(CNDGTEv, 3), // 13 + ALU_INSTR_IMPL(CNDGTv, 3), // 14 + ALU_INSTR_IMPL(DOT4v, 2), // 15 + ALU_INSTR_IMPL(DOT3v, 2), // 16 + ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ??? 
+ ALU_INSTR(CUBEv, 2), // 18 + ALU_INSTR_IMPL(MAX4v, 1), // 19 + ALU_INSTR(PRED_SETE_PUSHv, 2), // 20 + ALU_INSTR(PRED_SETNE_PUSHv, 2), // 21 + ALU_INSTR(PRED_SETGT_PUSHv, 2), // 22 + ALU_INSTR(PRED_SETGTE_PUSHv, 2), // 23 + ALU_INSTR(KILLEv, 2), // 24 + ALU_INSTR(KILLGTv, 2), // 25 + ALU_INSTR(KILLGTEv, 2), // 26 + ALU_INSTR(KILLNEv, 2), // 27 + ALU_INSTR(DSTv, 2), // 28 + ALU_INSTR(MOVAv, 1), // 29 + }; + static TranslateInfo scalar_alu_instrs[0x40] = { + ALU_INSTR(ADDs, 1), // 0 + ALU_INSTR(ADD_PREVs, 1), // 1 + ALU_INSTR(MULs, 1), // 2 + ALU_INSTR(MUL_PREVs, 1), // 3 + ALU_INSTR(MUL_PREV2s, 1), // 4 + ALU_INSTR_IMPL(MAXs, 1), // 5 + ALU_INSTR_IMPL(MINs, 1), // 6 + ALU_INSTR_IMPL(SETEs, 1), // 7 + ALU_INSTR_IMPL(SETGTs, 1), // 8 + ALU_INSTR_IMPL(SETGTEs, 1), // 9 + ALU_INSTR_IMPL(SETNEs, 1), // 10 + ALU_INSTR(FRACs, 1), // 11 + ALU_INSTR(TRUNCs, 1), // 12 + ALU_INSTR(FLOORs, 1), // 13 + ALU_INSTR(EXP_IEEE, 1), // 14 + ALU_INSTR(LOG_CLAMP, 1), // 15 + ALU_INSTR(LOG_IEEE, 1), // 16 + ALU_INSTR(RECIP_CLAMP, 1), // 17 + ALU_INSTR(RECIP_FF, 1), // 18 + ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 + ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 + ALU_INSTR(RECIPSQ_FF, 1), // 21 + ALU_INSTR(RECIPSQ_IEEE, 1), // 22 + ALU_INSTR(MOVAs, 1), // 23 + ALU_INSTR(MOVA_FLOORs, 1), // 24 + ALU_INSTR(SUBs, 1), // 25 + ALU_INSTR(SUB_PREVs, 1), // 26 + ALU_INSTR(PRED_SETEs, 1), // 27 + ALU_INSTR(PRED_SETNEs, 1), // 28 + ALU_INSTR(PRED_SETGTs, 1), // 29 + ALU_INSTR(PRED_SETGTEs, 1), // 30 + ALU_INSTR(PRED_SET_INVs, 1), // 31 + ALU_INSTR(PRED_SET_POPs, 1), // 32 + ALU_INSTR(PRED_SET_CLRs, 1), // 33 + ALU_INSTR(PRED_SET_RESTOREs, 1), // 34 + ALU_INSTR(KILLEs, 1), // 35 + ALU_INSTR(KILLGTs, 1), // 36 + ALU_INSTR(KILLGTEs, 1), // 37 + ALU_INSTR(KILLNEs, 1), // 38 + ALU_INSTR(KILLONEs, 1), // 39 + ALU_INSTR(SQRT_IEEE, 1), // 40 + {0, 0, false}, + ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 + ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 + ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 + ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 + ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 + ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 + ALU_INSTR(SIN, 1), // 48 + ALU_INSTR(COS, 1), // 49 + ALU_INSTR(RETAIN_PREV, 1), // 50 + }; +#undef ALU_INSTR +#undef ALU_INSTR_IMPL + + if (!alu->scalar_write_mask && !alu->vector_write_mask) { + Append(" // \n"); + return true; + } + + if (alu->vector_write_mask) { + // Disassemble vector op. + const auto& iv = vector_alu_instrs[alu->vector_opc]; + Append(" // %sALU:\t", sync ? "(S)" : " "); + Append("%s", iv.name); + if (alu->pred_select & 0x2) { + // seems to work similar to conditional execution in ARM instruction + // set, so let's use a similar syntax for now: + Append((alu->pred_select & 0x1) ? "EQ" : "NE"); + } + Append("\t"); + PrintDstReg(alu->vector_dest, alu->vector_write_mask, alu->export_data); + Append(" = "); + if (iv.num_srcs == 3) { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->abs_constants); + Append(", "); + } + PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, + alu->src1_reg_negate, alu->abs_constants); + if (iv.num_srcs > 1) { + Append(", "); + PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, + alu->src2_reg_negate, alu->abs_constants); + } + if (alu->vector_clamp) { + Append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(alu->vector_dest); + } + Append("\n"); + + // Translate vector op. 
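
Translation proper then dispatches through the tables above: rows built with `ALU_INSTR` leave `fn` null and fall back to a placeholder comment, while `ALU_INSTR_IMPL` rows invoke the matching `TranslateALU_*` member. A compact sketch of that pointer-to-member dispatch:

```cpp
#include <cstdio>

// Sketch of member-pointer dispatch with null rows standing in for known
// but unimplemented opcodes, as in the vector/scalar tables above.
class Dispatcher {
 public:
  bool Translate(unsigned opc) {
    using Fn = bool (Dispatcher::*)();
    static const Fn table[3] = {&Dispatcher::Add, nullptr, &Dispatcher::Mul};
    if (opc >= 3 || !table[opc]) {
      std::printf("  // unimplemented op %u\n", opc);
      return true;  // Emit a placeholder and keep translating.
    }
    return (this->*table[opc])();
  }

 private:
  bool Add() { std::printf("  dst = a + b;\n"); return true; }
  bool Mul() { std::printf("  dst = a * b;\n"); return true; }
};

int main() {
  Dispatcher d;
  d.Translate(0);  // "  dst = a + b;"
  d.Translate(1);  // "  // unimplemented op 1"
  return 0;
}
```
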
+ if (iv.fn) { + Append(" "); + if (!(this->*iv.fn)(*alu)) { + return false; + } + } else { + Append(" // \n"); + } + } + + if (alu->scalar_write_mask || !alu->vector_write_mask) { + // 2nd optional scalar op: + + // Disassemble scalar op. + const auto& is = scalar_alu_instrs[alu->scalar_opc]; + Append(" // "); + Append("\t"); + if (is.name) { + Append("\t \t%s\t", is.name); + } else { + Append("\t \tOP(%u)\t", alu->scalar_opc); + } + PrintDstReg(get_alu_scalar_dest(*alu), alu->scalar_write_mask, + alu->export_data); + Append(" = "); + if (is.num_srcs == 2) { + // ADD_CONST_0 dest, [const], [reg] + uint32_t src3_swiz = alu->src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + PrintSrcReg(alu->src3_reg, 0, 0, alu->src3_reg_negate, + alu->abs_constants); + Append(".%c", chan_names[swiz_a]); + Append(", "); + uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | + (alu->src3_sel << 1); + PrintSrcReg(reg2, 1, 0, alu->src3_reg_negate, alu->abs_constants); + Append(".%c", chan_names[swiz_b]); + } else { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->abs_constants); + } + if (alu->scalar_clamp) { + Append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(get_alu_scalar_dest(*alu)); + } + Append("\n"); + + // Translate scalar op. + if (is.fn) { + Append(" "); + if (!(this->*is.fn)(*alu)) { + return false; + } + } else { + Append(" // \n"); + } + } + + return true; +} + +void GL4ShaderTranslator::PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz) { + Append("\tR%u.", dst_reg); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +void GL4ShaderTranslator::AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz) { + Append("r%u.", dst_reg); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +bool GL4ShaderTranslator::TranslateExec(const instr_cf_exec_t& cf) { + static const struct { + const char* name; + } cf_instructions[] = { +#define INSTR(opc, fxn) \ + { #opc } + INSTR(NOP, print_cf_nop), INSTR(EXEC, print_cf_exec), + INSTR(EXEC_END, print_cf_exec), INSTR(COND_EXEC, print_cf_exec), + INSTR(COND_EXEC_END, print_cf_exec), INSTR(COND_PRED_EXEC, print_cf_exec), + INSTR(COND_PRED_EXEC_END, print_cf_exec), + INSTR(LOOP_START, print_cf_loop), INSTR(LOOP_END, print_cf_loop), + INSTR(COND_CALL, print_cf_jmp_call), INSTR(RETURN, print_cf_jmp_call), + INSTR(COND_JMP, print_cf_jmp_call), INSTR(ALLOC, print_cf_alloc), + INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), + INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), + INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? 
+#undef INSTR + }; + + Append(" // %s ADDR(0x%x) CNT(0x%x)", cf_instructions[cf.opc].name, + cf.address, cf.count); + if (cf.yeild) { + Append(" YIELD"); + } + uint8_t vc = cf.vc_hi | (cf.vc_lo << 2); + if (vc) { + Append(" VC(0x%x)", vc); + } + if (cf.bool_addr) { + Append(" BOOL_ADDR(0x%x)", cf.bool_addr); + } + if (cf.address_mode == ABSOLUTE_ADDR) { + Append(" ABSOLUTE_ADDR"); + } + if (cf.is_cond_exec()) { + Append(" COND(%d)", cf.condition); + } + Append("\n"); + + uint32_t sequence = cf.serialize; + for (uint32_t i = 0; i < cf.count; i++) { + uint32_t alu_off = (cf.address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + const instr_fetch_t* fetch = + (const instr_fetch_t*)(dwords_ + alu_off * 3); + switch (fetch->opc) { + case VTX_FETCH: + if (!TranslateVertexFetch(&fetch->vtx, sync)) { + return false; + } + break; + case TEX_FETCH: + if (!TranslateTextureFetch(&fetch->tex, sync)) { + return false; + } + break; + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + assert_always(); + break; + } + } else { + const instr_alu_t* alu = (const instr_alu_t*)(dwords_ + alu_off * 3); + if (!TranslateALU(alu, sync)) { + return false; + } + } + sequence >>= 2; + } + + return true; +} + +bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx, + int sync) { + static const struct { + const char* name; + } fetch_types[0xff] = { +#define TYPE(id) \ + { #id } + TYPE(FMT_1_REVERSE), // 0 + {0}, + TYPE(FMT_8), // 2 + {0}, + {0}, + {0}, + TYPE(FMT_8_8_8_8), // 6 + TYPE(FMT_2_10_10_10), // 7 + {0}, + {0}, + TYPE(FMT_8_8), // 10 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_16), // 24 + TYPE(FMT_16_16), // 25 + TYPE(FMT_16_16_16_16), // 26 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32), // 33 + TYPE(FMT_32_32), // 34 + TYPE(FMT_32_32_32_32), // 35 + TYPE(FMT_32_FLOAT), // 36 + TYPE(FMT_32_32_FLOAT), // 37 + TYPE(FMT_32_32_32_32_FLOAT), // 38 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32_32_32_FLOAT), // 57 +#undef TYPE + }; + + // Disassemble. + Append(" // %sFETCH:\t", sync ? "(S)" : " "); + if (vtx->pred_select) { + Append(vtx->pred_condition ? "EQ" : "NE"); + } + PrintDestFecth(vtx->dst_reg, vtx->dst_swiz); + Append(" = R%u.", vtx->src_reg); + Append("%c", chan_names[vtx->src_swiz & 0x3]); + if (fetch_types[vtx->format].name) { + Append(" %s", fetch_types[vtx->format].name); + } else { + Append(" TYPE(0x%x)", vtx->format); + } + Append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED"); + if (!vtx->num_format_all) { + Append(" NORMALIZED"); + } + Append(" STRIDE(%u)", vtx->stride); + if (vtx->offset) { + Append(" OFFSET(%u)", vtx->offset); + } + Append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); + if (true) { + // XXX + Append(" src_reg_am=%u", vtx->src_reg_am); + Append(" dst_reg_am=%u", vtx->dst_reg_am); + Append(" num_format_all=%u", vtx->num_format_all); + Append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); + Append(" exp_adjust_all=%u", vtx->exp_adjust_all); + } + Append("\n"); + + // Translate. + Append(" "); + Append("r%u.xyzw", vtx->dst_reg); + Append(" = float4("); + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + // TODO(benvanik): detect xyzw = xyzw, etc. 
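
The destination swizzle decoded in the loop below packs three bits per output channel: values 0 through 3 select a source component, 4 forces 0.0, 5 forces 1.0, 7 keeps the register's previous contents, and 6 is still unidentified here. The same decode as a standalone sketch:

```cpp
#include <cstdint>
#include <cstdio>

// Standalone version of the per-channel fetch swizzle decode: three bits
// per destination channel, consumed low to high.
void PrintFetchSwizzle(uint32_t dst_swiz) {
  static const char chan_names[] = {'x', 'y', 'z', 'w'};
  for (int i = 0; i < 4; ++i) {
    uint32_t sel = dst_swiz & 0x7;
    if (sel == 4) {
      std::printf("0.0");
    } else if (sel == 5) {
      std::printf("1.0");
    } else if (sel == 6) {
      std::printf("?");  // Unknown encoding; the emitter prints "?" too.
    } else if (sel == 7) {
      std::printf("keep");  // Previous register value is preserved.
    } else {
      std::printf("src.%c", chan_names[sel & 0x3]);
    }
    std::printf(i < 3 ? ", " : "\n");
    dst_swiz >>= 3;
  }
}

int main() {
  PrintFetchSwizzle(0xB08);  // Prints: src.x, src.y, 0.0, 1.0
  return 0;
}
```
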
+  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
+  uint32_t component_count =
+      GetVertexFormatComponentCount(static_cast<VertexFormat>(vtx->format));
+  uint32_t dst_swiz = vtx->dst_swiz;
+  for (int i = 0; i < 4; i++) {
+    if ((dst_swiz & 0x7) == 4) {
+      Append("0.0");
+    } else if ((dst_swiz & 0x7) == 5) {
+      Append("1.0");
+    } else if ((dst_swiz & 0x7) == 6) {
+      // ?
+      Append("?");
+    } else if ((dst_swiz & 0x7) == 7) {
+      Append("r%u.%c", vtx->dst_reg, chan_names[i]);
+    } else {
+      Append("i.vf%u_%d.%c", fetch_slot, vtx->offset,
+             chan_names[dst_swiz & 0x3]);
+    }
+    if (i < 3) {
+      Append(", ");
+    }
+    dst_swiz >>= 3;
+  }
+  Append(");\n");
+  return true;
+}
+
+bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
+                                                int sync) {
+  int src_component_count = 0;
+  switch (tex->dimension) {
+    case DIMENSION_1D:
+      src_component_count = 1;
+      break;
+    default:
+    case DIMENSION_2D:
+      src_component_count = 2;
+      break;
+    case DIMENSION_3D:
+      src_component_count = 3;
+      break;
+    case DIMENSION_CUBE:
+      src_component_count = 3;
+      break;
+  }
+
+  // Disassemble.
+  static const char* filter[] = {
+      "POINT",    // TEX_FILTER_POINT
+      "LINEAR",   // TEX_FILTER_LINEAR
+      "BASEMAP",  // TEX_FILTER_BASEMAP
+  };
+  static const char* aniso_filter[] = {
+      "DISABLED",  // ANISO_FILTER_DISABLED
+      "MAX_1_1",   // ANISO_FILTER_MAX_1_1
+      "MAX_2_1",   // ANISO_FILTER_MAX_2_1
+      "MAX_4_1",   // ANISO_FILTER_MAX_4_1
+      "MAX_8_1",   // ANISO_FILTER_MAX_8_1
+      "MAX_16_1",  // ANISO_FILTER_MAX_16_1
+  };
+  static const char* arbitrary_filter[] = {
+      "2x4_SYM",   // ARBITRARY_FILTER_2X4_SYM
+      "2x4_ASYM",  // ARBITRARY_FILTER_2X4_ASYM
+      "4x2_SYM",   // ARBITRARY_FILTER_4X2_SYM
+      "4x2_ASYM",  // ARBITRARY_FILTER_4X2_ASYM
+      "4x4_SYM",   // ARBITRARY_FILTER_4X4_SYM
+      "4x4_ASYM",  // ARBITRARY_FILTER_4X4_ASYM
+  };
+  static const char* sample_loc[] = {
+      "CENTROID",  // SAMPLE_CENTROID
+      "CENTER",    // SAMPLE_CENTER
+  };
+  uint32_t src_swiz = tex->src_swiz;
+  Append(" // %sFETCH:\t", sync ? "(S)" : " ");
+  if (tex->pred_select) {
+    Append(tex->pred_condition ? 
"EQ" : "NE"); + } + PrintDestFecth(tex->dst_reg, tex->dst_swiz); + Append(" = R%u.", tex->src_reg); + for (int i = 0; i < src_component_count; i++) { + Append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + Append(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) { + Append(" VALID_ONLY"); + } + if (tex->tx_coord_denorm) { + Append(" DENORM"); + } + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MAG(%s)", filter[tex->mag_filter]); + } + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MIN(%s)", filter[tex->min_filter]); + } + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MIP(%s)", filter[tex->mip_filter]); + } + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { + Append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + } + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { + Append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + } + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + } + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + } + if (!tex->use_comp_lod) { + Append(" LOD(%u)", tex->use_comp_lod); + Append(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_lod) { + Append(" REG_LOD(%u)", tex->use_reg_lod); + } + if (tex->use_reg_gradients) { + Append(" USE_REG_GRADIENTS"); + } + Append(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) { + Append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); + } + Append("\n"); + + // Translate. + Append(" t = "); + Append("x_texture_%d.Sample(x_sampler_%d, r%u.", tex->const_idx, + tex_fetch_index_++, // hacky way to line up to tex buffers + tex->src_reg); + src_swiz = tex->src_swiz; + for (int i = 0; i < src_component_count; i++) { + Append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + Append(");\n"); + + Append(" r%u.xyzw = float4(", tex->dst_reg); + uint32_t dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + if (i) { + Append(", "); + } + if ((dst_swiz & 0x7) == 4) { + Append("0.0"); + } else if ((dst_swiz & 0x7) == 5) { + Append("1.0"); + } else if ((dst_swiz & 0x7) == 6) { + // ? + Append("?"); + } else if ((dst_swiz & 0x7) == 7) { + Append("r%u.%c", tex->dst_reg, chan_names[i]); + } else { + Append("t.%c", chan_names[dst_swiz & 0x3]); + } + dst_swiz >>= 3; + } + Append(");\n"); + return true; +} + +} // namespace gl4 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h new file mode 100644 index 000000000..22a9cdfbb --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader_translator.h @@ -0,0 +1,123 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ +#define XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ + +#include + +#include +#include +#include +#include +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +class GL4ShaderTranslator { + public: + static const uint32_t kMaxInterpolators = 16; + + GL4ShaderTranslator(); + ~GL4ShaderTranslator(); + + std::string TranslateVertexShader( + GL4Shader* vertex_shader, + const xenos::xe_gpu_program_cntl_t& program_cntl); + std::string TranslatePixelShader( + GL4Shader* pixel_shader, const xenos::xe_gpu_program_cntl_t& program_cntl, + const GL4Shader::AllocCounts& alloc_counts); + + protected: + ShaderType shader_type_; + uint32_t tex_fetch_index_; + const uint32_t* dwords_; + + static const int kOutputCapacity = 64 * 1024; + alloy::StringBuffer output_; + + bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } + bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } + + void Reset(GL4Shader* shader); + void Append(const char* format, ...) { + va_list args; + va_start(args, format); + output_.AppendVarargs(format, args); + va_end(args); + } + + void AppendTextureHeader(const GL4Shader::SamplerInputs& sampler_inputs); + + void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void AppendDestRegName(uint32_t num, uint32_t dst_exp); + void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintExportComment(uint32_t num); + + bool TranslateALU(const ucode::instr_alu_t* alu, int sync); + bool TranslateALU_ADDv(const ucode::instr_alu_t& alu); + bool TranslateALU_MULv(const ucode::instr_alu_t& alu); + bool TranslateALU_MAXv(const ucode::instr_alu_t& alu); + bool TranslateALU_MINv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETXXv(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_SETEv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTEv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETNEv(const ucode::instr_alu_t& alu); + bool TranslateALU_FRACv(const ucode::instr_alu_t& alu); + bool TranslateALU_TRUNCv(const ucode::instr_alu_t& alu); + bool TranslateALU_FLOORv(const ucode::instr_alu_t& alu); + bool TranslateALU_MULADDv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDXXv(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_CNDEv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDGTEv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDGTv(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu); + // CUBEv + bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu); + // ... 
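
One nonobvious encoding the scalar translators declared below have to deal with: the two-source const ops (`MUL_CONST_*`, `ADD_CONST_*`, `SUB_CONST_*`) have no second source field, so the extra register index is reassembled from the opcode's low bit plus bits borrowed from `src3_swiz` and `src3_sel`. The unpack, isolated from the `.cc` as a sketch:

```cpp
#include <cstdint>

// Isolated version of the bit twiddling in TranslateALU_*_CONST_0/1.
struct ScalarConstOperands {
  uint32_t swiz_a;  // Channel of the src3 (constant) operand.
  uint32_t swiz_b;  // Channel of the reassembled register operand.
  uint32_t reg2;    // Second register index, rebuilt from spare bits.
};

inline ScalarConstOperands UnpackScalarConst(uint32_t scalar_opc,
                                             uint32_t src3_swiz,
                                             uint32_t src3_sel) {
  uint32_t masked = src3_swiz & ~0x3Cu;
  ScalarConstOperands ops;
  ops.swiz_a = ((masked >> 6) - 1) & 0x3;
  ops.swiz_b = masked & 0x3;
  ops.reg2 = (scalar_opc & 1) | (src3_swiz & 0x3C) | (src3_sel << 1);
  return ops;
}
```
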
+ bool TranslateALU_MAXs(const ucode::instr_alu_t& alu); + bool TranslateALU_MINs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETXXs(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_SETEs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); + bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_ADD_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_SUB_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_SUB_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_RETAIN_PREV(const ucode::instr_alu_t& alu); + + void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz); + void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz); + + bool TranslateExec(const ucode::instr_cf_exec_t& cf); + bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync); + bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync); +}; + +} // namespace gl4 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index 1bba6e3ba..3f0c349ce 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -12,6 +12,8 @@ 'gl4_graphics_system.h', 'gl4_shader.cc', 'gl4_shader.h', + 'gl4_shader_translator.cc', + 'gl4_shader_translator.h', 'gl_context.cc', 'gl_context.h', ], diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 05438657c..820080133 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -30,6 +30,8 @@ class Shader { return translated_disassembly_; } + const uint32_t* data() const { return data_.data(); } + struct BufferDescElement { ucode::instr_fetch_vtx_t vtx_fetch; xenos::VertexFormat format; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 668f94aae..f23ec50f4 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -140,6 +140,36 @@ enum class VertexFormat : uint32_t { k_32_32_32_32_FLOAT = 38, k_32_32_32_FLOAT = 57, }; +inline int GetVertexFormatComponentCount(VertexFormat format) { + switch (format) { + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + return 1; + break; + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_FLOAT: + return 2; + break; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_32_32_32_FLOAT: + return 3; + break; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_32_32_32_FLOAT: + return 4; + break; + default: + assert_unhandled_case(format); + return 0; + } +} #define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \