From 51a80026291acc7ae3686686332bb5213f1a720a Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 6 Dec 2015 00:48:41 -0800 Subject: [PATCH] Moving GL backend to new shader translator. This seems to make a lot of things better, but may also break things. Cleanup to follow. --- src/xenia/gpu/gl4/gl4_command_processor.cc | 78 +- src/xenia/gpu/gl4/gl4_command_processor.h | 7 +- src/xenia/gpu/gl4/gl4_shader.cc | 281 +-- src/xenia/gpu/gl4/gl4_shader.h | 10 +- src/xenia/gpu/gl4/gl4_shader_translator.cc | 2027 -------------------- src/xenia/gpu/gl4/gl4_shader_translator.h | 165 -- src/xenia/gpu/gl4/ucode.h | 549 ------ src/xenia/gpu/gl4/ucode_disassembler.cc | 780 -------- src/xenia/gpu/gl4/ucode_disassembler.h | 28 - src/xenia/gpu/glsl_shader_translator.cc | 117 +- src/xenia/gpu/glsl_shader_translator.h | 2 + src/xenia/gpu/sampler_info.cc | 27 +- src/xenia/gpu/sampler_info.h | 4 +- src/xenia/gpu/shader.cc | 256 +-- src/xenia/gpu/shader.h | 68 +- src/xenia/gpu/shader_translator.cc | 81 +- src/xenia/gpu/shader_translator.h | 45 +- src/xenia/gpu/shader_translator_disasm.cc | 6 +- src/xenia/gpu/trace_viewer.cc | 99 +- src/xenia/gpu/trace_viewer.h | 10 +- src/xenia/gpu/ucode.h | 23 + src/xenia/gpu/xenos.h | 85 +- 22 files changed, 453 insertions(+), 4295 deletions(-) delete mode 100644 src/xenia/gpu/gl4/gl4_shader_translator.cc delete mode 100644 src/xenia/gpu/gl4/gl4_shader_translator.h delete mode 100644 src/xenia/gpu/gl4/ucode.h delete mode 100644 src/xenia/gpu/gl4/ucode_disassembler.cc delete mode 100644 src/xenia/gpu/gl4/ucode_disassembler.h diff --git a/src/xenia/gpu/gl4/gl4_command_processor.cc b/src/xenia/gpu/gl4/gl4_command_processor.cc index b1162a4b9..e94c2f33b 100644 --- a/src/xenia/gpu/gl4/gl4_command_processor.cc +++ b/src/xenia/gpu/gl4/gl4_command_processor.cc @@ -54,6 +54,7 @@ GL4CommandProcessor::CachedPipeline::~CachedPipeline() { GL4CommandProcessor::GL4CommandProcessor(GL4GraphicsSystem* graphics_system, kernel::KernelState* kernel_state) : 
CommandProcessor(graphics_system, kernel_state), + shader_translator_(GlslShaderTranslator::Dialect::kGL45), draw_batcher_(graphics_system_->register_file()), scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment) {} @@ -490,16 +491,12 @@ Shader* GL4CommandProcessor::LoadShader(ShaderType shader_type, // Perform translation. // If this fails the shader will be marked as invalid and ignored later. - if (shader_type == ShaderType::kVertex) { - shader_ptr->PrepareVertexShader(&shader_translator_); - } else { - shader_ptr->PreparePixelShader(&shader_translator_); - } + shader_ptr->Prepare(&shader_translator_); XELOGGPU("Set %s shader at %0.8X (%db):\n%s", shader_type == ShaderType::kVertex ? "vertex" : "pixel", guest_address, dword_count * 4, - shader_ptr->ucode_disassembly().c_str()); + shader_ptr->translated_shader()->ucode_disassembly().c_str()); } return shader_ptr; } @@ -782,8 +779,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRenderTargets() { // Note that write mask may be more permissive than we want, so we mix that // with the actual targets the pixel shader writes to. GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE}; - const auto& shader_targets = - active_pixel_shader_->alloc_counts().color_targets; + auto pixel_shader = active_pixel_shader_->translated_shader(); GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget}; if (enable_mode == ModeControl::kColorDepth) { uint32_t color_info[4] = { @@ -793,7 +789,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRenderTargets() { // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE for (int n = 0; n < xe::countof(color_info); n++) { uint32_t write_mask = (regs.rb_color_mask >> (n * 4)) & 0xF; - if (!write_mask || !shader_targets[n]) { + if (!write_mask || !pixel_shader->writes_color_target(n)) { // Unused, so keep disabled and set to wildcard so we'll take any // framebuffer that has it. 
continue; @@ -1366,14 +1362,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() { auto& regs = *register_file_; assert_not_null(active_vertex_shader_); - const auto& buffer_inputs = active_vertex_shader_->buffer_inputs(); - for (uint32_t buffer_index = 0; buffer_index < buffer_inputs.count; - ++buffer_index) { - const auto& desc = buffer_inputs.descs[buffer_index]; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; + const auto& vertex_bindings = + active_vertex_shader_->translated_shader()->vertex_bindings(); + for (const auto& vertex_binding : vertex_bindings) { + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (vertex_binding.fetch_constant / 3) * 6; const auto group = reinterpret_cast(&regs.values[r]); const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (desc.fetch_slot % 3) { + switch (vertex_binding.fetch_constant % 3) { case 0: fetch = &group->vertex_fetch_0; break; @@ -1405,17 +1401,21 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() { // TODO(benvanik): if we could find a way to avoid this, we could use // multidraw without flushing. - glVertexArrayVertexBuffer(vertex_shader->vao(), buffer_index, - scratch_buffer_.handle(), allocation.offset, - desc.stride_words * 4); + glVertexArrayVertexBuffer( + vertex_shader->vao(), + static_cast(vertex_binding.binding_index), + scratch_buffer_.handle(), allocation.offset, + vertex_binding.stride_words * 4); scratch_buffer_.Commit(std::move(allocation)); } else { // TODO(benvanik): if we could find a way to avoid this, we could use // multidraw without flushing. 
- glVertexArrayVertexBuffer(vertex_shader->vao(), buffer_index, - scratch_buffer_.handle(), allocation.offset, - desc.stride_words * 4); + glVertexArrayVertexBuffer( + vertex_shader->vao(), + static_cast(vertex_binding.binding_index), + scratch_buffer_.handle(), allocation.offset, + vertex_binding.stride_words * 4); } } @@ -1434,14 +1434,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() { bool has_setup_sampler[32] = {false}; // Vertex texture samplers. - const auto& vertex_sampler_inputs = active_vertex_shader_->sampler_inputs(); - for (size_t i = 0; i < vertex_sampler_inputs.count; ++i) { - const auto& desc = vertex_sampler_inputs.descs[i]; - if (has_setup_sampler[desc.fetch_slot]) { + const auto& vertex_sampler_inputs = + active_vertex_shader_->translated_shader()->texture_bindings(); + for (auto& texture_binding : vertex_sampler_inputs) { + if (has_setup_sampler[texture_binding.fetch_constant]) { continue; } - has_setup_sampler[desc.fetch_slot] = true; - auto status = PopulateSampler(desc); + has_setup_sampler[texture_binding.fetch_constant] = true; + auto status = PopulateSampler(texture_binding); if (status == UpdateStatus::kError) { return status; } else if (status == UpdateStatus::kMismatch) { @@ -1450,14 +1450,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() { } // Pixel shader texture sampler. 
- const auto& pixel_sampler_inputs = active_pixel_shader_->sampler_inputs(); - for (size_t i = 0; i < pixel_sampler_inputs.count; ++i) { - const auto& desc = pixel_sampler_inputs.descs[i]; - if (has_setup_sampler[desc.fetch_slot]) { + const auto& pixel_sampler_inputs = + active_pixel_shader_->translated_shader()->texture_bindings(); + for (auto& texture_binding : pixel_sampler_inputs) { + if (has_setup_sampler[texture_binding.fetch_constant]) { continue; } - has_setup_sampler[desc.fetch_slot] = true; - auto status = PopulateSampler(desc); + has_setup_sampler[texture_binding.fetch_constant] = true; + auto status = PopulateSampler(texture_binding); if (status == UpdateStatus::kError) { return UpdateStatus::kError; } else if (status == UpdateStatus::kMismatch) { @@ -1469,15 +1469,16 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() { } GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler( - const Shader::SamplerDesc& desc) { + const TranslatedShader::TextureBinding& texture_binding) { auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + texture_binding.fetch_constant * 6; auto group = reinterpret_cast(&regs.values[r]); auto& fetch = group->texture_fetch; // Reset slot. // If we fail, we still draw but with an invalid texture. 
- draw_batcher_.set_texture_sampler(desc.fetch_slot, 0); + draw_batcher_.set_texture_sampler(texture_binding.fetch_constant, 0); if (FLAGS_disable_textures) { return UpdateStatus::kCompatible; @@ -1495,7 +1496,8 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler( return UpdateStatus::kCompatible; // invalid texture used } SamplerInfo sampler_info; - if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) { + if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr, + &sampler_info)) { XELOGE("Unable to parse sampler info"); return UpdateStatus::kCompatible; // invalid texture used } @@ -1511,7 +1513,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler( } // Shaders will use bindless to fetch right from it. - draw_batcher_.set_texture_sampler(desc.fetch_slot, + draw_batcher_.set_texture_sampler(texture_binding.fetch_constant, entry_view->texture_sampler_handle); return UpdateStatus::kCompatible; diff --git a/src/xenia/gpu/gl4/gl4_command_processor.h b/src/xenia/gpu/gl4/gl4_command_processor.h index 699bab8ff..4373d7a21 100644 --- a/src/xenia/gpu/gl4/gl4_command_processor.h +++ b/src/xenia/gpu/gl4/gl4_command_processor.h @@ -24,8 +24,8 @@ #include "xenia/gpu/command_processor.h" #include "xenia/gpu/gl4/draw_batcher.h" #include "xenia/gpu/gl4/gl4_shader.h" -#include "xenia/gpu/gl4/gl4_shader_translator.h" #include "xenia/gpu/gl4/texture_cache.h" +#include "xenia/gpu/glsl_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/xthread.h" @@ -123,13 +123,14 @@ class GL4CommandProcessor : public CommandProcessor { UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info); UpdateStatus PopulateVertexBuffers(); UpdateStatus PopulateSamplers(); - UpdateStatus PopulateSampler(const Shader::SamplerDesc& desc); + UpdateStatus PopulateSampler( + const TranslatedShader::TextureBinding& texture_binding); bool IssueCopy() override; CachedFramebuffer* 
GetFramebuffer(GLuint color_targets[4], GLuint depth_target); - GL4ShaderTranslator shader_translator_; + GlslShaderTranslator shader_translator_; std::vector> all_shaders_; std::unordered_map shader_cache_; CachedFramebuffer* active_framebuffer_ = nullptr; diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index 66230c7e9..37805daaf 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -13,7 +13,6 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/gpu/gl4/gl4_gpu_flags.h" -#include "xenia/gpu/gl4/gl4_shader_translator.h" #include "xenia/gpu/gpu_flags.h" namespace xe { @@ -31,143 +30,68 @@ GL4Shader::~GL4Shader() { glDeleteVertexArrays(1, &vao_); } -std::string GL4Shader::GetHeader() { - static const std::string header = - "#version 450\n" - "#extension all : warn\n" - "#extension GL_ARB_bindless_texture : require\n" - "#extension GL_ARB_explicit_uniform_location : require\n" - "#extension GL_ARB_shader_draw_parameters : require\n" - "#extension GL_ARB_shader_storage_buffer_object : require\n" - "#extension GL_ARB_shading_language_420pack : require\n" - "#extension GL_ARB_fragment_coord_conventions : require\n" - "#define FLT_MAX 3.402823466e+38\n" - "precision highp float;\n" - "precision highp int;\n" - "layout(std140, column_major) uniform;\n" - "layout(std430, column_major) buffer;\n" - "\n" - // This must match DrawBatcher::CommonHeader. - "struct StateData {\n" - " vec4 window_scale;\n" - " vec4 vtx_fmt;\n" - " vec4 alpha_test;\n" - // TODO(benvanik): variable length. 
- " uvec2 texture_samplers[32];\n" - " vec4 float_consts[512];\n" - " int bool_consts[8];\n" - " int loop_consts[32];\n" - "};\n" - "layout(binding = 0) buffer State {\n" - " StateData states[];\n" - "};\n" - "\n" - "struct VertexData {\n" - " vec4 o[16];\n" - "};\n"; - return header; -} +bool GL4Shader::Prepare(ShaderTranslator* shader_translator) { + if (!Shader::Prepare(shader_translator)) { + return false; + } -std::string GL4Shader::GetFooter() { - // http://www.nvidia.com/object/cube_map_ogl_tutorial.html - // http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf - // src0 = Rn.zzxy, src1 = Rn.yxzz - // dst.W = FaceId; - // dst.Z = 2.0f * MajorAxis; - // dst.Y = S cube coordinate; - // dst.X = T cube coordinate; - /* - major axis - direction target sc tc ma - ---------- ------------------------------------ --- --- --- - +rx GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT=0 -rz -ry rx - -rx GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT=1 +rz -ry rx - +ry GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT=2 +rx +rz ry - -ry GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT=3 +rx -rz ry - +rz GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT=4 +rx -ry rz - -rz GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT=5 -rx -ry rz - */ - static const std::string footer = - "vec4 cube(vec4 src0, vec4 src1) {\n" - " vec3 src = vec3(src1.y, src1.x, src1.z);\n" - " vec3 abs_src = abs(src);\n" - " int face_id;\n" - " float sc;\n" - " float tc;\n" - " float ma;\n" - " if (abs_src.x > abs_src.y && abs_src.x > abs_src.z) {\n" - " if (src.x > 0.0) {\n" - " face_id = 0; sc = -abs_src.z; tc = -abs_src.y; ma = abs_src.x;\n" - " } else {\n" - " face_id = 1; sc = abs_src.z; tc = -abs_src.y; ma = abs_src.x;\n" - " }\n" - " } else if (abs_src.y > abs_src.x && abs_src.y > abs_src.z) {\n" - " if (src.y > 0.0) {\n" - " face_id = 2; sc = abs_src.x; tc = abs_src.z; ma = abs_src.y;\n" - " } else {\n" - " face_id = 3; sc = abs_src.x; tc = -abs_src.z; ma = abs_src.y;\n" - " }\n" - " } else {\n" - " if (src.z > 0.0) {\n" - " face_id = 4; sc = 
abs_src.x; tc = -abs_src.y; ma = abs_src.z;\n" - " } else {\n" - " face_id = 5; sc = -abs_src.x; tc = -abs_src.y; ma = abs_src.z;\n" - " }\n" - " }\n" - " float s = (sc / ma + 1.0) / 2.0;\n" - " float t = (tc / ma + 1.0) / 2.0;\n" - " return vec4(t, s, 2.0 * ma, float(face_id));\n" - "}\n"; - return footer; + // Build static vertex array descriptor. + if (!PrepareVertexArrayObject()) { + XELOGE("Unable to prepare vertex shader array object"); + return false; + } + + if (!CompileProgram()) { + return false; + } + + return true; } bool GL4Shader::PrepareVertexArrayObject() { glCreateVertexArrays(1, &vao_); - uint32_t el_index = 0; - for (uint32_t buffer_index = 0; buffer_index < buffer_inputs_.count; - ++buffer_index) { - const auto& desc = buffer_inputs_.descs[buffer_index]; - - for (uint32_t i = 0; i < desc.element_count; ++i, ++el_index) { - const auto& el = desc.elements[i]; - auto comp_count = xenos::GetVertexFormatComponentCount(el.format); + for (const auto& vertex_binding : translated_shader_->vertex_bindings()) { + for (const auto& attrib : vertex_binding.attributes) { + auto comp_count = GetVertexFormatComponentCount( + attrib.fetch_instr.attributes.data_format); GLenum comp_type; - switch (el.format) { + bool is_signed = attrib.fetch_instr.attributes.is_signed; + switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_8_8_8_8: - comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE; + comp_type = is_signed ? GL_BYTE : GL_UNSIGNED_BYTE; break; case VertexFormat::k_2_10_10_10: - comp_type = el.is_signed ? GL_INT_2_10_10_10_REV - : GL_UNSIGNED_INT_2_10_10_10_REV; + comp_type = is_signed ? 
GL_INT_2_10_10_10_REV + : GL_UNSIGNED_INT_2_10_10_10_REV; break; case VertexFormat::k_10_11_11: - // assert_false(el.is_signed); + // assert_false(is_signed); XELOGW("Signed k_10_11_11 vertex format not supported by GL"); comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV; break; /*case VertexFormat::k_11_11_10: break;*/ case VertexFormat::k_16_16: - comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; + comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_FLOAT: comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_16_16_16_16: - comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; + comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_16_16_FLOAT: comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_32: - comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; + comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32: - comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; + comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32_32_32: - comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; + comp_type = is_signed ? 
GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_FLOAT: comp_type = GL_FLOAT; @@ -182,145 +106,27 @@ bool GL4Shader::PrepareVertexArrayObject() { comp_type = GL_FLOAT; break; default: - assert_unhandled_case(el.format); + assert_unhandled_case(attrib.fetch_instr.attributes.data_format); return false; } - glEnableVertexArrayAttrib(vao_, el_index); - glVertexArrayAttribBinding(vao_, el_index, buffer_index); - glVertexArrayAttribFormat(vao_, el_index, comp_count, comp_type, - el.is_normalized, el.offset_words * 4); + glEnableVertexArrayAttrib(vao_, attrib.attrib_index); + glVertexArrayAttribBinding(vao_, attrib.attrib_index, + vertex_binding.binding_index); + glVertexArrayAttribFormat(vao_, attrib.attrib_index, comp_count, + comp_type, + !attrib.fetch_instr.attributes.is_integer, + attrib.fetch_instr.attributes.offset * 4); } } return true; } -bool GL4Shader::PrepareVertexShader(GL4ShaderTranslator* shader_translator) { - if (is_valid_) { - return is_valid_; - } - is_valid_ = false; - - // Build static vertex array descriptor. 
- if (!PrepareVertexArrayObject()) { - XELOGE("Unable to prepare vertex shader array object"); - return false; - } - std::string apply_transform = - "vec4 applyTransform(const in StateData state, vec4 pos) {\n" - " if (state.vtx_fmt.w == 0.0) {\n" - " // w is 1/W0, so fix it.\n" - " pos.w = 1.0 / pos.w;\n" - " }\n" - " if (state.vtx_fmt.x != 0.0) {\n" - " // Already multiplied by 1/W0, so pull it out.\n" - " pos.xy /= pos.w;\n" - " }\n" - " if (state.vtx_fmt.z != 0.0) {\n" - " // Already multiplied by 1/W0, so pull it out.\n" - " pos.z /= pos.w;\n" - " }\n" - " pos.xy *= state.window_scale.xy;\n" - " return pos;\n" - "}\n"; - std::string source = - GetHeader() + apply_transform + - "out gl_PerVertex {\n" - " vec4 gl_Position;\n" - " float gl_PointSize;\n" - " float gl_ClipDistance[];\n" - "};\n" - "layout(location = 0) flat out uint draw_id;\n" - "layout(location = 1) out VertexData vtx;\n" - "void processVertex(const in StateData state);\n" - "void main() {\n" + - (alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n" - : "") + - (alloc_counts().point_size ? 
" gl_PointSize = 1.0;\n" : "") + - " for (int i = 0; i < vtx.o.length(); ++i) {\n" - " vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n" - " }\n" - " const StateData state = states[gl_DrawIDARB];\n" - " processVertex(state);\n" - " gl_Position = applyTransform(state, gl_Position);\n" - " draw_id = gl_DrawIDARB;\n" - "}\n" + - GetFooter(); - - std::string translated_source = - shader_translator->TranslateVertexShader(this); - if (translated_source.empty()) { - XELOGE("Vertex shader failed translation"); - return false; - } - source += translated_source; - - if (!CompileProgram(source)) { - return false; - } - - is_valid_ = true; - return true; -} - -bool GL4Shader::PreparePixelShader(GL4ShaderTranslator* shader_translator) { - if (is_valid_) { - return is_valid_; - } - is_valid_ = false; - - std::string source = - GetHeader() + - "layout(origin_upper_left, pixel_center_integer) in vec4 gl_FragCoord;\n" - "layout(location = 0) flat in uint draw_id;\n" - "layout(location = 1) in VertexData vtx;\n" - "layout(location = 0) out vec4 oC[4];\n" - "void processFragment(const in StateData state);\n" - "void applyAlphaTest(int alpha_func, float alpha_ref) {\n" - " bool passes = false;\n" - " switch (alpha_func) {\n" - " case 0: break;\n" - " case 1: if (oC[0].a < alpha_ref) passes = true; break;\n" - " case 2: if (oC[0].a == alpha_ref) passes = true; break;\n" - " case 3: if (oC[0].a <= alpha_ref) passes = true; break;\n" - " case 4: if (oC[0].a > alpha_ref) passes = true; break;\n" - " case 5: if (oC[0].a != alpha_ref) passes = true; break;\n" - " case 6: if (oC[0].a >= alpha_ref) passes = true; break;\n" - " case 7: passes = true; break;\n" - " };\n" - " if (!passes) discard;\n" - "}\n" - "void main() {\n" + - " const StateData state = states[draw_id];\n" - " processFragment(state);\n" - " if (state.alpha_test.x != 0.0) {\n" - " applyAlphaTest(int(state.alpha_test.y), state.alpha_test.z);\n" - " }\n" - "}\n" + - GetFooter(); - - std::string translated_source = 
shader_translator->TranslatePixelShader(this); - if (translated_source.empty()) { - XELOGE("Pixel shader failed translation"); - return false; - } - - source += translated_source; - - if (!CompileProgram(source)) { - return false; - } - - is_valid_ = true; - return true; -} - -bool GL4Shader::CompileProgram(std::string source) { +bool GL4Shader::CompileProgram() { assert_zero(program_); - translated_disassembly_ = std::move(source); - const char* source_str = translated_disassembly_.c_str(); + auto source_str = translated_shader_->GetBinaryString(); // Save to disk, if we asked for it. auto base_path = FLAGS_dump_shaders.c_str(); @@ -349,18 +155,19 @@ bool GL4Shader::CompileProgram(std::string source) { // Note that we put the translated source first so we get good line numbers. f = fopen(file_name, "w"); if (f) { - fprintf(f, "%s", translated_disassembly_.c_str()); + fprintf(f, "%s", source_str.c_str()); fprintf(f, "/*\n"); - fprintf(f, "%s", ucode_disassembly_.c_str()); + fprintf(f, "%s", translated_shader_->ucode_disassembly().c_str()); fprintf(f, " */\n"); fclose(f); } } + auto source_str_ptr = source_str.c_str(); program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex ? 
GL_VERTEX_SHADER : GL_FRAGMENT_SHADER, - 1, &source_str); + 1, &source_str_ptr); if (!program_) { XELOGE("Unable to create shader program"); return false; diff --git a/src/xenia/gpu/gl4/gl4_shader.h b/src/xenia/gpu/gl4/gl4_shader.h index 7e173dcf6..5782997ec 100644 --- a/src/xenia/gpu/gl4/gl4_shader.h +++ b/src/xenia/gpu/gl4/gl4_shader.h @@ -13,14 +13,13 @@ #include #include "xenia/gpu/shader.h" +#include "xenia/gpu/shader_translator.h" #include "xenia/ui/gl/gl_context.h" namespace xe { namespace gpu { namespace gl4 { -class GL4ShaderTranslator; - class GL4Shader : public Shader { public: GL4Shader(ShaderType shader_type, uint64_t data_hash, @@ -30,14 +29,11 @@ class GL4Shader : public Shader { GLuint program() const { return program_; } GLuint vao() const { return vao_; } - bool PrepareVertexShader(GL4ShaderTranslator* shader_translator); - bool PreparePixelShader(GL4ShaderTranslator* shader_translator); + bool Prepare(ShaderTranslator* shader_translator); protected: - std::string GetHeader(); - std::string GetFooter(); bool PrepareVertexArrayObject(); - bool CompileProgram(std::string source); + bool CompileProgram(); GLuint program_; GLuint vao_; diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc deleted file mode 100644 index 9920c1681..000000000 --- a/src/xenia/gpu/gl4/gl4_shader_translator.cc +++ /dev/null @@ -1,2027 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/gl4/gl4_shader_translator.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/gpu/gpu_flags.h" - -namespace xe { -namespace gpu { -namespace gl4 { - -using namespace xe::gpu::gl4::ucode; - -#define Append(...) output_.AppendFormat(__VA_ARGS__) - -static const char chan_names[] = { - 'x', 'y', 'z', 'w', - // these only apply to FETCH dst's, and we shouldn't be using them: - '0', '1', '?', '_', -}; - -const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) { - switch (el.format) { - case VertexFormat::k_32: - case VertexFormat::k_32_FLOAT: - return "float"; - case VertexFormat::k_16_16: - case VertexFormat::k_32_32: - case VertexFormat::k_16_16_FLOAT: - case VertexFormat::k_32_32_FLOAT: - return "vec2"; - case VertexFormat::k_10_11_11: - case VertexFormat::k_11_11_10: - case VertexFormat::k_32_32_32_FLOAT: - return "vec3"; - case VertexFormat::k_8_8_8_8: - case VertexFormat::k_2_10_10_10: - case VertexFormat::k_16_16_16_16: - case VertexFormat::k_32_32_32_32: - case VertexFormat::k_16_16_16_16_FLOAT: - case VertexFormat::k_32_32_32_32_FLOAT: - return "vec4"; - default: - XELOGE("Unknown vertex format: %d", el.format); - assert_always(); - return "vec4"; - } -} - -GL4ShaderTranslator::GL4ShaderTranslator() : output_(kOutputCapacity) {} - -GL4ShaderTranslator::~GL4ShaderTranslator() = default; - -void GL4ShaderTranslator::Reset(GL4Shader* shader) { - output_.Reset(); - shader_type_ = shader->type(); - dwords_ = shader->data(); -} - -std::string GL4ShaderTranslator::TranslateVertexShader( - GL4Shader* vertex_shader) { - Reset(vertex_shader); - - // Add vertex shader input. 
- uint32_t el_index = 0; - const auto& buffer_inputs = vertex_shader->buffer_inputs(); - for (uint32_t n = 0; n < buffer_inputs.count; n++) { - const auto& input = buffer_inputs.descs[n]; - for (uint32_t m = 0; m < input.element_count; m++) { - const auto& el = input.elements[m]; - const char* type_name = GetVertexFormatTypeName(el); - const auto& fetch = el.vtx_fetch; - uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel; - Append("layout(location = %d) in %s vf%u_%d;\n", el_index, type_name, - fetch_slot, fetch.offset); - el_index++; - } - } - - // Vertex shader main() header. - Append("void processVertex(const in StateData state) {\n"); - - // Add temporaries for any registers we may use. - for (uint32_t n = 0; n < 64; n++) { - Append(" vec4 r%d = state.float_consts[%d];\n", n, n); - } - -#if FLOW_CONTROL - // Add temporary integer registers for loops that we may use. - // Each loop uses an address, counter, and constant. - // TODO(benvanik): Implement only for the used loops in the shader. - for (uint32_t n = 0; n < 32; n++) { - Append(" int i%d_cnt = 0;\n", n); - Append(" int i%d_addr = 0;\n", n); - } -#endif // FLOW_CONTROL - - Append(" vec4 t;\n"); - Append(" vec4 pv;\n"); // Previous Vector result. - Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV). - Append(" bool p = false;\n"); // Predicate temp, clause-local. - Append(" int a0 = 0;\n"); // Address register. - - // Execute blocks. - TranslateBlocks(vertex_shader); - - Append("}\n"); - return output_.to_string(); -} - -std::string GL4ShaderTranslator::TranslatePixelShader(GL4Shader* pixel_shader) { - Reset(pixel_shader); - - // We need an input VS to make decisions here. - // TODO(benvanik): do we need to pair VS/PS up and store the combination? - // If the same PS is used with different VS that output different amounts - // (and less than the number of required registers), things may die. - - // Pixel shader main() header. 
- Append("void processFragment(const in StateData state) {\n"); - - // Add temporary registers. - for (uint32_t n = 0; n < 64; n++) { - Append(" vec4 r%d = state.float_consts[%d];\n", n, n + 256); - } - Append(" vec4 t;\n"); - Append(" vec4 pv;\n"); // Previous Vector result. - Append(" float ps;\n"); // Previous Scalar result (used for RETAIN_PREV). - Append(" bool p = false;\n"); // Predicate temp, clause-local. - Append(" int a0 = 0;\n"); // Address register. - - // Bring registers local. - for (uint32_t n = 0; n < kMaxInterpolators; n++) { - Append(" r%d = vtx.o[%d];\n", n, n); - } - - // Execute blocks. - TranslateBlocks(pixel_shader); - - Append("}\n"); - return output_.to_string(); -} - -void GL4ShaderTranslator::AppendSrcReg(const ucode::instr_alu_t& op, int i) { - switch (i) { - case 1: { - int const_slot = 0; - AppendSrcReg(op, op.src1_reg, op.src1_sel, op.src1_swiz, - op.src1_reg_negate, const_slot); - break; - } - case 2: { - int const_slot = op.src1_sel ? 0 : 1; - AppendSrcReg(op, op.src2_reg, op.src2_sel, op.src2_swiz, - op.src2_reg_negate, const_slot); - break; - } - case 3: { - int const_slot = (op.src1_sel && op.src2_sel) ? 0 : 1; - AppendSrcReg(op, op.src3_reg, op.src3_sel, op.src3_swiz, - op.src3_reg_negate, const_slot); - break; - } - } -} - -void GL4ShaderTranslator::AppendSrcReg(const ucode::instr_alu_t& op, - uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, - int const_slot) { - if (negate) { - Append("-"); - } - if (type) { - // Register. - if (num & 0x80) { - Append("abs("); - } - Append("r%u", num & 0x7F); - if (num & 0x80) { - Append(")"); - } - } else { - // Constant. - if (op.abs_constants) { - Append("abs("); - } - Append("state.float_consts["); -#if FLOW_CONTROL - // NOTE(dariosamo): Some games don't seem to take into account the relative - // a0 - // offset even when they should due to const_slot being a different value. 
- if (op.const_0_rel_abs || op.const_1_rel_abs) { -#else - if ((const_slot == 0 && op.const_0_rel_abs) || - (const_slot == 1 && op.const_1_rel_abs)) { -#endif - if (op.relative_addr) { - assert_true(num < 256); - Append("a0 + %u", is_pixel_shader() ? num + 256 : num); - } else { - Append("a0"); - } - } else { - assert_true(num < 256); - Append("%u", is_pixel_shader() ? num + 256 : num); - } - Append("]"); - if (op.abs_constants) { - Append(")"); - } - } - if (swiz) { - Append("."); - for (int i = 0; i < 4; i++) { - Append("%c", chan_names[(swiz + i) & 0x3]); - swiz >>= 2; - } - } -} - -void GL4ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, - uint32_t abs_constants) { - if (negate) { - Append("-"); - } - if (type) { - if (num & 0x80) { - Append("|"); - } - Append("R%u", num & 0x7F); - if (num & 0x80) { - Append("|"); - } - } else { - if (abs_constants) { - Append("|"); - } - num += is_pixel_shader() ? 256 : 0; - Append("C%u", num); - if (abs_constants) { - Append("|"); - } - } - if (swiz) { - Append("."); - for (int i = 0; i < 4; i++) { - Append("%c", chan_names[(swiz + i) & 0x3]); - swiz >>= 2; - } - } -} - -void GL4ShaderTranslator::PrintVectorDstReg(const ucode::instr_alu_t& alu) { - Append("%s%u", alu.export_data ? "export" : "R", alu.vector_dest); - auto mask = alu.scalar_write_mask; - if (mask != 0xf) { - Append("."); - for (int i = 0; i < 4; i++) { - Append("%c", (mask & 0x1) ? chan_names[i] : '_'); - mask >>= 1; - } - } -} - -void GL4ShaderTranslator::PrintScalarDstReg(const ucode::instr_alu_t& alu) { - Append("%s%u", alu.export_data ? "export" : "R", - alu.export_data ? alu.vector_dest : alu.scalar_dest); - auto mask = alu.scalar_write_mask; - if (mask != 0xf) { - Append("."); - for (int i = 0; i < 4; i++) { - Append("%c", (mask & 0x1) ? 
chan_names[i] : '_'); - mask >>= 1; - } - } -} - -void GL4ShaderTranslator::PrintExportComment(uint32_t num) { - const char* name = nullptr; - switch (shader_type_) { - case ShaderType::kVertex: - switch (num) { - case 62: - name = "gl_Position"; - break; - case 63: - name = "gl_PointSize"; - break; - default: - name = "??"; - break; - } - break; - case ShaderType::kPixel: - switch (num) { - case 0: - name = "gl_FragColor"; - break; - default: - name = "??"; - break; - } - break; - } - /* if we had a symbol table here, we could look - * up the name of the varying.. - */ - if (name) { - Append("\t; %s", name); - } -} - -void GL4ShaderTranslator::BeginAppendVectorOp(const ucode::instr_alu_t& op) { - Append(" pv = ("); -} - -void GL4ShaderTranslator::AppendVectorOpSrcReg(const ucode::instr_alu_t& op, - int i) { - AppendSrcReg(op, i); -} - -void GL4ShaderTranslator::EndAppendVectorOp(const ucode::instr_alu_t& op, - uint32_t append_flags) { - Append(");\n"); - if (op.vector_clamp) { - Append(" pv = clamp(pv, 0.0, 1.0);\n"); - } - - // Special case exports. - // TODO(benvanik): special write that only chooses one field -- what field? x? - if (op.export_data) { - switch (shader_type_) { - case ShaderType::kVertex: - switch (op.vector_dest) { - case 63: - // Append(" gl_PointSize = pv.x;\n"); - assert_zero(op.vector_write_mask); - return; - } - break; - case ShaderType::kPixel: - switch (op.vector_dest) { - case 61: - // Append(" gl_FragDepth = pv.x;\n"); - assert_zero(op.vector_write_mask); - return; - } - break; - } - } - - if (op.export_data) { - // Export; this does some weird stuff to do special consts 0 and 1. - uint32_t write_mask = op.vector_write_mask; - uint32_t const_1_mask = op.scalar_write_mask; - for (int i = 0; i < 4; ++i, write_mask >>= 1, const_1_mask >>= 1) { - if (write_mask & 0x1) { - Append(" "); - AppendOpDestRegName(op, op.vector_dest); - Append(".%c = ", chan_names[i]); - if (const_1_mask & 0x1) { - // Special export of constant 1. 
- Append("1.0"); - } else { - // Normal source from calculated pv. - Append("pv.%c", chan_names[i]); - } - Append(";\n"); - } else if (op.scalar_dest_rel) { - // Special export of constant value 0. - Append(" "); - AppendOpDestRegName(op, op.vector_dest); - Append(".%c = 0.0;\n", chan_names[i]); - } - } - } else { - // Normal reg; just mask. - uint32_t write_mask = op.vector_write_mask; - for (int i = 0; i < 4; ++i, write_mask >>= 1) { - if (write_mask & 0x1) { - Append(" "); - AppendOpDestRegName(op, op.vector_dest); - Append(".%c = pv.%c;\n", chan_names[i], chan_names[i]); - } - } - } -} - -void GL4ShaderTranslator::BeginAppendScalarOp(const ucode::instr_alu_t& op) { - Append(" ps = ("); -} - -void GL4ShaderTranslator::AppendScalarOpSrcReg(const ucode::instr_alu_t& op, - int i) { - AppendSrcReg(op, i); -} - -void GL4ShaderTranslator::EndAppendScalarOp(const ucode::instr_alu_t& op, - uint32_t append_flags) { - Append(").x;\n"); - if (op.scalar_clamp) { - Append(" ps = clamp(ps, 0.0, 1.0);\n"); - } - - uint32_t dest_num; - uint32_t write_mask; - if (op.export_data) { - dest_num = op.vector_dest; - write_mask = op.scalar_write_mask & ~op.vector_write_mask; - } else { - dest_num = op.scalar_dest; - write_mask = op.scalar_write_mask; - } - - // Mask out certain fields. - for (int i = 0; i < 4; ++i, write_mask >>= 1) { - if (write_mask & 0x1) { - Append(" "); - AppendOpDestRegName(op, dest_num); - Append(".%c = ps;\n", chan_names[i]); - } - } -} - -void GL4ShaderTranslator::AppendOpDestRegName(const ucode::instr_alu_t& op, - uint32_t dest_num) { - if (!op.export_data) { - // Register. - // TODO(benvanik): relative? abs? etc - Append("r%u", dest_num); - } else { - // Export. - switch (shader_type_) { - case ShaderType::kVertex: - switch (dest_num) { - case 62: - Append("gl_Position"); - break; - case 63: - Append("gl_PointSize"); - break; - default: - // Varying. 
- Append("vtx.o[%u]", dest_num); - break; - } - break; - case ShaderType::kPixel: - switch (dest_num) { - case 0: - case 63: // ? masked? - Append("oC[0]"); - break; - case 1: - Append("oC[1]"); - break; - case 2: - Append("oC[2]"); - break; - case 3: - Append("oC[3]"); - break; - case 61: - Append("gl_FragDepth"); - break; - default: - // TODO(benvanik): other render targets? - assert_always(); - break; - } - break; - } - } -} - -bool GL4ShaderTranslator::TranslateALU_ADDv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - AppendVectorOpSrcReg(alu, 1); - Append(" + "); - AppendVectorOpSrcReg(alu, 2); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MULv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - AppendVectorOpSrcReg(alu, 1); - Append(" * "); - AppendVectorOpSrcReg(alu, 2); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MAXv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && - alu.src1_swiz == alu.src2_swiz && - alu.src1_reg_negate == alu.src2_reg_negate) { - // This is a mov. - AppendVectorOpSrcReg(alu, 1); - } else { - Append("max("); - AppendVectorOpSrcReg(alu, 1); - Append(", "); - AppendVectorOpSrcReg(alu, 2); - Append(")"); - } - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MINv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("min("); - AppendVectorOpSrcReg(alu, 1); - Append(", "); - AppendVectorOpSrcReg(alu, 2); - Append(")"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_SETXXv(const ucode::instr_alu_t& alu, - const char* op) { - BeginAppendVectorOp(alu); - Append("vec4(("); - AppendVectorOpSrcReg(alu, 1); - Append(").x %s (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").x ? 
1.0 : 0.0, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").y %s (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").y ? 1.0 : 0.0, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").z %s (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").z ? 1.0 : 0.0, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").w %s (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").w ? 1.0 : 0.0)"); - EndAppendVectorOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_SETEv(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXv(alu, "=="); -} -bool GL4ShaderTranslator::TranslateALU_SETGTv(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXv(alu, ">"); -} -bool GL4ShaderTranslator::TranslateALU_SETGTEv(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXv(alu, ">="); -} -bool GL4ShaderTranslator::TranslateALU_SETNEv(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXv(alu, "!="); -} - -bool GL4ShaderTranslator::TranslateALU_FRACv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("fract("); - AppendVectorOpSrcReg(alu, 1); - Append(")"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_TRUNCv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("trunc("); - AppendVectorOpSrcReg(alu, 1); - Append(")"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_FLOORv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("floor("); - AppendVectorOpSrcReg(alu, 1); - Append(")"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MULADDv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("("); - AppendVectorOpSrcReg(alu, 1); - Append(" * "); - AppendVectorOpSrcReg(alu, 2); - Append(") + "); - AppendVectorOpSrcReg(alu, 3); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_CNDXXv(const ucode::instr_alu_t& alu, - const char* op) { - 
BeginAppendVectorOp(alu); - // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as - // values. - Append("vec4(("); - AppendVectorOpSrcReg(alu, 1); - Append(").x %s 0.0 ? (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").x : ("); - AppendVectorOpSrcReg(alu, 3); - Append(").x, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").y %s 0.0 ? (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").y : ("); - AppendVectorOpSrcReg(alu, 3); - Append(").y, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").z %s 0.0 ? (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").z : ("); - AppendVectorOpSrcReg(alu, 3); - Append(").z, ("); - AppendVectorOpSrcReg(alu, 1); - Append(").w %s 0.0 ? (", op); - AppendVectorOpSrcReg(alu, 2); - Append(").w : ("); - AppendVectorOpSrcReg(alu, 3); - Append(").w)"); - EndAppendVectorOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_CNDEv(const ucode::instr_alu_t& alu) { - return TranslateALU_CNDXXv(alu, "=="); -} -bool GL4ShaderTranslator::TranslateALU_CNDGTEv(const ucode::instr_alu_t& alu) { - return TranslateALU_CNDXXv(alu, ">="); -} -bool GL4ShaderTranslator::TranslateALU_CNDGTv(const ucode::instr_alu_t& alu) { - return TranslateALU_CNDXXv(alu, ">"); -} - -bool GL4ShaderTranslator::TranslateALU_DOT4v(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("dot("); - AppendVectorOpSrcReg(alu, 1); - Append(", "); - AppendVectorOpSrcReg(alu, 2); - Append(").xxxx"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_DOT3v(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("dot(vec4("); - AppendVectorOpSrcReg(alu, 1); - Append(").xyz, vec4("); - AppendVectorOpSrcReg(alu, 2); - Append(").xyz).xxxx"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("dot(vec4("); - AppendVectorOpSrcReg(alu, 1); - Append(").xy, vec4("); - 
AppendVectorOpSrcReg(alu, 2); - Append(").xy).xxxx + "); - AppendVectorOpSrcReg(alu, 3); - Append(".xxxx"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_CUBEv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("cube("); - AppendVectorOpSrcReg(alu, 1); - Append(", "); - AppendVectorOpSrcReg(alu, 2); - Append(")"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MAX4v(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("max("); - Append("max("); - Append("max("); - AppendVectorOpSrcReg(alu, 1); - Append(".x, "); - AppendVectorOpSrcReg(alu, 1); - Append(".y), "); - AppendVectorOpSrcReg(alu, 1); - Append(".z), "); - AppendVectorOpSrcReg(alu, 1); - Append(".w).xxxx"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_PRED_SETXX_PUSHv( - const ucode::instr_alu_t& alu, const char* op) { - Append(" p = (("); - AppendVectorOpSrcReg(alu, 1); - Append(".w == 0.0) && ("); - AppendVectorOpSrcReg(alu, 2); - Append(".w %s 0.0)) ? true : false;\n", op); - BeginAppendVectorOp(alu); - Append("(("); - AppendVectorOpSrcReg(alu, 1); - Append(".x == 0.0) && ("); - AppendVectorOpSrcReg(alu, 2); - Append(".x %s 0.0)) ? 
vec4(0.0) : ", op); - AppendVectorOpSrcReg(alu, 1); - Append(" + vec4(1.0)"); - EndAppendVectorOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETE_PUSHv( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXX_PUSHv(alu, "=="); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETNE_PUSHv( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXX_PUSHv(alu, "!="); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETGT_PUSHv( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXX_PUSHv(alu, ">"); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETGTE_PUSHv( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXX_PUSHv(alu, ">="); -} - -bool GL4ShaderTranslator::TranslateALU_DSTv(const ucode::instr_alu_t& alu) { - BeginAppendVectorOp(alu); - Append("vec4(1.0, ("); - AppendVectorOpSrcReg(alu, 1); - Append(".y * "); - AppendVectorOpSrcReg(alu, 2); - Append(".y), "); - AppendVectorOpSrcReg(alu, 1); - Append(".z, "); - AppendVectorOpSrcReg(alu, 2); - Append(".w)"); - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MOVAv(const ucode::instr_alu_t& alu) { - Append(" a0 = clamp(int(floor("); - AppendVectorOpSrcReg(alu, 1); - Append(".w + 0.5)), -256, 255);\n"); - BeginAppendVectorOp(alu); - if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && - alu.src1_swiz == alu.src2_swiz && - alu.src1_reg_negate == alu.src2_reg_negate) { - // This is a mov. 
- AppendVectorOpSrcReg(alu, 1); - } else { - Append("max("); - AppendVectorOpSrcReg(alu, 1); - Append(", "); - AppendVectorOpSrcReg(alu, 2); - Append(")"); - } - EndAppendVectorOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_ADDs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendScalarOpSrcReg(alu, 3); - Append(".x + "); - AppendScalarOpSrcReg(alu, 3); - Append(".y"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_ADD_PREVs( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendSrcReg(alu, 3); - Append(".x + ps"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MULs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendScalarOpSrcReg(alu, 3); - Append(".x * "); - AppendScalarOpSrcReg(alu, 3); - Append(".y"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MUL_PREVs( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendSrcReg(alu, 3); - Append(".x * ps"); - EndAppendScalarOp(alu); - return true; -} - -// ... - -bool GL4ShaderTranslator::TranslateALU_MAXs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { - // This is a mov. - AppendScalarOpSrcReg(alu, 3); - } else { - Append("max("); - AppendScalarOpSrcReg(alu, 3); - Append(".x, "); - AppendScalarOpSrcReg(alu, 3); - Append(".y)"); - } - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MINs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("min("); - AppendScalarOpSrcReg(alu, 3); - Append(".x, "); - AppendScalarOpSrcReg(alu, 3); - Append(".y)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_SETXXs(const ucode::instr_alu_t& alu, - const char* op) { - BeginAppendScalarOp(alu); - Append("("); - AppendScalarOpSrcReg(alu, 3); - Append(".x %s 0.0) ? 
1.0 : 0.0", op); - EndAppendScalarOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_SETEs(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXs(alu, "=="); -} -bool GL4ShaderTranslator::TranslateALU_SETGTs(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXs(alu, ">"); -} -bool GL4ShaderTranslator::TranslateALU_SETGTEs(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXs(alu, ">="); -} -bool GL4ShaderTranslator::TranslateALU_SETNEs(const ucode::instr_alu_t& alu) { - return TranslateALU_SETXXs(alu, "!="); -} - -bool GL4ShaderTranslator::TranslateALU_FRACs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("fract("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_TRUNCs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("trunc("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_FLOORs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("floor("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("pow(2.0, "); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_LOG_CLAMP( - const ucode::instr_alu_t& alu) { - Append(" ps = log2("); - AppendScalarOpSrcReg(alu, 3); - Append(".x);"); - BeginAppendScalarOp(alu); - Append("isinf(ps) && ps < 0.0 ? 
-FLT_MAX : ps"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("log2("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIP_CLAMP( - const ucode::instr_alu_t& alu) { - // if result == -inf result = -flt_max - // if result == +inf result = flt_max - BeginAppendScalarOp(alu); - Append("1.0 / "); - AppendScalarOpSrcReg(alu, 3); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu) { - // if result == -inf result = -zero - // if result == +inf result = zero - BeginAppendScalarOp(alu); - Append("1.0 / "); - AppendScalarOpSrcReg(alu, 3); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("1.0 / "); - AppendScalarOpSrcReg(alu, 3); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIPSQ_CLAMP( - const ucode::instr_alu_t& alu) { - // if result == -inf result = -flt_max - // if result == +inf result = flt_max - BeginAppendScalarOp(alu); - Append("inversesqrt("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIPSQ_FF( - const ucode::instr_alu_t& alu) { - // if result == -inf result = -zero - // if result == +inf result = zero - BeginAppendScalarOp(alu); - Append("inversesqrt("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("inversesqrt("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MOVAs(const 
ucode::instr_alu_t& alu) { - Append(" a0 = clamp(int(floor("); - AppendScalarOpSrcReg(alu, 3); - Append(".x + 0.5)), -256, 255);\n"); - BeginAppendScalarOp(alu); - if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { - // This is a mov. - AppendScalarOpSrcReg(alu, 3); - } else { - Append("max("); - AppendScalarOpSrcReg(alu, 3); - Append(".x, "); - AppendScalarOpSrcReg(alu, 3); - Append(".y)"); - } - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MOVA_FLOORs( - const ucode::instr_alu_t& alu) { - Append(" a0 = clamp(int(floor("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)), -256, 255);\n"); - BeginAppendScalarOp(alu); - if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { - // This is a mov. - AppendScalarOpSrcReg(alu, 3); - } else { - Append("max("); - AppendScalarOpSrcReg(alu, 3); - Append(".x, "); - AppendScalarOpSrcReg(alu, 3); - Append(".y)"); - } - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_SUBs(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendScalarOpSrcReg(alu, 3); - Append(".x - "); - AppendScalarOpSrcReg(alu, 3); - Append(".y"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_SUB_PREVs( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - AppendScalarOpSrcReg(alu, 3); - Append(".x - ps"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_PRED_SETXXs( - const ucode::instr_alu_t& alu, const char* op) { - Append(" p = "); - AppendScalarOpSrcReg(alu, 3); - Append(".x %s 0.0;\n", op); - BeginAppendScalarOp(alu); - Append("(p ? 
0.0 : 1.0).xxxx"); - EndAppendScalarOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETEs( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXXs(alu, "=="); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETNEs( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXXs(alu, "!="); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETGTs( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXXs(alu, ">"); -} -bool GL4ShaderTranslator::TranslateALU_PRED_SETGTEs( - const ucode::instr_alu_t& alu) { - return TranslateALU_PRED_SETXXs(alu, ">="); -} - -bool GL4ShaderTranslator::TranslateALU_PRED_SET_INVs( - const ucode::instr_alu_t& alu) { - Append(" ps = "); - AppendScalarOpSrcReg(alu, 3); - Append(".x;\n"); - Append(" if (ps == 1.0) { p = true; ps = 0.0; }\n"); - Append(" else { p = false; ps = (ps == 0.0) ? 1.0 : ps; }\n"); - BeginAppendScalarOp(alu); - Append("ps"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_PRED_SET_POPs( - const ucode::instr_alu_t& alu) { - Append(" ps = "); - AppendScalarOpSrcReg(alu, 3); - Append(".x - 1.0;\n"); - Append(" if (ps <= 0.0) { p = true; ps = 0.0; }\n"); - Append(" else { p = false; }\n"); - BeginAppendScalarOp(alu); - Append("ps"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_SQRT_IEEE( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("sqrt("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = - (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - // TODO(benvanik): const slot? - int const_slot = (alu.src1_sel || alu.src2_sel) ? 
1 : 0; - AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate, 0); - Append(".%c * ", chan_names[swiz_a]); - AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate, const_slot); - Append(".%c", chan_names[swiz_b]); - EndAppendScalarOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_MUL_CONST_1( - const ucode::instr_alu_t& alu) { - return TranslateALU_MUL_CONST_0(alu); -} - -bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = - (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - // TODO(benvanik): const slot? - int const_slot = (alu.src1_sel || alu.src2_sel) ? 1 : 0; - AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate, 0); - Append(".%c + ", chan_names[swiz_a]); - AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate, const_slot); - Append(".%c", chan_names[swiz_b]); - EndAppendScalarOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_ADD_CONST_1( - const ucode::instr_alu_t& alu) { - return TranslateALU_ADD_CONST_0(alu); -} - -bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0( - const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = - (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - // TODO(benvanik): const slot? - int const_slot = (alu.src1_sel || alu.src2_sel) ? 
1 : 0; - AppendSrcReg(alu, alu.src3_reg, 0, 0, alu.src3_reg_negate, 0); - Append(".%c - ", chan_names[swiz_a]); - AppendSrcReg(alu, reg2, 1, 0, alu.src3_reg_negate, const_slot); - Append(".%c", chan_names[swiz_b]); - EndAppendScalarOp(alu); - return true; -} -bool GL4ShaderTranslator::TranslateALU_SUB_CONST_1( - const ucode::instr_alu_t& alu) { - // Handled as switch on scalar_opc. - return TranslateALU_SUB_CONST_0(alu); -} - -bool GL4ShaderTranslator::TranslateALU_SIN(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("sin("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_COS(const ucode::instr_alu_t& alu) { - BeginAppendScalarOp(alu); - Append("cos("); - AppendScalarOpSrcReg(alu, 3); - Append(".x)"); - EndAppendScalarOp(alu); - return true; -} - -bool GL4ShaderTranslator::TranslateALU_RETAIN_PREV( - const ucode::instr_alu_t& alu) { - // TODO(benvanik): figure out how this is used. - // It seems like vector writes to export regs will use this to write 1's to - // components (like w in position). 
- BeginAppendScalarOp(alu); - Append("ps"); - EndAppendScalarOp(alu); - return true; -} - -typedef bool (GL4ShaderTranslator::*TranslateFn)(const ucode::instr_alu_t& alu); -typedef struct { - uint32_t num_srcs; - const char* name; - TranslateFn fn; -} TranslateInfo; -#define ALU_INSTR(opc, num_srcs) \ - { num_srcs, #opc, nullptr } -#define ALU_INSTR_IMPL(opc, num_srcs) \ - { num_srcs, #opc, &GL4ShaderTranslator::TranslateALU_##opc } - -bool GL4ShaderTranslator::TranslateALU(const ucode::instr_alu_t* alu, - int sync) { - static TranslateInfo vector_alu_instrs[0x20] = { - ALU_INSTR_IMPL(ADDv, 2), // 0 - ALU_INSTR_IMPL(MULv, 2), // 1 - ALU_INSTR_IMPL(MAXv, 2), // 2 - ALU_INSTR_IMPL(MINv, 2), // 3 - ALU_INSTR_IMPL(SETEv, 2), // 4 - ALU_INSTR_IMPL(SETGTv, 2), // 5 - ALU_INSTR_IMPL(SETGTEv, 2), // 6 - ALU_INSTR_IMPL(SETNEv, 2), // 7 - ALU_INSTR_IMPL(FRACv, 1), // 8 - ALU_INSTR_IMPL(TRUNCv, 1), // 9 - ALU_INSTR_IMPL(FLOORv, 1), // 10 - ALU_INSTR_IMPL(MULADDv, 3), // 11 - ALU_INSTR_IMPL(CNDEv, 3), // 12 - ALU_INSTR_IMPL(CNDGTEv, 3), // 13 - ALU_INSTR_IMPL(CNDGTv, 3), // 14 - ALU_INSTR_IMPL(DOT4v, 2), // 15 - ALU_INSTR_IMPL(DOT3v, 2), // 16 - ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ??? 
- ALU_INSTR_IMPL(CUBEv, 2), // 18 - ALU_INSTR_IMPL(MAX4v, 1), // 19 - ALU_INSTR_IMPL(PRED_SETE_PUSHv, 2), // 20 - ALU_INSTR_IMPL(PRED_SETNE_PUSHv, 2), // 21 - ALU_INSTR_IMPL(PRED_SETGT_PUSHv, 2), // 22 - ALU_INSTR_IMPL(PRED_SETGTE_PUSHv, 2), // 23 - ALU_INSTR(KILLEv, 2), // 24 - ALU_INSTR(KILLGTv, 2), // 25 - ALU_INSTR(KILLGTEv, 2), // 26 - ALU_INSTR(KILLNEv, 2), // 27 - ALU_INSTR_IMPL(DSTv, 2), // 28 - ALU_INSTR_IMPL(MOVAv, 1), // 29 - }; - static TranslateInfo scalar_alu_instrs[0x40] = { - ALU_INSTR_IMPL(ADDs, 1), // 0 - ALU_INSTR_IMPL(ADD_PREVs, 1), // 1 - ALU_INSTR_IMPL(MULs, 1), // 2 - ALU_INSTR_IMPL(MUL_PREVs, 1), // 3 - ALU_INSTR(MUL_PREV2s, 1), // 4 - ALU_INSTR_IMPL(MAXs, 1), // 5 - ALU_INSTR_IMPL(MINs, 1), // 6 - ALU_INSTR_IMPL(SETEs, 1), // 7 - ALU_INSTR_IMPL(SETGTs, 1), // 8 - ALU_INSTR_IMPL(SETGTEs, 1), // 9 - ALU_INSTR_IMPL(SETNEs, 1), // 10 - ALU_INSTR_IMPL(FRACs, 1), // 11 - ALU_INSTR_IMPL(TRUNCs, 1), // 12 - ALU_INSTR_IMPL(FLOORs, 1), // 13 - ALU_INSTR_IMPL(EXP_IEEE, 1), // 14 - ALU_INSTR_IMPL(LOG_CLAMP, 1), // 15 - ALU_INSTR_IMPL(LOG_IEEE, 1), // 16 - ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17 - ALU_INSTR_IMPL(RECIP_FF, 1), // 18 - ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 - ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20 - ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21 - ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22 - ALU_INSTR_IMPL(MOVAs, 1), // 23 - ALU_INSTR_IMPL(MOVA_FLOORs, 1), // 24 - ALU_INSTR_IMPL(SUBs, 1), // 25 - ALU_INSTR_IMPL(SUB_PREVs, 1), // 26 - ALU_INSTR_IMPL(PRED_SETEs, 1), // 27 - ALU_INSTR_IMPL(PRED_SETNEs, 1), // 28 - ALU_INSTR_IMPL(PRED_SETGTs, 1), // 29 - ALU_INSTR_IMPL(PRED_SETGTEs, 1), // 30 - ALU_INSTR_IMPL(PRED_SET_INVs, 1), // 31 - ALU_INSTR_IMPL(PRED_SET_POPs, 1), // 32 - ALU_INSTR(PRED_SET_CLRs, 1), // 33 - ALU_INSTR(PRED_SET_RESTOREs, 1), // 34 - ALU_INSTR(KILLEs, 1), // 35 - ALU_INSTR(KILLGTs, 1), // 36 - ALU_INSTR(KILLGTEs, 1), // 37 - ALU_INSTR(KILLNEs, 1), // 38 - ALU_INSTR(KILLONEs, 1), // 39 - ALU_INSTR_IMPL(SQRT_IEEE, 1), // 40 - {0, 0, false}, 
// - ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 - ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 - ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 - ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 - ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 - ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 - ALU_INSTR_IMPL(SIN, 1), // 48 - ALU_INSTR_IMPL(COS, 1), // 49 - ALU_INSTR_IMPL(RETAIN_PREV, 1), // 50 - }; -#undef ALU_INSTR -#undef ALU_INSTR_IMPL - - // If not an export we can fast kill if there is no write mask. - if (alu->vector_write_mask || (alu->export_data && alu->scalar_dest_rel)) { - // Disassemble vector op. - const auto& iv = vector_alu_instrs[alu->vector_opc]; - Append(" // %sALU:\t", sync ? "(S)" : " "); - Append("%s", iv.name); - if (alu->pred_select) { - // seems to work similar to conditional execution in ARM instruction - // set, so let's use a similar syntax for now: - Append(alu->pred_condition ? "EQ" : "NE"); - } - Append("\t"); - PrintVectorDstReg(*alu); - Append(" = "); - if (iv.num_srcs == 3) { - PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->abs_constants); - Append(", "); - } - PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, - alu->src1_reg_negate, alu->abs_constants); - if (iv.num_srcs > 1) { - Append(", "); - PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, - alu->src2_reg_negate, alu->abs_constants); - } - if (alu->vector_clamp) { - Append(" CLAMP"); - } - if (alu->export_data) { - PrintExportComment(alu->vector_dest); - } - Append("\n"); - - // Translate vector op. - if (iv.fn) { - if (!(this->*iv.fn)(*alu)) { - return false; - } - } else { - assert_always(); - Append(" // \n"); - } - } - - // TODO(benvanik): see if there's a better way to no-op this. - if (true) { // alu->scalar_write_mask || alu->export_data) { - // 2nd optional scalar op: - - // Disassemble scalar op. 
- const auto& is = scalar_alu_instrs[alu->scalar_opc]; - Append(" // "); - Append("\t"); - if (is.name) { - Append("\t \t%s\t", is.name); - } else { - Append("\t \tOP(%u)\t", alu->scalar_opc); - } - PrintScalarDstReg(*alu); - Append(" = "); - if (is.num_srcs == 2) { - // ADD_CONST_0 dest, [const], [reg] - uint32_t src3_swiz = alu->src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - PrintSrcReg(alu->src3_reg, 0, 0, alu->src3_reg_negate, - alu->abs_constants); - Append(".%c", chan_names[swiz_a]); - Append(", "); - uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | - (alu->src3_sel << 1); - PrintSrcReg(reg2, 1, 0, alu->src3_reg_negate, alu->abs_constants); - Append(".%c", chan_names[swiz_b]); - } else { - PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->abs_constants); - } - if (alu->scalar_clamp) { - Append(" CLAMP"); - } - if (alu->export_data) { - PrintExportComment(alu->scalar_dest); - } - Append("\n"); - - // Translate scalar op. - if (is.fn) { - if (!(this->*is.fn)(*alu)) { - return false; - } - } else { - assert_always(); - Append(" // \n"); - } - } - - return true; -} - -void GL4ShaderTranslator::PrintDestFetch(uint32_t dst_reg, uint32_t dst_swiz) { - Append("\tR%u.", dst_reg); - for (int i = 0; i < 4; i++) { - Append("%c", chan_names[dst_swiz & 0x7]); - dst_swiz >>= 3; - } -} - -void GL4ShaderTranslator::AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz) { - Append("r%u.", dst_reg); - for (int i = 0; i < 4; i++) { - Append("%c", chan_names[dst_swiz & 0x7]); - dst_swiz >>= 3; - } -} - -void GL4ShaderTranslator::AppendPredPre(bool is_cond_cf, uint32_t cf_condition, - uint32_t pred_select, - uint32_t condition) { - if (pred_select && (!is_cond_cf || cf_condition != condition)) { - Append(" if (%cp) {\n", condition ? 
' ' : '!'); - } -} - -void GL4ShaderTranslator::AppendPredPost(bool is_cond_cf, uint32_t cf_condition, - uint32_t pred_select, - uint32_t condition) { - if (pred_select && (!is_cond_cf || cf_condition != condition)) { - Append(" }\n"); - } -} - -bool GL4ShaderTranslator::TranslateBlocks(GL4Shader* shader) { - Append(" int pc = 0;\n"); - -#if FLOW_CONTROL - Append(" while (pc != 0xFFFF) {\n"); - Append(" switch (pc) {\n"); - - // Start here; fall through to begin. - Append(" case 0:\n"); -#endif // FLOW_CONTROL - - // Process all execution blocks. - ucode::instr_cf_t cfa; - ucode::instr_cf_t cfb; - auto data = shader->data(); - bool needs_break = false; - for (uint32_t idx = 0; idx < shader->dword_count(); idx += 3) { - uint32_t dword_0 = data[idx + 0]; - uint32_t dword_1 = data[idx + 1]; - uint32_t dword_2 = data[idx + 2]; - cfa.dword_0 = dword_0; - cfa.dword_1 = dword_1 & 0xFFFF; - cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); - cfb.dword_1 = dword_2 >> 16; - if (cfa.opc == ALLOC) { - // ? - } else if (cfa.is_exec()) { - if (needs_break) { -#if FLOW_CONTROL - Append(" break;\n"); -#endif // FLOW_CONTROL - needs_break = false; - } - TranslateExec(cfa.exec); - needs_break = true; - } else if (cfa.opc == COND_JMP) { - TranslateJmp(cfa.jmp_call); - } -#if FLOW_CONTROL - else if (cfa.opc == LOOP_START) { - TranslateLoopStart(cfa.loop); - } -#endif // FLOW_CONTROL - - if (cfb.opc == ALLOC) { - // ? - } else if (cfb.is_exec()) { - if (needs_break) { -#if FLOW_CONTROL - Append(" break;\n"); -#endif // FLOW_CONTROL - needs_break = false; - } - needs_break = true; - TranslateExec(cfb.exec); - } else if (cfb.opc == COND_JMP) { - TranslateJmp(cfb.jmp_call); - } -#if FLOW_CONTROL - else if (cfb.opc == LOOP_END) { - TranslateLoopEnd(cfb.loop); - } -#endif // FLOW_CONTROL - - if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { - break; - } - } - -#if FLOW_CONTROL - if (needs_break) { - Append(" break;\n"); - needs_break = false; - } - - // Fall-through and exit. 
- Append(" default:\n"); - Append(" pc = 0xFFFF;\n"); - Append(" break;\n"); - - Append("};\n"); - Append("}\n"); -#endif // FLOW_CONTROL - - return true; -} - -static const struct { - const char* name; -} cf_instructions[] = { -#define INSTR(opc, fxn) \ - { #opc } - INSTR(NOP, print_cf_nop), // - INSTR(EXEC, print_cf_exec), // - INSTR(EXEC_END, print_cf_exec), // - INSTR(COND_EXEC, print_cf_exec), // - INSTR(COND_EXEC_END, print_cf_exec), // - INSTR(COND_PRED_EXEC, print_cf_exec), // - INSTR(COND_PRED_EXEC_END, print_cf_exec), // - INSTR(LOOP_START, print_cf_loop), // - INSTR(LOOP_END, print_cf_loop), // - INSTR(COND_CALL, print_cf_jmp_call), // - INSTR(RETURN, print_cf_jmp_call), // - INSTR(COND_JMP, print_cf_jmp_call), // - INSTR(ALLOC, print_cf_alloc), // - INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), // - INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), // - INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? -#undef INSTR -}; - -bool GL4ShaderTranslator::TranslateExec(const ucode::instr_cf_exec_t& cf) { - Append(" // %s ADDR(0x%x) CNT(0x%x)", cf_instructions[cf.opc].name, - cf.address, cf.count); - if (cf.yeild) { - Append(" YIELD"); - } - uint8_t vc = cf.vc_hi | (cf.vc_lo << 2); - if (vc) { - Append(" VC(0x%x)", vc); - } - if (cf.is_cond_exec()) { - Append(" BOOL_ADDR(0x%x)", cf.bool_addr); - } - if (cf.address_mode == ABSOLUTE_ADDR) { - Append(" ABSOLUTE_ADDR"); - } - if (cf.is_cond_exec()) { - Append(" COND(%d)", cf.pred_condition); - } - Append("\n"); - -#if FLOW_CONTROL - Append(" case 0x%x:\n", cf.address); -#endif // FLOW_CONTROL - - if (cf.is_cond_exec()) { - if (cf.opc == COND_EXEC_PRED_CLEAN || cf.opc == COND_EXEC_PRED_CLEAN_END) { - Append(" p = (state.bool_consts[%d] & (1 << %d)) != 0;\n", - cf.bool_addr / 32, cf.bool_addr % 32); - } - Append(" if(%cp) {\n", cf.pred_condition ? 
' ' : '!'); - } - - uint32_t sequence = cf.serialize; - for (uint32_t i = 0; i < cf.count; i++) { - uint32_t alu_off = (cf.address + i); - int sync = sequence & 0x2; - if (sequence & 0x1) { - const instr_fetch_t* fetch = - (const instr_fetch_t*)(dwords_ + alu_off * 3); - switch (fetch->opc) { - case VTX_FETCH: - AppendPredPre(cf.is_cond_exec(), cf.pred_condition, - fetch->vtx.pred_select, fetch->vtx.pred_condition); - if (!TranslateVertexFetch(&fetch->vtx, sync)) { - return false; - } - AppendPredPost(cf.is_cond_exec(), cf.pred_condition, - fetch->vtx.pred_select, fetch->vtx.pred_condition); - break; - case TEX_FETCH: - AppendPredPre(cf.is_cond_exec(), cf.pred_condition, - fetch->tex.pred_select, fetch->tex.pred_condition); - if (!TranslateTextureFetch(&fetch->tex, sync)) { - return false; - } - AppendPredPost(cf.is_cond_exec(), cf.pred_condition, - fetch->tex.pred_select, fetch->tex.pred_condition); - break; - case TEX_GET_BORDER_COLOR_FRAC: - case TEX_GET_COMP_TEX_LOD: - case TEX_GET_GRADIENTS: - case TEX_GET_WEIGHTS: - case TEX_SET_TEX_LOD: - case TEX_SET_GRADIENTS_H: - case TEX_SET_GRADIENTS_V: - default: - assert_always(); - break; - } - } else { - const ucode::instr_alu_t* alu = - (const ucode::instr_alu_t*)(dwords_ + alu_off * 3); - AppendPredPre(cf.is_cond_exec(), cf.pred_condition, alu->pred_select, - alu->pred_condition); - if (!TranslateALU(alu, sync)) { - return false; - } - AppendPredPost(cf.is_cond_exec(), cf.pred_condition, alu->pred_select, - alu->pred_condition); - } - sequence >>= 2; - } - - if (cf.is_cond_exec()) { - Append(" }\n"); - } - - if (cf.opc == EXEC_END) { - Append(" pc = 0xFFFF;\n"); - } else { - Append(" pc = 0x%x;\n", cf.address + cf.count); - } - - return true; -} - -bool GL4ShaderTranslator::TranslateJmp(const ucode::instr_cf_jmp_call_t& cf) { - assert_true(cf.direction == 0); - assert_true(cf.address_mode == 0); - Append(" // %s", cf_instructions[cf.opc].name); - Append(" ADDR(0x%x) DIR(%d)", cf.address, cf.direction); - if 
(cf.address_mode == ABSOLUTE_ADDR) { - Append(" ABSOLUTE_ADDR"); - } - if (cf.force_call) { - Append(" FORCE_CALL"); - } else { - if (!cf.predicated_jmp) { - Append(" BOOL_ADDR(0x%x)", cf.bool_addr); - } - Append(" COND(%d)", cf.condition); - } - Append("\n"); - - if (!cf.force_call) { - if (!cf.predicated_jmp) { - Append(" p = (state.bool_consts[%d] & (1 << %d)) != 0;\n", - cf.bool_addr / 32, cf.bool_addr % 32); - } - Append(" if(%cp) {\n", cf.condition ? ' ' : '!'); - } - if (cf.address_mode == ABSOLUTE_ADDR) { - Append(" pc = 0x%x;\n", cf.address); - } else { - Append(" pc = pc + 0x%x;\n", cf.address); - } - if (!cf.force_call) { -#if FLOW_CONTROL - Append(" break;\n"); -#endif // FLOW_CONTROL - Append(" }\n"); - } - - return true; -} - -bool GL4ShaderTranslator::TranslateLoopStart(const ucode::instr_cf_loop_t& cf) { - Append(" // %s", cf_instructions[cf.opc].name); - Append(" ADDR(0x%x) LOOP ID(%d)", cf.address, cf.loop_id); - if (cf.address_mode == ABSOLUTE_ADDR) { - Append(" ABSOLUTE_ADDR"); - } - Append("\n"); - Append(" i%d_addr = pc;\n", cf.loop_id); - Append(" i%d_cnt = 0;\n", cf.loop_id); - return true; -} - -bool GL4ShaderTranslator::TranslateLoopEnd(const ucode::instr_cf_loop_t& cf) { - Append(" // %s", cf_instructions[cf.opc].name); - Append(" ADDR(0x%x) LOOP ID(%d)\n", cf.address, cf.loop_id); - Append(" i%d_cnt = i%d_cnt + 1;\n", cf.loop_id, cf.loop_id); - Append(" pc = (i%d_cnt < state.loop_consts[%d]) ? 
i%d_addr : pc;\n", - cf.loop_id, cf.loop_id, cf.loop_id); - return true; -} - -bool GL4ShaderTranslator::TranslateVertexFetch( - const ucode::instr_fetch_vtx_t* vtx, int sync) { - static const struct { - const char* name; - } fetch_types[0xff] = { -#define TYPE(id) \ - { #id } - TYPE(FMT_1_REVERSE), // 0 - {0}, - TYPE(FMT_8), // 2 - {0}, - {0}, - {0}, - TYPE(FMT_8_8_8_8), // 6 - TYPE(FMT_2_10_10_10), // 7 - {0}, - {0}, - TYPE(FMT_8_8), // 10 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_16), // 24 - TYPE(FMT_16_16), // 25 - TYPE(FMT_16_16_16_16), // 26 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32), // 33 - TYPE(FMT_32_32), // 34 - TYPE(FMT_32_32_32_32), // 35 - TYPE(FMT_32_FLOAT), // 36 - TYPE(FMT_32_32_FLOAT), // 37 - TYPE(FMT_32_32_32_32_FLOAT), // 38 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32_32_32_FLOAT), // 57 -#undef TYPE - }; - - // Disassemble. - Append(" // %sFETCH:\t", sync ? "(S)" : " "); - if (vtx->pred_select) { - Append(vtx->pred_condition ? "EQ" : "NE"); - } - PrintDestFetch(vtx->dst_reg, vtx->dst_swiz); - Append(" = R%u.", vtx->src_reg); - Append("%c", chan_names[vtx->src_swiz & 0x3]); - if (fetch_types[vtx->format].name) { - Append(" %s", fetch_types[vtx->format].name); - } else { - Append(" TYPE(0x%x)", vtx->format); - } - Append(" %s", vtx->format_comp_all ? 
"SIGNED" : "UNSIGNED"); - if (!vtx->num_format_all) { - Append(" NORMALIZED"); - } - Append(" STRIDE(%u)", vtx->stride); - if (vtx->offset) { - Append(" OFFSET(%u)", vtx->offset); - } - Append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); - if (true) { - // XXX - Append(" src_reg_am=%u", vtx->src_reg_am); - Append(" dst_reg_am=%u", vtx->dst_reg_am); - Append(" num_format_all=%u", vtx->num_format_all); - Append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); - Append(" exp_adjust_all=%u", vtx->exp_adjust_all); - } - Append("\n"); - - // Translate. - Append(" "); - Append("r%u.xyzw", vtx->dst_reg); - Append(" = vec4("); - uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; - // TODO(benvanik): detect xyzw = xyzw, etc. - // TODO(benvanik): detect and set as rN = vec4(samp.xyz, 1.0); / etc - // uint32_t component_count = - // GetVertexFormatComponentCount(static_cast(vtx->format)); - uint32_t dst_swiz = vtx->dst_swiz; - for (int i = 0; i < 4; i++) { - if ((dst_swiz & 0x7) == 4) { - Append("0.0"); - } else if ((dst_swiz & 0x7) == 5) { - Append("1.0"); - } else if ((dst_swiz & 0x7) == 6) { - // ? 
- Append("?"); - } else if ((dst_swiz & 0x7) == 7) { - Append("r%u.%c", vtx->dst_reg, chan_names[i]); - } else { - Append("vf%u_%d.%c", fetch_slot, vtx->offset, chan_names[dst_swiz & 0x3]); - } - if (i < 3) { - Append(", "); - } - dst_swiz >>= 3; - } - Append(");\n"); - return true; -} - -bool GL4ShaderTranslator::TranslateTextureFetch( - const ucode::instr_fetch_tex_t* tex, int sync) { - int src_component_count = 0; - const char* sampler_type; - switch (tex->dimension) { - case DIMENSION_1D: - src_component_count = 1; - sampler_type = "sampler1D"; - break; - case DIMENSION_2D: - src_component_count = 2; - sampler_type = "sampler2D"; - break; - case DIMENSION_3D: - src_component_count = 3; - sampler_type = "sampler3D"; - break; - case DIMENSION_CUBE: - src_component_count = 3; - sampler_type = "samplerCube"; - break; - default: - assert_unhandled_case(tex->dimension); - return false; - } - - // Disassemble. - static const char* filter[] = { - "POINT", // TEX_FILTER_POINT - "LINEAR", // TEX_FILTER_LINEAR - "BASEMAP", // TEX_FILTER_BASEMAP - }; - static const char* aniso_filter[] = { - "DISABLED", // ANISO_FILTER_DISABLED - "MAX_1_1", // ANISO_FILTER_MAX_1_1 - "MAX_2_1", // ANISO_FILTER_MAX_2_1 - "MAX_4_1", // ANISO_FILTER_MAX_4_1 - "MAX_8_1", // ANISO_FILTER_MAX_8_1 - "MAX_16_1", // ANISO_FILTER_MAX_16_1 - }; - static const char* arbitrary_filter[] = { - "2x4_SYM", // ARBITRARY_FILTER_2X4_SYM - "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM - "4x2_SYM", // ARBITRARY_FILTER_4X2_SYM - "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM - "4x4_SYM", // ARBITRARY_FILTER_4X4_SYM - "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM - }; - static const char* sample_loc[] = { - "CENTROID", // SAMPLE_CENTROID - "CENTER", // SAMPLE_CENTER - }; - uint32_t src_swiz = tex->src_swiz; - Append(" // %sFETCH:\t", sync ? "(S)" : " "); - if (tex->pred_select) { - Append(tex->pred_condition ? 
"EQ" : "NE"); - } - PrintDestFetch(tex->dst_reg, tex->dst_swiz); - Append(" = R%u.", tex->src_reg); - for (int i = 0; i < src_component_count; i++) { - Append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - Append(" CONST(%u)", tex->const_idx); - if (tex->fetch_valid_only) { - Append(" VALID_ONLY"); - } - if (tex->tx_coord_denorm) { - Append(" DENORM"); - } - if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { - Append(" MAG(%s)", filter[tex->mag_filter]); - } - if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { - Append(" MIN(%s)", filter[tex->min_filter]); - } - if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { - Append(" MIP(%s)", filter[tex->mip_filter]); - } - if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { - Append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); - } - if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { - Append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); - } - if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { - Append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); - } - if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { - Append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); - } - if (!tex->use_comp_lod) { - Append(" LOD(%u)", tex->use_comp_lod); - Append(" LOD_BIAS(%u)", tex->lod_bias); - } - if (tex->use_reg_lod) { - Append(" REG_LOD(%u)", tex->use_reg_lod); - } - if (tex->use_reg_gradients) { - Append(" USE_REG_GRADIENTS"); - } - Append(" LOCATION(%s)", sample_loc[tex->sample_location]); - if (tex->offset_x || tex->offset_y || tex->offset_z) { - Append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); - } - Append("\n"); - - // Translate. - // TODO(benvanik): if sampler == null, set to invalid color. 
- Append(" if (state.texture_samplers[%d].x != 0) {\n", tex->const_idx & 0xF); - if (tex->dimension == DIMENSION_CUBE) { - Append(" t.xyz = r%u.", tex->src_reg); - src_swiz = tex->src_swiz; - for (int i = 0; i < src_component_count; i++) { - Append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - Append(";\n"); - // TODO(benvanik): undo CUBEv logic on t? (s,t,faceid) - Append(" t = texture(%s(state.texture_samplers[%d]), t.xyz);\n", - sampler_type, tex->const_idx & 0xF); - } else { - Append(" t = texture("); - Append("%s(state.texture_samplers[%d])", sampler_type, - tex->const_idx & 0xF); - Append(", r%u.", tex->src_reg); - src_swiz = tex->src_swiz; - for (int i = 0; i < src_component_count; i++) { - Append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - Append(");\n"); - } - Append(" } else {\n"); - Append(" t = vec4(r%u.", tex->src_reg); - src_swiz = tex->src_swiz; - for (int i = 0; i < src_component_count; i++) { - Append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - switch (src_component_count) { - case 1: - Append(", 0.0, 0.0, 1.0);\n"); - break; - case 2: - Append(", 0.0, 1.0);\n"); - break; - case 3: - Append(", 1.0);\n"); - break; - } - Append(" }\n"); - - Append(" r%u.xyzw = vec4(", tex->dst_reg); - uint32_t dst_swiz = tex->dst_swiz; - for (int i = 0; i < 4; i++) { - if (i) { - Append(", "); - } - if ((dst_swiz & 0x7) == 4) { - Append("0.0"); - } else if ((dst_swiz & 0x7) == 5) { - Append("1.0"); - } else if ((dst_swiz & 0x7) == 6) { - // ? 
- Append("?"); - assert_always(); - } else if ((dst_swiz & 0x7) == 7) { - Append("r%u.%c", tex->dst_reg, chan_names[i]); - } else { - Append("t.%c", chan_names[dst_swiz & 0x3]); - } - dst_swiz >>= 3; - } - Append(");\n"); - return true; -} - -} // namespace gl4 -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h deleted file mode 100644 index 1533e4062..000000000 --- a/src/xenia/gpu/gl4/gl4_shader_translator.h +++ /dev/null @@ -1,165 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ -#define XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ - -#include - -#include "xenia/base/string_buffer.h" -#include "xenia/gpu/gl4/gl4_shader.h" -#include "xenia/gpu/ucode.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/gl/gl_context.h" - -namespace xe { -namespace gpu { -namespace gl4 { - -class GL4ShaderTranslator { - public: - static const uint32_t kMaxInterpolators = 16; - - GL4ShaderTranslator(); - ~GL4ShaderTranslator(); - - std::string TranslateVertexShader(GL4Shader* vertex_shader); - std::string TranslatePixelShader(GL4Shader* pixel_shader); - - protected: - ShaderType shader_type_; - const uint32_t* dwords_ = nullptr; - - static const int kOutputCapacity = 64 * 1024; - StringBuffer output_; - - bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } - bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } - - void Reset(GL4Shader* shader); - - void AppendSrcReg(const ucode::instr_alu_t& op, int i); - void 
AppendSrcReg(const ucode::instr_alu_t& op, uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, int const_slot); - void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, - uint32_t abs); - void PrintVectorDstReg(const ucode::instr_alu_t& alu); - void PrintScalarDstReg(const ucode::instr_alu_t& alu); - void PrintExportComment(uint32_t num); - - bool TranslateALU(const ucode::instr_alu_t* alu, int sync); - bool TranslateALU_ADDv(const ucode::instr_alu_t& alu); - bool TranslateALU_MULv(const ucode::instr_alu_t& alu); - bool TranslateALU_MAXv(const ucode::instr_alu_t& alu); - bool TranslateALU_MINv(const ucode::instr_alu_t& alu); - bool TranslateALU_SETXXv(const ucode::instr_alu_t& alu, const char* op); - bool TranslateALU_SETEv(const ucode::instr_alu_t& alu); - bool TranslateALU_SETNEv(const ucode::instr_alu_t& alu); - bool TranslateALU_SETGTv(const ucode::instr_alu_t& alu); - bool TranslateALU_SETGTEv(const ucode::instr_alu_t& alu); - bool TranslateALU_FRACv(const ucode::instr_alu_t& alu); - bool TranslateALU_TRUNCv(const ucode::instr_alu_t& alu); - bool TranslateALU_FLOORv(const ucode::instr_alu_t& alu); - bool TranslateALU_MULADDv(const ucode::instr_alu_t& alu); - bool TranslateALU_CNDXXv(const ucode::instr_alu_t& alu, const char* op); - bool TranslateALU_CNDEv(const ucode::instr_alu_t& alu); - bool TranslateALU_CNDGTEv(const ucode::instr_alu_t& alu); - bool TranslateALU_CNDGTv(const ucode::instr_alu_t& alu); - bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu); - bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu); - bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu); - bool TranslateALU_CUBEv(const ucode::instr_alu_t& alu); - bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETXX_PUSHv(const ucode::instr_alu_t& alu, - const char* op); - bool TranslateALU_PRED_SETE_PUSHv(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETNE_PUSHv(const ucode::instr_alu_t& alu); - bool 
TranslateALU_PRED_SETGT_PUSHv(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETGTE_PUSHv(const ucode::instr_alu_t& alu); - bool TranslateALU_DSTv(const ucode::instr_alu_t& alu); - bool TranslateALU_MOVAv(const ucode::instr_alu_t& alu); - bool TranslateALU_ADDs(const ucode::instr_alu_t& alu); - bool TranslateALU_ADD_PREVs(const ucode::instr_alu_t& alu); - bool TranslateALU_MULs(const ucode::instr_alu_t& alu); - bool TranslateALU_MUL_PREVs(const ucode::instr_alu_t& alu); - // ... - bool TranslateALU_MAXs(const ucode::instr_alu_t& alu); - bool TranslateALU_MINs(const ucode::instr_alu_t& alu); - bool TranslateALU_SETXXs(const ucode::instr_alu_t& alu, const char* op); - bool TranslateALU_SETEs(const ucode::instr_alu_t& alu); - bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu); - bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu); - bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu); - bool TranslateALU_FRACs(const ucode::instr_alu_t& alu); - bool TranslateALU_TRUNCs(const ucode::instr_alu_t& alu); - bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu); - bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu); - bool TranslateALU_LOG_CLAMP(const ucode::instr_alu_t& alu); - bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu); - bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu); - bool TranslateALU_MOVAs(const ucode::instr_alu_t& alu); - bool TranslateALU_MOVA_FLOORs(const ucode::instr_alu_t& alu); - bool TranslateALU_SUBs(const ucode::instr_alu_t& alu); - bool TranslateALU_SUB_PREVs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETXXs(const ucode::instr_alu_t& alu, const char* op); - bool 
TranslateALU_PRED_SETEs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETNEs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETGTs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SETGTEs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SET_INVs(const ucode::instr_alu_t& alu); - bool TranslateALU_PRED_SET_POPs(const ucode::instr_alu_t& alu); - bool TranslateALU_SQRT_IEEE(const ucode::instr_alu_t& alu); - bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu); - bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu); - bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu); - bool TranslateALU_ADD_CONST_1(const ucode::instr_alu_t& alu); - bool TranslateALU_SUB_CONST_0(const ucode::instr_alu_t& alu); - bool TranslateALU_SUB_CONST_1(const ucode::instr_alu_t& alu); - bool TranslateALU_SIN(const ucode::instr_alu_t& alu); - bool TranslateALU_COS(const ucode::instr_alu_t& alu); - bool TranslateALU_RETAIN_PREV(const ucode::instr_alu_t& alu); - - struct AppendFlag {}; - void BeginAppendVectorOp(const ucode::instr_alu_t& op); - void AppendVectorOpSrcReg(const ucode::instr_alu_t& op, int i); - void EndAppendVectorOp(const ucode::instr_alu_t& op, - uint32_t append_flags = 0); - void BeginAppendScalarOp(const ucode::instr_alu_t& op); - void AppendScalarOpSrcReg(const ucode::instr_alu_t& op, int i); - void EndAppendScalarOp(const ucode::instr_alu_t& op, - uint32_t append_flags = 0); - void AppendOpDestRegName(const ucode::instr_alu_t& op, uint32_t dest_num); - - void PrintDestFetch(uint32_t dst_reg, uint32_t dst_swiz); - void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz); - - void AppendPredPre(bool is_cond_cf, uint32_t cf_condition, - uint32_t pred_select, uint32_t condition); - void AppendPredPost(bool is_cond_cf, uint32_t cf_condition, - uint32_t pred_select, uint32_t condition); - - bool TranslateBlocks(GL4Shader* shader); - bool TranslateExec(const ucode::instr_cf_exec_t& cf); - bool TranslateJmp(const 
ucode::instr_cf_jmp_call_t& cf); - bool TranslateLoopStart(const ucode::instr_cf_loop_t& cf); - bool TranslateLoopEnd(const ucode::instr_cf_loop_t& cf); - bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync); - bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync); -}; - -} // namespace gl4 -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ diff --git a/src/xenia/gpu/gl4/ucode.h b/src/xenia/gpu/gl4/ucode.h deleted file mode 100644 index d8d6fb87e..000000000 --- a/src/xenia/gpu/gl4/ucode.h +++ /dev/null @@ -1,549 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_GL4_UCODE_H_ -#define XENIA_GPU_GL4_UCODE_H_ - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/platform.h" -#include "xenia/gpu/xenos.h" - -// Closest AMD doc: -// http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf -// Microcode format differs, but most fields/enums are the same. 
- -// This code comes from the freedreno project: -// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -namespace xe { -namespace gpu { -namespace gl4 { -namespace ucode { - -enum a2xx_sq_surfaceformat { - FMT_1_REVERSE = 0, - FMT_1 = 1, - FMT_8 = 2, - FMT_1_5_5_5 = 3, - FMT_5_6_5 = 4, - FMT_6_5_5 = 5, - FMT_8_8_8_8 = 6, - FMT_2_10_10_10 = 7, - FMT_8_A = 8, - FMT_8_B = 9, - FMT_8_8 = 10, - FMT_Cr_Y1_Cb_Y0 = 11, - FMT_Y1_Cr_Y0_Cb = 12, - FMT_5_5_5_1 = 13, - FMT_8_8_8_8_A = 14, - FMT_4_4_4_4 = 15, - FMT_10_11_11 = 16, - FMT_11_11_10 = 17, - FMT_DXT1 = 18, - FMT_DXT2_3 = 19, - FMT_DXT4_5 = 20, - FMT_24_8 = 22, - FMT_24_8_FLOAT = 23, - FMT_16 = 24, - FMT_16_16 = 25, - FMT_16_16_16_16 = 26, - FMT_16_EXPAND = 27, - FMT_16_16_EXPAND = 28, - FMT_16_16_16_16_EXPAND = 29, - FMT_16_FLOAT = 30, - FMT_16_16_FLOAT = 31, - FMT_16_16_16_16_FLOAT = 32, - FMT_32 = 33, - FMT_32_32 = 34, - FMT_32_32_32_32 = 35, - FMT_32_FLOAT = 36, - FMT_32_32_FLOAT = 37, - FMT_32_32_32_32_FLOAT = 38, - FMT_32_AS_8 = 39, - FMT_32_AS_8_8 = 40, - FMT_16_MPEG = 41, - FMT_16_16_MPEG = 42, - FMT_8_INTERLACED = 43, - FMT_32_AS_8_INTERLACED = 44, - FMT_32_AS_8_8_INTERLACED = 45, - FMT_16_INTERLACED = 46, - FMT_16_MPEG_INTERLACED = 47, - FMT_16_16_MPEG_INTERLACED = 48, - FMT_DXN = 49, - FMT_8_8_8_8_AS_16_16_16_16 = 50, - FMT_DXT1_AS_16_16_16_16 = 51, - FMT_DXT2_3_AS_16_16_16_16 = 52, - FMT_DXT4_5_AS_16_16_16_16 = 53, - FMT_2_10_10_10_AS_16_16_16_16 = 54, - FMT_10_11_11_AS_16_16_16_16 = 55, - FMT_11_11_10_AS_16_16_16_16 = 56, - FMT_32_32_32_FLOAT = 57, - FMT_DXT3A = 58, - FMT_DXT5A = 59, - FMT_CTX1 = 60, - FMT_DXT3A_AS_1_1_1_1 = 61, -}; - -/* - * ALU instructions: - */ - -typedef enum { - ADDs = 0, - ADD_PREVs = 1, - MULs = 2, - MUL_PREVs = 3, - MUL_PREV2s = 4, - MAXs = 5, - MINs = 6, - SETEs = 7, - SETGTs = 8, - SETGTEs = 9, - SETNEs = 10, - FRACs = 11, - TRUNCs = 12, - FLOORs = 13, - EXP_IEEE = 14, - LOG_CLAMP = 15, - LOG_IEEE = 16, - RECIP_CLAMP = 17, - RECIP_FF = 18, - RECIP_IEEE = 19, - RECIPSQ_CLAMP = 20, - RECIPSQ_FF = 21, - RECIPSQ_IEEE = 22, - MOVAs = 23, - MOVA_FLOORs = 24, - SUBs = 25, - 
SUB_PREVs = 26, - PRED_SETEs = 27, - PRED_SETNEs = 28, - PRED_SETGTs = 29, - PRED_SETGTEs = 30, - PRED_SET_INVs = 31, - PRED_SET_POPs = 32, - PRED_SET_CLRs = 33, - PRED_SET_RESTOREs = 34, - KILLEs = 35, - KILLGTs = 36, - KILLGTEs = 37, - KILLNEs = 38, - KILLONEs = 39, - SQRT_IEEE = 40, - MUL_CONST_0 = 42, - MUL_CONST_1 = 43, - ADD_CONST_0 = 44, - ADD_CONST_1 = 45, - SUB_CONST_0 = 46, - SUB_CONST_1 = 47, - SIN = 48, - COS = 49, - RETAIN_PREV = 50, -} instr_scalar_opc_t; - -typedef enum { - ADDv = 0, - MULv = 1, - MAXv = 2, - MINv = 3, - SETEv = 4, - SETGTv = 5, - SETGTEv = 6, - SETNEv = 7, - FRACv = 8, - TRUNCv = 9, - FLOORv = 10, - MULADDv = 11, - CNDEv = 12, - CNDGTEv = 13, - CNDGTv = 14, - DOT4v = 15, - DOT3v = 16, - DOT2ADDv = 17, - CUBEv = 18, - MAX4v = 19, - PRED_SETE_PUSHv = 20, - PRED_SETNE_PUSHv = 21, - PRED_SETGT_PUSHv = 22, - PRED_SETGTE_PUSHv = 23, - KILLEv = 24, - KILLGTv = 25, - KILLGTEv = 26, - KILLNEv = 27, - DSTv = 28, - MOVAv = 29, -} instr_vector_opc_t; - -XEPACKEDSTRUCT(instr_alu_t, { - /* dword0: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t vector_dest : 6; - uint32_t vector_dest_rel : 1; - uint32_t abs_constants : 1; - uint32_t scalar_dest : 6; - uint32_t scalar_dest_rel : 1; - uint32_t export_data : 1; - uint32_t vector_write_mask : 4; - uint32_t scalar_write_mask : 4; - uint32_t vector_clamp : 1; - uint32_t scalar_clamp : 1; - uint32_t scalar_opc : 6; // instr_scalar_opc_t - }); - /* dword1: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t src3_swiz : 8; - uint32_t src2_swiz : 8; - uint32_t src1_swiz : 8; - uint32_t src3_reg_negate : 1; - uint32_t src2_reg_negate : 1; - uint32_t src1_reg_negate : 1; - uint32_t pred_condition : 1; - uint32_t pred_select : 1; - uint32_t relative_addr : 1; - uint32_t const_1_rel_abs : 1; - uint32_t const_0_rel_abs : 1; - }); - /* dword2: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t src3_reg : 8; - uint32_t src2_reg : 8; - uint32_t src1_reg : 8; - uint32_t vector_opc : 5; // instr_vector_opc_t - uint32_t src3_sel : 1; - 
uint32_t src2_sel : 1; - uint32_t src1_sel : 1; - }); -}); - -/* - * CF instructions: - */ - -typedef enum { - NOP = 0, - EXEC = 1, - EXEC_END = 2, - COND_EXEC = 3, - COND_EXEC_END = 4, - COND_PRED_EXEC = 5, - COND_PRED_EXEC_END = 6, - LOOP_START = 7, - LOOP_END = 8, - COND_CALL = 9, - RETURN = 10, - COND_JMP = 11, - ALLOC = 12, - COND_EXEC_PRED_CLEAN = 13, - COND_EXEC_PRED_CLEAN_END = 14, - MARK_VS_FETCH_DONE = 15, -} instr_cf_opc_t; - -typedef enum { - RELATIVE_ADDR = 0, - ABSOLUTE_ADDR = 1, -} instr_addr_mode_t; - -typedef enum { - SQ_NO_ALLOC = 0, - SQ_POSITION = 1, - SQ_PARAMETER_PIXEL = 2, - SQ_MEMORY = 3, -} instr_alloc_type_t; - -XEPACKEDSTRUCT(instr_cf_exec_t, { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t address : 12; - uint32_t count : 3; - uint32_t yeild : 1; - uint32_t serialize : 12; - uint32_t vc_hi : 4; - }); - XEPACKEDSTRUCTANONYMOUS({ - uint32_t vc_lo : 2; /* vertex cache? */ - uint32_t bool_addr : 8; - uint32_t pred_condition : 1; - uint32_t address_mode : 1; // instr_addr_mode_t - uint32_t opc : 4; // instr_cf_opc_t - }); - bool is_cond_exec() const { - return (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) || - (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) || - (this->opc == COND_EXEC_PRED_CLEAN) || - (this->opc == COND_EXEC_PRED_CLEAN_END); - } -}); - -XEPACKEDSTRUCT(instr_cf_loop_t, { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t address : 13; - uint32_t repeat : 1; - uint32_t reserved0 : 2; - uint32_t loop_id : 5; - uint32_t pred_break : 1; - uint32_t reserved1_hi : 10; - }); - XEPACKEDSTRUCTANONYMOUS({ - uint32_t reserved1_lo : 10; - uint32_t condition : 1; - uint32_t address_mode : 1; // instr_addr_mode_t - uint32_t opc : 4; // instr_cf_opc_t - }); -}); - -XEPACKEDSTRUCT(instr_cf_jmp_call_t, { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t address : 13; - uint32_t force_call : 1; - uint32_t predicated_jmp : 1; - uint32_t reserved1_hi : 17; - }); - XEPACKEDSTRUCTANONYMOUS({ - uint32_t reserved1_lo : 1; - uint32_t direction : 1; - 
uint32_t bool_addr : 8; - uint32_t condition : 1; - uint32_t address_mode : 1; // instr_addr_mode_t - uint32_t opc : 4; // instr_cf_opc_t - }); -}); - -XEPACKEDSTRUCT(instr_cf_alloc_t, { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t size : 3; - uint32_t reserved0_hi : 29; - }); - XEPACKEDSTRUCTANONYMOUS({ - uint32_t reserved0_lo : 8; - uint32_t no_serial : 1; - uint32_t buffer_select : 2; // instr_alloc_type_t - uint32_t alloc_mode : 1; - uint32_t opc : 4; // instr_cf_opc_t - }); -}); - -XEPACKEDUNION(instr_cf_t, { - instr_cf_exec_t exec; - instr_cf_loop_t loop; - instr_cf_jmp_call_t jmp_call; - instr_cf_alloc_t alloc; - XEPACKEDSTRUCTANONYMOUS({ - uint32_t: - 32; - uint32_t: - 12; - uint32_t opc : 4; // instr_cf_opc_t - }); - XEPACKEDSTRUCTANONYMOUS({ - uint32_t dword_0; - uint32_t dword_1; - }); - - bool is_exec() const { - return (this->opc == EXEC) || (this->opc == EXEC_END) || - (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) || - (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) || - (this->opc == COND_EXEC_PRED_CLEAN) || - (this->opc == COND_EXEC_PRED_CLEAN_END); - } - bool is_cond_exec() const { - return (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) || - (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) || - (this->opc == COND_EXEC_PRED_CLEAN) || - (this->opc == COND_EXEC_PRED_CLEAN_END); - } -}); - -/* - * FETCH instructions: - */ - -typedef enum { - VTX_FETCH = 0, - TEX_FETCH = 1, - TEX_GET_BORDER_COLOR_FRAC = 16, - TEX_GET_COMP_TEX_LOD = 17, - TEX_GET_GRADIENTS = 18, - TEX_GET_WEIGHTS = 19, - TEX_SET_TEX_LOD = 24, - TEX_SET_GRADIENTS_H = 25, - TEX_SET_GRADIENTS_V = 26, - TEX_RESERVED_4 = 27, -} instr_fetch_opc_t; - -typedef enum { - TEX_FILTER_POINT = 0, - TEX_FILTER_LINEAR = 1, - TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */ - TEX_FILTER_USE_FETCH_CONST = 3, -} instr_tex_filter_t; - -typedef enum { - ANISO_FILTER_DISABLED = 0, - ANISO_FILTER_MAX_1_1 = 1, - ANISO_FILTER_MAX_2_1 = 2, - 
ANISO_FILTER_MAX_4_1 = 3, - ANISO_FILTER_MAX_8_1 = 4, - ANISO_FILTER_MAX_16_1 = 5, - ANISO_FILTER_USE_FETCH_CONST = 7, -} instr_aniso_filter_t; - -typedef enum { - ARBITRARY_FILTER_2X4_SYM = 0, - ARBITRARY_FILTER_2X4_ASYM = 1, - ARBITRARY_FILTER_4X2_SYM = 2, - ARBITRARY_FILTER_4X2_ASYM = 3, - ARBITRARY_FILTER_4X4_SYM = 4, - ARBITRARY_FILTER_4X4_ASYM = 5, - ARBITRARY_FILTER_USE_FETCH_CONST = 7, -} instr_arbitrary_filter_t; - -typedef enum { - SAMPLE_CENTROID = 0, - SAMPLE_CENTER = 1, -} instr_sample_loc_t; - -typedef enum { - DIMENSION_1D = 0, - DIMENSION_2D = 1, - DIMENSION_3D = 2, - DIMENSION_CUBE = 3, -} instr_dimension_t; - -typedef enum a2xx_sq_surfaceformat instr_surf_fmt_t; - -XEPACKEDSTRUCT(instr_fetch_tex_t, { - /* dword0: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t opc : 5; // instr_fetch_opc_t - uint32_t src_reg : 6; - uint32_t src_reg_am : 1; - uint32_t dst_reg : 6; - uint32_t dst_reg_am : 1; - uint32_t fetch_valid_only : 1; - uint32_t const_idx : 5; - uint32_t tx_coord_denorm : 1; - uint32_t src_swiz : 6; // xyz - }); - /* dword1: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t dst_swiz : 12; // xyzw - uint32_t mag_filter : 2; // instr_tex_filter_t - uint32_t min_filter : 2; // instr_tex_filter_t - uint32_t mip_filter : 2; // instr_tex_filter_t - uint32_t aniso_filter : 3; // instr_aniso_filter_t - uint32_t arbitrary_filter : 3; // instr_arbitrary_filter_t - uint32_t vol_mag_filter : 2; // instr_tex_filter_t - uint32_t vol_min_filter : 2; // instr_tex_filter_t - uint32_t use_comp_lod : 1; - uint32_t use_reg_lod : 1; - uint32_t unk : 1; - uint32_t pred_select : 1; - }); - /* dword2: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t use_reg_gradients : 1; - uint32_t sample_location : 1; // instr_sample_loc_t - uint32_t lod_bias : 7; - uint32_t unused : 5; - uint32_t dimension : 2; // instr_dimension_t - uint32_t offset_x : 5; - uint32_t offset_y : 5; - uint32_t offset_z : 5; - uint32_t pred_condition : 1; - }); -}); - -XEPACKEDSTRUCT(instr_fetch_vtx_t, { - /* dword0: */ 
- XEPACKEDSTRUCTANONYMOUS({ - uint32_t opc : 5; // instr_fetch_opc_t - uint32_t src_reg : 6; - uint32_t src_reg_am : 1; - uint32_t dst_reg : 6; - uint32_t dst_reg_am : 1; - uint32_t must_be_one : 1; - uint32_t const_index : 5; - uint32_t const_index_sel : 2; - uint32_t reserved0 : 3; - uint32_t src_swiz : 2; - }); - /* dword1: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t dst_swiz : 12; - uint32_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? */ - uint32_t num_format_all : 1; /* '0' for normalized, '1' for unnormalized */ - uint32_t signed_rf_mode_all : 1; - uint32_t reserved1 : 1; - uint32_t format : 6; // instr_surf_fmt_t - uint32_t reserved2 : 1; - uint32_t exp_adjust_all : 7; - uint32_t reserved3 : 1; - uint32_t pred_select : 1; - }); - /* dword2: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t stride : 8; - uint32_t offset : 23; - uint32_t pred_condition : 1; - }); -}); - -XEPACKEDUNION(instr_fetch_t, { - instr_fetch_tex_t tex; - instr_fetch_vtx_t vtx; - XEPACKEDSTRUCTANONYMOUS({ - /* dword0: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t opc : 5; // instr_fetch_opc_t - uint32_t: - 27; - }); - /* dword1: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t: - 32; - }); - /* dword2: */ - XEPACKEDSTRUCTANONYMOUS({ - uint32_t: - 32; - }); - }); -}); - -} // namespace ucode -} // namespace gl4 -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_GL4_UCODE_H_ diff --git a/src/xenia/gpu/gl4/ucode_disassembler.cc b/src/xenia/gpu/gl4/ucode_disassembler.cc deleted file mode 100644 index 2726f6594..000000000 --- a/src/xenia/gpu/gl4/ucode_disassembler.cc +++ /dev/null @@ -1,780 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "xenia/gpu/gl4/ucode_disassembler.h" - -#include -#include -#include -#include - -#include "xenia/base/assert.h" -#include "xenia/base/string_buffer.h" -#include "xenia/gpu/gl4/ucode.h" - -namespace xe { -namespace gpu { -namespace gl4 { - -using namespace xe::gpu::gl4::ucode; -using namespace xe::gpu::xenos; - -static const char* levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -/* - * ALU instructions: - */ - -static const char chan_names[] = { - 'x', 'y', 'z', 'w', - /* these only apply to FETCH dst's: */ - '0', '1', '?', '_', -}; - -void print_srcreg(StringBuffer* output, uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, uint32_t abs_constants, - bool const_rel, ShaderType shader_type) { - if (negate) { - output->Append('-'); - } - if (type) { - if (num & 0x80) { - output->Append("abs("); - } - output->AppendFormat("R%u", num & 0x7F); - if (num & 0x80) { - output->Append(')'); - } - } else { - if (abs_constants) { - output->Append('|'); - } - num += shader_type == ShaderType::kPixel ? 256 : 0; - - if (const_rel) { - output->AppendFormat("C[%u + a0]", num); - } else { - output->AppendFormat("C%u", num); - } - - if (abs_constants) { - output->Append('|'); - } - } - if (swiz) { - output->Append('.'); - for (int i = 0; i < 4; i++) { - output->Append(chan_names[(swiz + i) & 0x3]); - swiz >>= 2; - } - } -} - -void print_dstreg(StringBuffer* output, uint32_t num, uint32_t mask, - uint32_t dst_exp) { - output->AppendFormat("%s%u", dst_exp ? "export" : "R", num); - if (mask != 0xf) { - output->Append('.'); - for (int i = 0; i < 4; i++) { - output->Append((mask & 0x1) ? 
chan_names[i] : '_'); - mask >>= 1; - } - } -} - -void print_export_comment(StringBuffer* output, uint32_t num, ShaderType type) { - const char* name = NULL; - switch (type) { - case ShaderType::kVertex: - switch (num) { - case 62: - name = "gl_Position"; - break; - case 63: - name = "gl_PointSize"; - break; - } - break; - case ShaderType::kPixel: - switch (num) { - case 0: - name = "gl_FragColor"; - break; - } - break; - } - /* if we had a symbol table here, we could look - * up the name of the varying.. - */ - if (name) { - output->AppendFormat("\t; %s", name); - } -} - -#define INSTR(opc, num_srcs) \ - { num_srcs, #opc } -struct { - uint32_t num_srcs; - const char* name; -} vector_instructions[0x20] = - { - INSTR(ADDv, 2), // 0 - INSTR(MULv, 2), // 1 - INSTR(MAXv, 2), // 2 - INSTR(MINv, 2), // 3 - INSTR(SETEv, 2), // 4 - INSTR(SETGTv, 2), // 5 - INSTR(SETGTEv, 2), // 6 - INSTR(SETNEv, 2), // 7 - INSTR(FRACv, 1), // 8 - INSTR(TRUNCv, 1), // 9 - INSTR(FLOORv, 1), // 10 - INSTR(MULADDv, 3), // 11 - INSTR(CNDEv, 3), // 12 - INSTR(CNDGTEv, 3), // 13 - INSTR(CNDGTv, 3), // 14 - INSTR(DOT4v, 2), // 15 - INSTR(DOT3v, 2), // 16 - INSTR(DOT2ADDv, 3), // 17 -- ??? 
- INSTR(CUBEv, 2), // 18 - INSTR(MAX4v, 1), // 19 - INSTR(PRED_SETE_PUSHv, 2), // 20 - INSTR(PRED_SETNE_PUSHv, 2), // 21 - INSTR(PRED_SETGT_PUSHv, 2), // 22 - INSTR(PRED_SETGTE_PUSHv, 2), // 23 - INSTR(KILLEv, 2), // 24 - INSTR(KILLGTv, 2), // 25 - INSTR(KILLGTEv, 2), // 26 - INSTR(KILLNEv, 2), // 27 - INSTR(DSTv, 2), // 28 - INSTR(MOVAv, 1), // 29 -}, - scalar_instructions[0x40] = { - INSTR(ADDs, 1), // 0 - INSTR(ADD_PREVs, 1), // 1 - INSTR(MULs, 1), // 2 - INSTR(MUL_PREVs, 1), // 3 - INSTR(MUL_PREV2s, 1), // 4 - INSTR(MAXs, 1), // 5 - INSTR(MINs, 1), // 6 - INSTR(SETEs, 1), // 7 - INSTR(SETGTs, 1), // 8 - INSTR(SETGTEs, 1), // 9 - INSTR(SETNEs, 1), // 10 - INSTR(FRACs, 1), // 11 - INSTR(TRUNCs, 1), // 12 - INSTR(FLOORs, 1), // 13 - INSTR(EXP_IEEE, 1), // 14 - INSTR(LOG_CLAMP, 1), // 15 - INSTR(LOG_IEEE, 1), // 16 - INSTR(RECIP_CLAMP, 1), // 17 - INSTR(RECIP_FF, 1), // 18 - INSTR(RECIP_IEEE, 1), // 19 - INSTR(RECIPSQ_CLAMP, 1), // 20 - INSTR(RECIPSQ_FF, 1), // 21 - INSTR(RECIPSQ_IEEE, 1), // 22 - INSTR(MOVAs, 1), // 23 - INSTR(MOVA_FLOORs, 1), // 24 - INSTR(SUBs, 1), // 25 - INSTR(SUB_PREVs, 1), // 26 - INSTR(PRED_SETEs, 1), // 27 - INSTR(PRED_SETNEs, 1), // 28 - INSTR(PRED_SETGTs, 1), // 29 - INSTR(PRED_SETGTEs, 1), // 30 - INSTR(PRED_SET_INVs, 1), // 31 - INSTR(PRED_SET_POPs, 1), // 32 - INSTR(PRED_SET_CLRs, 1), // 33 - INSTR(PRED_SET_RESTOREs, 1), // 34 - INSTR(KILLEs, 1), // 35 - INSTR(KILLGTs, 1), // 36 - INSTR(KILLGTEs, 1), // 37 - INSTR(KILLNEs, 1), // 38 - INSTR(KILLONEs, 1), // 39 - INSTR(SQRT_IEEE, 1), // 40 - {0, 0}, // - INSTR(MUL_CONST_0, 2), // 42 - INSTR(MUL_CONST_1, 2), // 43 - INSTR(ADD_CONST_0, 2), // 44 - INSTR(ADD_CONST_1, 2), // 45 - INSTR(SUB_CONST_0, 2), // 46 - INSTR(SUB_CONST_1, 2), // 47 - INSTR(SIN, 1), // 48 - INSTR(COS, 1), // 49 - INSTR(RETAIN_PREV, 1), // 50 -#undef INSTR -}; - -int disasm_alu(StringBuffer* output, const uint32_t* dwords, uint32_t alu_off, - int level, int sync, ShaderType type) { - const instr_alu_t* alu = (const 
instr_alu_t*)dwords; - - output->Append(levels[level]); - output->AppendFormat("%02x: %08x %08x %08x\t", alu_off, dwords[0], dwords[1], - dwords[2]); - - output->AppendFormat(" %sALU:\t", sync ? "(S)" : " "); - - if (!alu->scalar_write_mask && !alu->vector_write_mask) { - output->Append(" \n"); - } - - if (alu->vector_write_mask) { - output->Append(vector_instructions[alu->vector_opc].name); - - if (alu->pred_select & 0x2) { - // seems to work similar to conditional execution in ARM instruction - // set, so let's use a similar syntax for now: - output->Append((alu->pred_select & 0x1) ? "EQ" : "NE"); - } - - output->Append("\t"); - - print_dstreg(output, alu->vector_dest, alu->vector_write_mask, - alu->export_data); - output->Append(" = "); - if (vector_instructions[alu->vector_opc].num_srcs == 3) { - print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->abs_constants, false, type); - output->Append(", "); - } - bool const_rel = alu->const_0_rel_abs && alu->relative_addr; - print_srcreg(output, alu->src1_reg, alu->src1_sel, alu->src1_swiz, - alu->src1_reg_negate, alu->abs_constants, const_rel, type); - if (vector_instructions[alu->vector_opc].num_srcs > 1) { - if (alu->src1_sel == 0) { - const_rel = alu->const_1_rel_abs && alu->relative_addr; - } - output->Append(", "); - print_srcreg(output, alu->src2_reg, alu->src2_sel, alu->src2_swiz, - alu->src2_reg_negate, alu->abs_constants, const_rel, type); - } - - if (alu->vector_clamp) { - output->Append(" CLAMP"); - } - if (alu->pred_select) { - output->AppendFormat(" COND(%d)", alu->pred_condition); - } - if (alu->export_data) { - print_export_comment(output, alu->vector_dest, type); - } - - output->Append('\n'); - } - - if (alu->scalar_write_mask || !alu->vector_write_mask) { - // 2nd optional scalar op: - - if (alu->vector_write_mask) { - output->Append(levels[level]); - output->AppendFormat(" \t\t\t\t\t\t \t"); - } - - if (scalar_instructions[alu->scalar_opc].name) { - 
output->AppendFormat("%s\t", scalar_instructions[alu->scalar_opc].name); - } else { - output->AppendFormat("OP(%u)\t", alu->scalar_opc); - } - - print_dstreg(output, alu->scalar_dest, alu->scalar_write_mask, - alu->export_data); - output->Append(" = "); - if (scalar_instructions[alu->scalar_opc].num_srcs == 2) { - // MUL/ADD/etc - // Clever, CONST_0 and CONST_1 are just an extra storage bit. - // ADD_CONST_0 dest, [const], [reg] - uint32_t src3_swiz = alu->src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - print_srcreg(output, alu->src3_reg, 0, 0, alu->src3_reg_negate, - alu->abs_constants, false, type); - output->AppendFormat(".%c", chan_names[swiz_a]); - output->Append(", "); - uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | - (alu->src3_sel << 1); - print_srcreg(output, reg2, 1, 0, alu->src3_reg_negate, alu->abs_constants, - false, type); - output->AppendFormat(".%c", chan_names[swiz_b]); - } else { - print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->abs_constants, false, type); - } - if (alu->scalar_clamp) { - output->Append(" CLAMP"); - } - if (alu->export_data) { - print_export_comment(output, alu->scalar_dest, type); - } - output->Append('\n'); - } - - return 0; -} - -struct { - const char* name; -} fetch_types[0xff] = { -#define TYPE(id) \ - { #id } - TYPE(FMT_1_REVERSE), // 0 - {0}, - TYPE(FMT_8), // 2 - {0}, - {0}, - {0}, - TYPE(FMT_8_8_8_8), // 6 - TYPE(FMT_2_10_10_10), // 7 - {0}, - {0}, - TYPE(FMT_8_8), // 10 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_16), // 24 - TYPE(FMT_16_16), // 25 - TYPE(FMT_16_16_16_16), // 26 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32), // 33 - TYPE(FMT_32_32), // 34 - TYPE(FMT_32_32_32_32), // 35 - TYPE(FMT_32_FLOAT), // 36 - TYPE(FMT_32_32_FLOAT), // 37 - TYPE(FMT_32_32_32_32_FLOAT), // 38 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - 
{0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32_32_32_FLOAT), // 57 -#undef TYPE -}; - -void print_fetch_dst(StringBuffer* output, uint32_t dst_reg, - uint32_t dst_swiz) { - output->AppendFormat("\tR%u.", dst_reg); - for (int i = 0; i < 4; i++) { - output->Append(chan_names[dst_swiz & 0x7]); - dst_swiz >>= 3; - } -} - -void print_fetch_vtx(StringBuffer* output, const instr_fetch_t* fetch) { - const instr_fetch_vtx_t* vtx = &fetch->vtx; - - if (vtx->pred_select) { - // seems to work similar to conditional execution in ARM instruction - // set, so let's use a similar syntax for now: - output->Append(vtx->pred_condition ? "EQ" : "NE"); - } - - print_fetch_dst(output, vtx->dst_reg, vtx->dst_swiz); - output->AppendFormat(" = R%u.", vtx->src_reg); - output->Append(chan_names[vtx->src_swiz & 0x3]); - if (fetch_types[vtx->format].name) { - output->AppendFormat(" %s", fetch_types[vtx->format].name); - } else { - output->AppendFormat(" TYPE(0x%x)", vtx->format); - } - output->AppendFormat(" %s", vtx->format_comp_all ? 
"SIGNED" : "UNSIGNED"); - if (!vtx->num_format_all) { - output->Append(" NORMALIZED"); - } - output->AppendFormat(" STRIDE(%u)", vtx->stride); - if (vtx->offset) { - output->AppendFormat(" OFFSET(%u)", vtx->offset); - } - output->AppendFormat(" CONST(%u, %u)", vtx->const_index, - vtx->const_index_sel); - if (vtx->pred_select) { - output->AppendFormat(" COND(%d)", vtx->pred_condition); - } - if (1) { - // XXX - output->AppendFormat(" src_reg_am=%u", vtx->src_reg_am); - output->AppendFormat(" dst_reg_am=%u", vtx->dst_reg_am); - output->AppendFormat(" num_format_all=%u", vtx->num_format_all); - output->AppendFormat(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); - output->AppendFormat(" exp_adjust_all=%u", vtx->exp_adjust_all); - } -} - -void print_fetch_tex(StringBuffer* output, const instr_fetch_t* fetch) { - static const char* filter[] = { - "POINT", // TEX_FILTER_POINT - "LINEAR", // TEX_FILTER_LINEAR - "BASEMAP", // TEX_FILTER_BASEMAP - }; - static const char* aniso_filter[] = { - "DISABLED", // ANISO_FILTER_DISABLED - "MAX_1_1", // ANISO_FILTER_MAX_1_1 - "MAX_2_1", // ANISO_FILTER_MAX_2_1 - "MAX_4_1", // ANISO_FILTER_MAX_4_1 - "MAX_8_1", // ANISO_FILTER_MAX_8_1 - "MAX_16_1", // ANISO_FILTER_MAX_16_1 - }; - static const char* arbitrary_filter[] = { - "2x4_SYM", // ARBITRARY_FILTER_2X4_SYM - "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM - "4x2_SYM", // ARBITRARY_FILTER_4X2_SYM - "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM - "4x4_SYM", // ARBITRARY_FILTER_4X4_SYM - "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM - }; - static const char* sample_loc[] = { - "CENTROID", // SAMPLE_CENTROID - "CENTER", // SAMPLE_CENTER - }; - const instr_fetch_tex_t* tex = &fetch->tex; - uint32_t src_swiz = tex->src_swiz; - - if (tex->pred_select) { - // seems to work similar to conditional execution in ARM instruction - // set, so let's use a similar syntax for now: - output->Append(tex->pred_condition ? 
"EQ" : "NE"); - } - - print_fetch_dst(output, tex->dst_reg, tex->dst_swiz); - output->AppendFormat(" = R%u.", tex->src_reg); - for (int i = 0; i < 3; i++) { - output->Append(chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - output->AppendFormat(" CONST(%u)", tex->const_idx); - if (tex->fetch_valid_only) { - output->Append(" VALID_ONLY"); - } - if (tex->tx_coord_denorm) { - output->Append(" DENORM"); - } - if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" MAG(%s)", filter[tex->mag_filter]); - } - if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" MIN(%s)", filter[tex->min_filter]); - } - if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" MIP(%s)", filter[tex->mip_filter]); - } - if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" ANISO(%s)", aniso_filter[tex->aniso_filter]); - } - if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" ARBITRARY(%s)", - arbitrary_filter[tex->arbitrary_filter]); - } - if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); - } - if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { - output->AppendFormat(" VOL_MIN(%s)", filter[tex->vol_min_filter]); - } - if (!tex->use_comp_lod) { - output->AppendFormat(" LOD(%u)", tex->use_comp_lod); - output->AppendFormat(" LOD_BIAS(%u)", tex->lod_bias); - } - if (tex->use_reg_lod) { - output->AppendFormat(" REG_LOD(%u)", tex->use_reg_lod); - } - if (tex->use_reg_gradients) { - output->Append(" USE_REG_GRADIENTS"); - } - output->AppendFormat(" LOCATION(%s)", sample_loc[tex->sample_location]); - if (tex->offset_x || tex->offset_y || tex->offset_z) { - output->AppendFormat(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, - tex->offset_z); - } - if (tex->pred_select) { - output->AppendFormat(" COND(%d)", tex->pred_condition); - } -} - -struct { - const char* name; - void 
(*fxn)(StringBuffer* output, const instr_fetch_t* cf); -} fetch_instructions[] = { -#define INSTR(opc, name, fxn) \ - { name, fxn } - INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx), // 0 - INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex), // 1 - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex), // 16 - INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex), // 17 - INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex), // 18 - INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex), // 19 - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex), // 24 - INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex), // 25 - INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex), // 26 - INSTR(TEX_RESERVED_4, "?", print_fetch_tex), // 27 -#undef INSTR -}; - -int disasm_fetch(StringBuffer* output, const uint32_t* dwords, uint32_t alu_off, - int level, int sync) { - const instr_fetch_t* fetch = (const instr_fetch_t*)dwords; - - output->Append(levels[level]); - output->AppendFormat("%02x: %08x %08x %08x\t", alu_off, dwords[0], dwords[1], - dwords[2]); - - output->AppendFormat(" %sFETCH:\t", sync ? 
"(S)" : " "); - if (fetch_instructions[fetch->opc].fxn) { - output->Append(fetch_instructions[fetch->opc].name); - fetch_instructions[fetch->opc].fxn(output, fetch); - } else { - output->Append("???"); - } - output->Append('\n'); - - return 0; -} - -void print_cf_nop(StringBuffer* output, const instr_cf_t* cf) {} - -void print_cf_exec(StringBuffer* output, const instr_cf_t* cf) { - output->AppendFormat(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, - cf->exec.count); - if (cf->exec.yeild) { - output->Append(" YIELD"); - } - uint8_t vc = uint8_t(cf->exec.vc_hi | (cf->exec.vc_lo << 2)); - if (vc) { - output->AppendFormat(" VC(0x%x)", vc); - } - if (cf->exec.bool_addr) { - output->AppendFormat(" BOOL_ADDR(0x%x)", cf->exec.bool_addr); - } - if (cf->exec.address_mode == ABSOLUTE_ADDR) { - output->Append(" ABSOLUTE_ADDR"); - } - if (cf->is_cond_exec()) { - output->AppendFormat(" COND(%d)", cf->exec.pred_condition); - } -} - -void print_cf_loop(StringBuffer* output, const instr_cf_t* cf) { - output->AppendFormat(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, - cf->loop.loop_id); - if (cf->loop.address_mode == ABSOLUTE_ADDR) { - output->Append(" ABSOLUTE_ADDR"); - } -} - -void print_cf_jmp_call(StringBuffer* output, const instr_cf_t* cf) { - output->AppendFormat(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, - cf->jmp_call.direction); - if (cf->jmp_call.force_call) { - output->Append(" FORCE_CALL"); - } - if (cf->jmp_call.predicated_jmp) { - output->AppendFormat(" COND(%d)", cf->jmp_call.condition); - } - if (cf->jmp_call.bool_addr) { - output->AppendFormat(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr); - } - if (cf->jmp_call.address_mode == ABSOLUTE_ADDR) { - output->Append(" ABSOLUTE_ADDR"); - } -} - -void print_cf_alloc(StringBuffer* output, const instr_cf_t* cf) { - static const char* bufname[] = { - "NO ALLOC", // SQ_NO_ALLOC - "POSITION", // SQ_POSITION - "PARAM/PIXEL", // SQ_PARAMETER_PIXEL - "MEMORY", // SQ_MEMORY - }; - output->AppendFormat(" %s SIZE(0x%x)", 
bufname[cf->alloc.buffer_select], - cf->alloc.size); - if (cf->alloc.no_serial) { - output->Append(" NO_SERIAL"); - } - if (cf->alloc.alloc_mode) { - // ??? - output->Append(" ALLOC_MODE"); - } -} - -struct { - const char* name; - void (*fxn)(StringBuffer* output, const instr_cf_t* cf); -} cf_instructions[] = { -#define INSTR(opc, fxn) \ - { #opc, fxn } - INSTR(NOP, print_cf_nop), // - INSTR(EXEC, print_cf_exec), // - INSTR(EXEC_END, print_cf_exec), // - INSTR(COND_EXEC, print_cf_exec), // - INSTR(COND_EXEC_END, print_cf_exec), // - INSTR(COND_PRED_EXEC, print_cf_exec), // - INSTR(COND_PRED_EXEC_END, print_cf_exec), // - INSTR(LOOP_START, print_cf_loop), // - INSTR(LOOP_END, print_cf_loop), // - INSTR(COND_CALL, print_cf_jmp_call), // - INSTR(RETURN, print_cf_jmp_call), // - INSTR(COND_JMP, print_cf_jmp_call), // - INSTR(ALLOC, print_cf_alloc), // - INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), // - INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), // - INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? -#undef INSTR -}; - -static void print_cf(StringBuffer* output, const instr_cf_t* cf, int level) { - output->Append(levels[level]); - - auto words = reinterpret_cast(cf); - output->AppendFormat(" %04x %04x %04x \t", words[0], words[1], - words[2]); - - output->AppendFormat(cf_instructions[cf->opc].name); - cf_instructions[cf->opc].fxn(output, cf); - output->Append('\n'); -} - -/* - * The adreno shader microcode consists of two parts: - * 1) A CF (control-flow) program, at the header of the compiled shader, - * which refers to ALU/FETCH instructions that follow it by address. 
- * 2) ALU and FETCH instructions - */ -void disasm_exec(StringBuffer* output, const uint32_t* dwords, - size_t dword_count, int level, ShaderType type, - const instr_cf_t* cf) { - uint32_t sequence = cf->exec.serialize; - for (uint32_t i = 0; i < cf->exec.count; i++) { - uint32_t alu_off = (cf->exec.address + i); - if (sequence & 0x1) { - disasm_fetch(output, dwords + alu_off * 3, alu_off, level, - sequence & 0x2); - } else { - disasm_alu(output, dwords + alu_off * 3, alu_off, level, sequence & 0x2, - type); - } - sequence >>= 2; - } -} - -std::string DisassembleShader(ShaderType type, const uint32_t* dwords, - size_t dword_count) { - StringBuffer string_buffer(256 * 1024); - - instr_cf_t cfa; - instr_cf_t cfb; - for (int idx = 0; idx < dword_count; idx += 3) { - uint32_t dword_0 = dwords[idx + 0]; - uint32_t dword_1 = dwords[idx + 1]; - uint32_t dword_2 = dwords[idx + 2]; - cfa.dword_0 = dword_0; - cfa.dword_1 = dword_1 & 0xFFFF; - cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); - cfb.dword_1 = dword_2 >> 16; - print_cf(&string_buffer, &cfa, 0); - if (cfa.is_exec()) { - disasm_exec(&string_buffer, dwords, dword_count, 0, type, &cfa); - } - print_cf(&string_buffer, &cfb, 0); - if (cfb.is_exec()) { - disasm_exec(&string_buffer, dwords, dword_count, 0, type, &cfb); - } - if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { - break; - } - } - - return string_buffer.to_string(); -} - -} // namespace gl4 -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/gl4/ucode_disassembler.h b/src/xenia/gpu/gl4/ucode_disassembler.h deleted file mode 100644 index 834a229f1..000000000 --- a/src/xenia/gpu/gl4/ucode_disassembler.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. 
* - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_ -#define XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_ - -#include - -#include "xenia/gpu/xenos.h" - -namespace xe { -namespace gpu { -namespace gl4 { - -std::string DisassembleShader(ShaderType type, const uint32_t* dwords, - size_t dword_count); - -} // namespace gl4 -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_ diff --git a/src/xenia/gpu/glsl_shader_translator.cc b/src/xenia/gpu/glsl_shader_translator.cc index 425dd2443..c80633ae7 100644 --- a/src/xenia/gpu/glsl_shader_translator.cc +++ b/src/xenia/gpu/glsl_shader_translator.cc @@ -55,6 +55,13 @@ GlslShaderTranslator::GlslShaderTranslator(Dialect dialect) GlslShaderTranslator::~GlslShaderTranslator() = default; +void GlslShaderTranslator::Reset() { + ShaderTranslator::Reset(); + depth_ = 0; + depth_prefix_[0] = 0; + source_.Reset(); +} + void GlslShaderTranslator::EmitTranslationError(const char* message) { ShaderTranslator::EmitTranslationError(message); EmitSourceDepth("// TRANSLATION ERROR: %s\n", message); @@ -84,7 +91,7 @@ void GlslShaderTranslator::StartTranslation() { // We have a large amount of shared state defining uniforms and some common // utility functions used in both vertex and pixel shaders. EmitSource(R"( -version 450 +#version 450 #extension all : warn #extension GL_ARB_bindless_texture : require #extension GL_ARB_explicit_uniform_location : require @@ -228,7 +235,7 @@ void applyAlphaTest(int alpha_func, float alpha_ref) { if (!passes) discard; } void processFragment(const in StateData state); -void main() { + +void main() { const StateData state = states[draw_id]; processFragment(state); if (state.alpha_test.x != 0.0) { @@ -241,11 +248,13 @@ void main() { + // Add vertex shader input declarations. 
if (is_vertex_shader()) { for (auto& binding : vertex_bindings()) { - const char* type_name = - GetVertexFormatTypeName(binding.fetch_instr.attributes.data_format); - EmitSource("layout(location = %d) in %s vf%u_%d;\n", - binding.binding_index, type_name, binding.fetch_constant, - binding.fetch_instr.attributes.offset); + for (auto& attrib : binding.attributes) { + const char* type_name = + GetVertexFormatTypeName(attrib.fetch_instr.attributes.data_format); + EmitSource("layout(location = %d) in %s vf%u_%d;\n", + attrib.attrib_index, type_name, binding.fetch_constant, + attrib.fetch_instr.attributes.offset); + } } } @@ -273,6 +282,10 @@ void main() { + EmitSource(" bool p0 = false;\n"); // Address register when using absolute addressing. EmitSource(" int a0 = 0;\n"); + // Temps for source register values. + EmitSource(" vec4 src0;\n"); + EmitSource(" vec4 src1;\n"); + EmitSource(" vec4 src2;\n"); } std::vector GlslShaderTranslator::CompleteTranslation() { @@ -300,7 +313,7 @@ void GlslShaderTranslator::ProcessExecInstructionBegin( EmitSourceDepth("{\n"); break; case ParsedExecInstruction::Type::kConditional: - EmitSourceDepth("if (state.bool_consts[%d] & (1 << %d) == %c) {\n", + EmitSourceDepth("if ((state.bool_consts[%d] & (1 << %d)) == %c) {\n", instr.bool_constant_index / 32, instr.bool_constant_index % 32, instr.condition ? 
'1' : '0'); @@ -374,18 +387,26 @@ void GlslShaderTranslator::ProcessVertexFetchInstruction( Indent(); } - for (size_t i = 0; i < instr.operand_count; ++i) { - if (instr.operands[i].storage_source != - InstructionStorageSource::kVertexFetchConstant) { - EmitLoadOperand(i, instr.operands[i]); + if (instr.result.stores_non_constants()) { + for (size_t i = 0; i < instr.operand_count; ++i) { + if (instr.operands[i].storage_source != + InstructionStorageSource::kVertexFetchConstant) { + EmitLoadOperand(i, instr.operands[i]); + } } - } - switch (instr.opcode) { - case FetchOpcode::kVertexFetch: - EmitSourceDepth("pv = vf%u_%d;\n", instr.operands[1].storage_index, - instr.attributes.offset); - break; + switch (instr.opcode) { + case FetchOpcode::kVertexFetch: + EmitSourceDepth("pv."); + for (int i = 0; + i < GetVertexFormatComponentCount(instr.attributes.data_format); + ++i) { + EmitSource("%c", GetCharForComponentIndex(i)); + } + EmitSource(" = vf%u_%d;\n", instr.operands[1].storage_index, + instr.attributes.offset); + break; + } } EmitStoreVectorResult(instr.result); @@ -417,7 +438,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction( case FetchOpcode::kTextureFetch: switch (instr.dimension) { case TextureDimension::k1D: - EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n", + EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n", instr.operands[1].storage_index); EmitSourceDepth( " pv = texture(sampler1D(state.texture_samplers[%d]), " @@ -428,7 +449,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction( EmitSourceDepth("}\n"); break; case TextureDimension::k2D: - EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n", + EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n", instr.operands[1].storage_index); EmitSourceDepth( " pv = texture(sampler2D(state.texture_samplers[%d]), " @@ -439,7 +460,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction( EmitSourceDepth("}\n"); break; case TextureDimension::k3D: - 
EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n", + EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n", instr.operands[1].storage_index); EmitSourceDepth( " pv = texture(sampler3D(state.texture_samplers[%d]), " @@ -451,7 +472,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction( break; case TextureDimension::kCube: // TODO(benvanik): undo CUBEv logic on t? (s,t,faceid) - EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n", + EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n", instr.operands[1].storage_index); EmitSourceDepth( " pv = texture(samplerCube(state.texture_samplers[%d]), " @@ -835,35 +856,36 @@ void GlslShaderTranslator::ProcessVectorAluInstruction( // max4 dest, src0 case AluVectorOpcode::kMax4: - EmitSourceDepth("pv = max(src0.x, src0.y, src0.z, src0.w).xxxx;\n"); + EmitSourceDepth( + "pv = max(src0.x, max(src0.y, max(src0.z, src0.w))).xxxx;\n"); break; // setp_eq_push dest, src0, src1 case AluVectorOpcode::kSetpEqPush: EmitSourceDepth("p0 = src0.w == 0.0 && src1.w == 0.0 ? true : false;\n"); EmitSourceDepth( - "pv = src0.x == 0.0 && src1.x == 0.0 ? 0.0 : src0.x + 1.0;\n"); + "pv = vec4(src0.x == 0.0 && src1.x == 0.0 ? 0.0 : src0.x + 1.0);\n"); break; // setp_ne_push dest, src0, src1 case AluVectorOpcode::kSetpNePush: EmitSourceDepth("p0 = src0.w == 0.0 && src1.w != 0.0 ? true : false;\n"); EmitSourceDepth( - "pv = src0.x == 0.0 && src1.x != 0.0 ? 0.0 : src0.x + 1.0;\n"); + "pv = vec4(src0.x == 0.0 && src1.x != 0.0 ? 0.0 : src0.x + 1.0);\n"); break; // setp_gt_push dest, src0, src1 case AluVectorOpcode::kSetpGtPush: EmitSourceDepth("p0 = src0.w == 0.0 && src1.w > 0.0 ? true : false;\n"); EmitSourceDepth( - "pv = src0.x == 0.0 && src1.x > 0.0 ? 0.0 : src0.x + 1.0;\n"); + "pv = vec4(src0.x == 0.0 && src1.x > 0.0 ? 0.0 : src0.x + 1.0);\n"); break; // setp_ge_push dest, src0, src1 case AluVectorOpcode::kSetpGePush: EmitSourceDepth("p0 = src0.w == 0.0 && src1.w >= 0.0 ? 
true : false;\n"); EmitSourceDepth( - "pv = src0.x == 0.0 && src1.x >= 0.0 ? 0.0 : src0.x + 1.0;\n"); + "pv = vec4(src0.x == 0.0 && src1.x >= 0.0 ? 0.0 : src0.x + 1.0);\n"); break; // kill_eq dest, src0, src1 @@ -951,7 +973,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( switch (instr.scalar_opcode) { // adds dest, src0.ab case AluScalarOpcode::kAdds: - EmitSourceDepth("ps = src0.x + src1.y;\n"); + EmitSourceDepth("ps = src0.x + src0.y;\n"); break; // adds_prev dest, src0.a @@ -961,7 +983,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( // muls dest, src0.ab case AluScalarOpcode::kMuls: - EmitSourceDepth("ps = src0.x * src1.y;\n"); + EmitSourceDepth("ps = src0.x * src0.y;\n"); break; // muls_prev dest, src0.a @@ -972,18 +994,18 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( // muls_prev2 dest, src0.ab case AluScalarOpcode::kMulsPrev2: EmitSourceDepth( - "ps = ps == -FLT_MAX || isinf(ps) || isinf(src0.y) || src0.y <= 0.0 " - "? -FLT_MAX : src0.x * ps;\n"); + "ps = ps == -FLT_MAX || isinf(ps) || isnan(ps) || isnan(src0.y) || " + "src0.y <= 0.0 ? -FLT_MAX : src0.x * ps;\n"); break; // maxs dest, src0.ab case AluScalarOpcode::kMaxs: - EmitSourceDepth("ps = max(src0.x, src1.y);\n"); + EmitSourceDepth("ps = max(src0.x, src0.y);\n"); break; // mins dest, src0.ab case AluScalarOpcode::kMins: - EmitSourceDepth("ps = min(src0.x, src1.y);\n"); + EmitSourceDepth("ps = min(src0.x, src0.y);\n"); break; // seqs dest, src0.a @@ -1023,52 +1045,52 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( // exp dest, src0.a case AluScalarOpcode::kExp: - EmitSourceDepth("ps = src0.x == 0.0 ? 1.0 : exp2(src0.x);\n"); + EmitSourceDepth("ps = exp2(src0.x);\n"); break; // logc dest, src0.a case AluScalarOpcode::kLogc: - EmitSourceDepth("ps = src0.x == 1.0 ? 0.0 : log2(src0.x);\n"); + EmitSourceDepth("ps = log2(src0.x);\n"); EmitSourceDepth("ps = isinf(ps) ? 
-FLT_MAX : ps;\n"); break; // log dest, src0.a case AluScalarOpcode::kLog: - EmitSourceDepth("ps = src0.x == 1.0 ? 0.0 : log2(src0.x);\n"); + EmitSourceDepth("ps = log2(src0.x);\n"); break; // rcpc dest, src0.a case AluScalarOpcode::kRcpc: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n"); + EmitSourceDepth("ps = 1.0 / src0.x;\n"); EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n"); break; // rcpf dest, src0.a case AluScalarOpcode::kRcpf: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n"); + EmitSourceDepth("ps = 1.0 / src0.x;\n"); EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n"); break; // rcp dest, src0.a case AluScalarOpcode::kRcp: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n"); + EmitSourceDepth("ps = 1.0 / src0.x;\n"); break; // rsqc dest, src0.a case AluScalarOpcode::kRsqc: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n"); + EmitSourceDepth("ps = inversesqrt(src0.x);\n"); EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n"); break; // rsqc dest, src0.a case AluScalarOpcode::kRsqf: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n"); + EmitSourceDepth("ps = inversesqrt(src0.x);\n"); EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n"); break; // rsq dest, src0.a case AluScalarOpcode::kRsq: - EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n"); + EmitSourceDepth("ps = inversesqrt(src0.x);\n"); break; // maxas dest, src0.ab @@ -1145,7 +1167,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( EmitSourceDepth(" ps = 0.0;\n"); EmitSourceDepth(" p0 = true;\n"); EmitSourceDepth("} else {\n"); - EmitSourceDepth(" ps = src0.x == 0.0 ? 1.0 : src1.x;\n"); + EmitSourceDepth(" ps = src0.x == 0.0 ? 
1.0 : src0.x;\n"); EmitSourceDepth(" p0 = false;\n"); EmitSourceDepth("}\n"); break; @@ -1169,13 +1191,8 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( // setp_rstr dest, src0.a case AluScalarOpcode::kSetpRstr: - EmitSourceDepth("if (src0.x == 0.0) {\n"); - EmitSourceDepth(" ps = 0.0;\n"); - EmitSourceDepth(" p0 = true;\n"); - EmitSourceDepth("} else {\n"); - EmitSourceDepth(" ps = src0.x;\n"); - EmitSourceDepth(" p0 = false;\n"); - EmitSourceDepth("}\n"); + EmitSourceDepth("ps = src0.x;\n"); + EmitSourceDepth("p0 = src0.x == 0.0 ? true : false;\n"); break; // kills_eq dest, src0.a diff --git a/src/xenia/gpu/glsl_shader_translator.h b/src/xenia/gpu/glsl_shader_translator.h index 16fb98000..043a61d15 100644 --- a/src/xenia/gpu/glsl_shader_translator.h +++ b/src/xenia/gpu/glsl_shader_translator.h @@ -30,6 +30,8 @@ class GlslShaderTranslator : public ShaderTranslator { ~GlslShaderTranslator() override; protected: + void Reset() override; + void EmitTranslationError(const char* message) override; void EmitUnimplementedTranslationError() override; diff --git a/src/xenia/gpu/sampler_info.cc b/src/xenia/gpu/sampler_info.cc index c11ecccab..51733a110 100644 --- a/src/xenia/gpu/sampler_info.cc +++ b/src/xenia/gpu/sampler_info.cc @@ -18,22 +18,29 @@ namespace xe { namespace gpu { bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, - const gl4::ucode::instr_fetch_tex_t& fetch_instr, + const ParsedTextureFetchInstruction& fetch_instr, SamplerInfo* out_info) { std::memset(out_info, 0, sizeof(SamplerInfo)); - out_info->min_filter = static_cast( - fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter); - out_info->mag_filter = static_cast( - fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter); - out_info->mip_filter = static_cast( - fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter); + out_info->min_filter = + fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst + ? 
static_cast(fetch.min_filter) + : fetch_instr.attributes.min_filter; + out_info->mag_filter = + fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst + ? static_cast(fetch.mag_filter) + : fetch_instr.attributes.mag_filter; + out_info->mip_filter = + fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst + ? static_cast(fetch.mip_filter) + : fetch_instr.attributes.mip_filter; out_info->clamp_u = static_cast(fetch.clamp_x); out_info->clamp_v = static_cast(fetch.clamp_y); out_info->clamp_w = static_cast(fetch.clamp_z); - out_info->aniso_filter = static_cast( - fetch_instr.aniso_filter == 7 ? fetch.aniso_filter - : fetch_instr.aniso_filter); + out_info->aniso_filter = + fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst + ? static_cast(fetch.aniso_filter) + : fetch_instr.attributes.aniso_filter; return true; } diff --git a/src/xenia/gpu/sampler_info.h b/src/xenia/gpu/sampler_info.h index 1a776664c..32cce2bff 100644 --- a/src/xenia/gpu/sampler_info.h +++ b/src/xenia/gpu/sampler_info.h @@ -10,7 +10,7 @@ #ifndef XENIA_GPU_SAMPLER_INFO_H_ #define XENIA_GPU_SAMPLER_INFO_H_ -#include "xenia/gpu/gl4/ucode.h" +#include "xenia/gpu/shader_translator.h" #include "xenia/gpu/xenos.h" namespace xe { @@ -26,7 +26,7 @@ struct SamplerInfo { AnisoFilter aniso_filter; static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, - const gl4::ucode::instr_fetch_tex_t& fetch_instr, + const ParsedTextureFetchInstruction& fetch_instr, SamplerInfo* out_info); uint64_t hash() const; diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index f5e95dc52..f0d19924b 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -9,267 +9,31 @@ #include "xenia/gpu/shader.h" -#include - -#include "xenia/base/math.h" +#include "xenia/base/logging.h" #include "xenia/base/memory.h" -#include "xenia/gpu/gl4/ucode_disassembler.h" namespace xe { namespace gpu { -using namespace xe::gpu::gl4::ucode; -using namespace xe::gpu::xenos; - 
Shader::Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count) : shader_type_(shader_type), data_hash_(data_hash) { data_.resize(dword_count); xe::copy_and_swap(data_.data(), dword_ptr, dword_count); - std::memset(&alloc_counts_, 0, sizeof(alloc_counts_)); - std::memset(&buffer_inputs_, 0, sizeof(buffer_inputs_)); - std::memset(&sampler_inputs_, 0, sizeof(sampler_inputs_)); - - // Disassemble ucode and stash. - // TODO(benvanik): debug only. - ucode_disassembly_ = - gl4::DisassembleShader(shader_type_, data_.data(), data_.size()); - - // Gather input/output registers/etc. - GatherIO(); } Shader::~Shader() = default; -void Shader::GatherIO() { - // Process all execution blocks. - instr_cf_t cfa; - instr_cf_t cfb; - for (size_t idx = 0; idx < data_.size(); idx += 3) { - uint32_t dword_0 = data_[idx + 0]; - uint32_t dword_1 = data_[idx + 1]; - uint32_t dword_2 = data_[idx + 2]; - cfa.dword_0 = dword_0; - cfa.dword_1 = dword_1 & 0xFFFF; - cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); - cfb.dword_1 = dword_2 >> 16; - if (cfa.opc == ALLOC) { - GatherAlloc(&cfa.alloc); - } else if (cfa.is_exec()) { - GatherExec(&cfa.exec); - } - if (cfb.opc == ALLOC) { - GatherAlloc(&cfb.alloc); - } else if (cfb.is_exec()) { - GatherExec(&cfb.exec); - } - if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { - break; - } +bool Shader::Prepare(ShaderTranslator* shader_translator) { + // Perform translation. + translated_shader_ = shader_translator->Translate(shader_type_, data_hash_, + data_.data(), data_.size()); + if (!translated_shader_) { + XELOGE("Shader failed translation"); + return false; } + + return true; } - -void Shader::GatherAlloc(const instr_cf_alloc_t* cf) { - allocs_.push_back(*cf); - - switch (cf->buffer_select) { - case SQ_POSITION: - // Position (SV_POSITION). - alloc_counts_.positions += cf->size + 1; - break; - case SQ_PARAMETER_PIXEL: - // Output to PS (if VS), or frag output (if PS). 
- alloc_counts_.params += cf->size + 1; - break; - case SQ_MEMORY: - // MEMEXPORT? - alloc_counts_.memories += cf->size + 1; - break; - } -} - -void Shader::GatherExec(const instr_cf_exec_t* cf) { - uint32_t sequence = cf->serialize; - for (uint32_t i = 0; i < cf->count; i++) { - uint32_t alu_off = (cf->address + i); - // int sync = sequence & 0x2; - if (sequence & 0x1) { - auto fetch = - reinterpret_cast(data_.data() + alu_off * 3); - switch (fetch->opc) { - case VTX_FETCH: - GatherVertexFetch(&fetch->vtx); - break; - case TEX_FETCH: - GatherTextureFetch(&fetch->tex); - break; - case TEX_GET_BORDER_COLOR_FRAC: - case TEX_GET_COMP_TEX_LOD: - case TEX_GET_GRADIENTS: - case TEX_GET_WEIGHTS: - case TEX_SET_TEX_LOD: - case TEX_SET_GRADIENTS_H: - case TEX_SET_GRADIENTS_V: - default: - assert_always(); - break; - } - } else { - // TODO(benvanik): gather registers used, predicate bits used, etc. - auto alu = - reinterpret_cast(data_.data() + alu_off * 3); - if (alu->export_data && alu->vector_write_mask) { - switch (alu->vector_dest) { - case 0: - case 1: - case 2: - case 3: - alloc_counts_.color_targets[alu->vector_dest] = true; - break; - case 63: - alloc_counts_.point_size = true; - break; - } - } - if (alu->export_data && - (alu->scalar_write_mask || !alu->vector_write_mask)) { - switch (alu->scalar_dest) { - case 0: - case 1: - case 2: - case 3: - alloc_counts_.color_targets[alu->scalar_dest] = true; - break; - case 63: - alloc_counts_.point_size = true; - break; - } - } - } - sequence >>= 2; - } -} - -void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { - // dst_reg/dst_swiz - // src_reg/src_swiz - // format = a2xx_sq_surfaceformat - // format_comp_all ? signed : unsigned - // num_format_all ? normalized - // stride - // offset - // const_index/const_index_sel -- fetch constant register - // num_format_all ? 
integer : fraction - // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default - - if (!vtx->must_be_one) { - return; - } - - // Sometimes games have fetches that just produce constants. We can - // ignore those. - uint32_t dst_swiz = vtx->dst_swiz; - bool fetches_any_data = false; - for (int i = 0; i < 4; i++) { - if ((dst_swiz & 0x7) == 4) { - // 0.0 - } else if ((dst_swiz & 0x7) == 5) { - // 1.0 - } else if ((dst_swiz & 0x7) == 6) { - // ? - } else if ((dst_swiz & 0x7) == 7) { - // Previous register value. - } else { - fetches_any_data = true; - break; - } - dst_swiz >>= 3; - } - if (!fetches_any_data) { - return; - } - - assert_true(vtx->const_index <= 0x1F); - - uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; - auto& inputs = buffer_inputs_; - BufferDescElement* el = nullptr; - for (size_t n = 0; n < inputs.count; n++) { - auto& desc = inputs.descs[n]; - if (desc.fetch_slot == fetch_slot) { - assert_true(desc.element_count <= xe::countof(desc.elements)); - // It may not hold that all strides are equal, but I hope it does. 
- assert_true(!vtx->stride || desc.stride_words == vtx->stride); - el = &desc.elements[desc.element_count++]; - break; - } - } - if (!el) { - assert_not_zero(vtx->stride); - assert_true(inputs.count + 1 < xe::countof(inputs.descs)); - auto& desc = inputs.descs[inputs.count++]; - desc.input_index = inputs.count - 1; - desc.fetch_slot = fetch_slot; - desc.stride_words = vtx->stride; - el = &desc.elements[desc.element_count++]; - } - ++inputs.total_elements_count; - - el->vtx_fetch = *vtx; - el->format = static_cast(vtx->format); - el->is_normalized = vtx->num_format_all == 0; - el->is_signed = vtx->format_comp_all == 1; - el->offset_words = vtx->offset; - el->size_words = 0; - switch (el->format) { - case VertexFormat::k_8_8_8_8: - case VertexFormat::k_2_10_10_10: - case VertexFormat::k_10_11_11: - case VertexFormat::k_11_11_10: - el->size_words = 1; - break; - case VertexFormat::k_16_16: - case VertexFormat::k_16_16_FLOAT: - el->size_words = 1; - break; - case VertexFormat::k_16_16_16_16: - case VertexFormat::k_16_16_16_16_FLOAT: - el->size_words = 2; - break; - case VertexFormat::k_32: - case VertexFormat::k_32_FLOAT: - el->size_words = 1; - break; - case VertexFormat::k_32_32: - case VertexFormat::k_32_32_FLOAT: - el->size_words = 2; - break; - case VertexFormat::k_32_32_32_FLOAT: - el->size_words = 3; - break; - case VertexFormat::k_32_32_32_32: - case VertexFormat::k_32_32_32_32_FLOAT: - el->size_words = 4; - break; - default: - assert_unhandled_case(el->format); - break; - } -} - -void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) { - // TODO(benvanik): check dest_swiz to see if we are writing anything. - - assert_true(tex->const_idx < 0x1F); - - assert_true(sampler_inputs_.count + 1 <= xe::countof(sampler_inputs_.descs)); - auto& input = sampler_inputs_.descs[sampler_inputs_.count++]; - input.input_index = sampler_inputs_.count - 1; - input.fetch_slot = tex->const_idx & 0xF; // ?????????????????????????????? 
- input.tex_fetch = *tex; - - // Format mangling, size estimation, etc. -} - } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 47cf0c201..5f66816e5 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -13,7 +13,7 @@ #include #include -#include "xenia/gpu/gl4/ucode.h" +#include "xenia/gpu/shader_translator.h" #include "xenia/gpu/xenos.h" namespace xe { @@ -24,85 +24,31 @@ class Shader { virtual ~Shader(); ShaderType type() const { return shader_type_; } - bool is_valid() const { return is_valid_; } - const std::string& ucode_disassembly() const { return ucode_disassembly_; } - const std::string& translated_disassembly() const { - return translated_disassembly_; - } - const std::vector translated_binary() { return translated_binary_; } + bool is_valid() const { return !!translated_shader_; } const std::string& host_disassembly() const { return host_disassembly_; } + TranslatedShader* translated_shader() const { + return translated_shader_.get(); + } const uint32_t* data() const { return data_.data(); } uint32_t dword_count() const { return uint32_t(data_.size()); } - struct BufferDescElement { - gl4::ucode::instr_fetch_vtx_t vtx_fetch; - VertexFormat format; - uint32_t offset_words; - uint32_t size_words; - bool is_signed; - bool is_normalized; - }; - struct BufferDesc { - uint32_t input_index; - uint32_t fetch_slot; - uint32_t stride_words; - uint32_t element_count; - BufferDescElement elements[16]; - }; - struct BufferInputs { - uint32_t count; - uint32_t total_elements_count; - BufferDesc descs[32]; - }; - const BufferInputs& buffer_inputs() { return buffer_inputs_; } - - struct SamplerDesc { - uint32_t input_index; - uint32_t fetch_slot; - uint32_t format; - gl4::ucode::instr_fetch_tex_t tex_fetch; - }; - struct SamplerInputs { - uint32_t count; - SamplerDesc descs[32]; - }; - const SamplerInputs& sampler_inputs() { return sampler_inputs_; } - - struct AllocCounts { - uint32_t positions; - 
uint32_t params; - uint32_t memories; - bool point_size; - bool color_targets[4]; - }; - const AllocCounts& alloc_counts() const { return alloc_counts_; } + virtual bool Prepare(ShaderTranslator* shader_translator); protected: Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count); - void GatherIO(); - void GatherAlloc(const gl4::ucode::instr_cf_alloc_t* cf); - void GatherExec(const gl4::ucode::instr_cf_exec_t* cf); - void GatherVertexFetch(const gl4::ucode::instr_fetch_vtx_t* vtx); - void GatherTextureFetch(const gl4::ucode::instr_fetch_tex_t* tex); - ShaderType shader_type_; uint64_t data_hash_; std::vector data_; - bool is_valid_ = false; - std::string ucode_disassembly_; std::string translated_disassembly_; std::vector translated_binary_; std::string host_disassembly_; std::string error_log_; - AllocCounts alloc_counts_; - std::vector allocs_; - BufferInputs buffer_inputs_; - SamplerInputs sampler_inputs_; + std::unique_ptr translated_shader_; }; } // namespace gpu diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 76b277830..272c31b47 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -46,7 +46,7 @@ TranslatedShader::TranslatedShader(ShaderType shader_type, size_t ucode_dword_count, std::vector errors) : shader_type_(shader_type), - ucode_data_hash_(ucode_data_hash_), + ucode_data_hash_(ucode_data_hash), errors_(std::move(errors)) { ucode_data_.resize(ucode_dword_count); std::memcpy(ucode_data_.data(), ucode_dwords, @@ -63,13 +63,35 @@ TranslatedShader::TranslatedShader(ShaderType shader_type, TranslatedShader::~TranslatedShader() = default; +std::string TranslatedShader::GetBinaryString() const { + std::string result; + result.resize(binary_.size()); + std::memcpy(const_cast(result.data()), binary_.data(), binary_.size()); + return result; +} + ShaderTranslator::ShaderTranslator() = default; ShaderTranslator::~ShaderTranslator() = 
default; +void ShaderTranslator::Reset() { + errors_.clear(); + ucode_disasm_buffer_.Reset(); + ucode_disasm_line_number_ = 0; + previous_ucode_disasm_scan_offset_ = 0; + total_attrib_count_ = 0; + vertex_bindings_.clear(); + texture_bindings_.clear(); + for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { + writes_color_targets_[i] = false; + } +} + std::unique_ptr ShaderTranslator::Translate( ShaderType shader_type, uint64_t ucode_data_hash, const uint32_t* ucode_dwords, size_t ucode_dword_count) { + Reset(); + shader_type_ = shader_type; ucode_dwords_ = ucode_dwords; ucode_dword_count_ = ucode_dword_count; @@ -101,8 +123,12 @@ std::unique_ptr ShaderTranslator::Translate( new TranslatedShader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count, std::move(errors_))); translated_shader->binary_ = CompleteTranslation(); + translated_shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); translated_shader->vertex_bindings_ = std::move(vertex_bindings_); translated_shader->texture_bindings_ = std::move(texture_bindings_); + for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { + translated_shader->writes_color_targets_[i] = writes_color_targets_[i]; + } return translated_shader; } @@ -165,6 +191,7 @@ void ShaderTranslator::GatherBindingInformation( ++instr_offset, sequence >>= 2) { bool is_fetch = (sequence & 0x1) == 0x1; if (is_fetch) { + // Gather vertex and texture fetches. auto fetch_opcode = static_cast(ucode_dwords_[instr_offset * 3] & 0x1F); if (fetch_opcode == FetchOpcode::kVertexFetch) { @@ -176,6 +203,20 @@ void ShaderTranslator::GatherBindingInformation( *reinterpret_cast( ucode_dwords_ + instr_offset * 3)); } + } else if (is_pixel_shader()) { + // Gather up color targets written to. 
+ auto& op = *reinterpret_cast(ucode_dwords_ + + instr_offset * 3); + if (op.has_vector_op() && op.is_export()) { + if (op.vector_dest() >= 0 && op.vector_dest() <= 3) { + writes_color_targets_[op.vector_dest()] = true; + } + } + if (op.has_scalar_op() && op.is_export()) { + if (op.vector_dest() >= 0 && op.vector_dest() <= 3) { + writes_color_targets_[op.vector_dest()] = true; + } + } } } break; @@ -184,11 +225,39 @@ void ShaderTranslator::GatherBindingInformation( void ShaderTranslator::GatherVertexBindingInformation( const VertexFetchInstruction& op) { - TranslatedShader::VertexBinding binding; - binding.binding_index = vertex_bindings_.size(); - ParseVertexFetchInstruction(op, &binding.fetch_instr); - binding.fetch_constant = binding.fetch_instr.operands[1].storage_index; - vertex_bindings_.emplace_back(std::move(binding)); + if (!op.fetches_any_data()) { + return; + } + + // Try to allocate an attribute on an existing binding. + // If no binding for this fetch slot is found create it. + using VertexBinding = TranslatedShader::VertexBinding; + VertexBinding::Attribute* attrib = nullptr; + for (auto& vertex_binding : vertex_bindings_) { + if (vertex_binding.fetch_constant == op.fetch_constant_index()) { + // It may not hold that all strides are equal, but I hope it does. + assert_true(!op.stride() || vertex_binding.stride_words == op.stride()); + vertex_binding.attributes.push_back({}); + attrib = &vertex_binding.attributes.back(); + break; + } + } + if (!attrib) { + assert_not_zero(op.stride()); + TranslatedShader::VertexBinding vertex_binding; + vertex_binding.binding_index = static_cast(vertex_bindings_.size()); + vertex_binding.fetch_constant = op.fetch_constant_index(); + vertex_binding.stride_words = op.stride(); + vertex_binding.attributes.push_back({}); + vertex_bindings_.emplace_back(std::move(vertex_binding)); + attrib = &vertex_bindings_.back().attributes.back(); + } + + // Populate attribute. 
+ attrib->attrib_index = total_attrib_count_++; + ParseVertexFetchInstruction(op, &attrib->fetch_instr); + attrib->size_words = + GetVertexFormatSizeInWords(attrib->fetch_instr.attributes.data_format); } void ShaderTranslator::GatherTextureBindingInformation( diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index a37ad4d80..5b519475b 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -66,6 +66,10 @@ enum class SwizzleSource { constexpr SwizzleSource GetSwizzleFromComponentIndex(int i) { return static_cast(i); } +inline char GetCharForComponentIndex(int i) { + const static char kChars[] = {'x', 'y', 'z', 'w'}; + return kChars[i]; +} inline char GetCharForSwizzle(SwizzleSource swizzle_source) { const static char kChars[] = {'x', 'y', 'z', 'w', '0', '1'}; return kChars[static_cast(swizzle_source)]; @@ -96,6 +100,16 @@ struct InstructionResult { bool has_all_writes() const { return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3]; } + // Returns true if any non-constant components are written. + bool stores_non_constants() const { + for (int i = 0; i < 4; ++i) { + if (write_mask[i] && components[i] != SwizzleSource::k0 && + components[i] != SwizzleSource::k1) { + return true; + } + } + return false; + } // True if the components are in their 'standard' swizzle arrangement (xyzw). bool is_standard_swizzle() const { return has_all_writes() && components[0] == SwizzleSource::kX && @@ -337,7 +351,7 @@ struct ParsedVertexFetchInstruction { struct Attributes { VertexFormat data_format = VertexFormat::kUndefined; int offset = 0; - int stride = 0; + int stride = 0; // In dwords. int exp_adjust = 0; bool is_index_rounded = false; bool is_signed = false; @@ -450,12 +464,23 @@ class TranslatedShader { }; struct VertexBinding { + struct Attribute { + // Attribute index, 0-based in the entire shader. + int attrib_index; + // Fetch instruction with all parameters. 
+ ParsedVertexFetchInstruction fetch_instr; + // Size of the attribute, in words. + uint32_t size_words; + }; + // Index within the vertex binding listing. - size_t binding_index; + int binding_index; // Fetch constant index [0-95]. uint32_t fetch_constant; - // Fetch instruction with all parameters. - ParsedVertexFetchInstruction fetch_instr; + // Stride of the entire binding, in words. + uint32_t stride_words; + // Packed attributes within the binding buffer. + std::vector attributes; }; struct TextureBinding { @@ -480,11 +505,16 @@ class TranslatedShader { const std::vector& texture_bindings() const { return texture_bindings_; } + // Returns true if the shader writes to the given color target index [0-3]. + bool writes_color_target(int i) const { return writes_color_targets_[i]; } bool is_valid() const { return is_valid_; } const std::vector& errors() const { return errors_; } const std::vector& binary() const { return binary_; } + const std::string& ucode_disassembly() const { return ucode_disassembly_; } + + std::string GetBinaryString() const; private: friend class ShaderTranslator; @@ -499,10 +529,12 @@ class TranslatedShader { std::vector vertex_bindings_; std::vector texture_bindings_; + bool writes_color_targets_[4] = {false, false, false, false}; bool is_valid_ = false; std::vector errors_; + std::string ucode_disassembly_; std::vector binary_; }; @@ -518,6 +550,9 @@ class ShaderTranslator { protected: ShaderTranslator(); + // Resets translator state before beginning translation. + virtual void Reset(); + // True if the current shader is a vertex shader. bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } // True if the current shader is a pixel shader. @@ -662,8 +697,10 @@ class ShaderTranslator { ucode::VertexFetchInstruction previous_vfetch_full_; // Detected binding information gathered before translation. 
+ int total_attrib_count_ = 0; std::vector vertex_bindings_; std::vector texture_bindings_; + bool writes_color_targets_[4] = {false, false, false, false}; static const AluOpcodeInfo alu_vector_opcode_infos_[0x20]; static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40]; diff --git a/src/xenia/gpu/shader_translator_disasm.cc b/src/xenia/gpu/shader_translator_disasm.cc index 41c887179..b0d33d641 100644 --- a/src/xenia/gpu/shader_translator_disasm.cc +++ b/src/xenia/gpu/shader_translator_disasm.cc @@ -96,7 +96,11 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) { } switch (op.storage_addressing_mode) { case InstructionStorageAddressingMode::kStatic: - out->AppendFormat("%d", op.storage_index); + if (op.is_absolute_value) { + out->AppendFormat("[%d]", op.storage_index); + } else { + out->AppendFormat("%d", op.storage_index); + } break; case InstructionStorageAddressingMode::kAddressAbsolute: out->AppendFormat("[%d+a0]", op.storage_index); diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index d00932033..59d0fc640 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -486,11 +486,11 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) { switch (display_type) { case ShaderDisplayType::kUcode: { - DrawMultilineString(shader->ucode_disassembly()); + DrawMultilineString(shader->translated_shader()->ucode_disassembly()); break; } case ShaderDisplayType::kTranslated: { - const auto& str = shader->translated_disassembly(); + const auto& str = shader->translated_shader()->GetBinaryString(); size_t i = 0; bool done = false; while (!done && i < str.size()) { @@ -566,29 +566,33 @@ void TraceViewer::DrawBlendMode(uint32_t src_blend, uint32_t dest_blend, ImGui::Text(op_template, src_str, dest_str); } -void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) { +void TraceViewer::DrawTextureInfo( + const TranslatedShader::TextureBinding& texture_binding) { 
auto& regs = *graphics_system_->register_file(); - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + texture_binding.fetch_constant * 6; auto group = reinterpret_cast(&regs.values[r]); auto& fetch = group->texture_fetch; if (fetch.type != 0x2) { - DrawFailedTextureInfo(desc, "Invalid fetch type"); + DrawFailedTextureInfo(texture_binding, "Invalid fetch type"); return; } TextureInfo texture_info; if (!TextureInfo::Prepare(fetch, &texture_info)) { - DrawFailedTextureInfo(desc, "Unable to parse texture fetcher info"); + DrawFailedTextureInfo(texture_binding, + "Unable to parse texture fetcher info"); return; } SamplerInfo sampler_info; - if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) { - DrawFailedTextureInfo(desc, "Unable to parse sampler info"); + if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr, + &sampler_info)) { + DrawFailedTextureInfo(texture_binding, "Unable to parse sampler info"); return; } auto texture = GetTextureEntry(texture_info, sampler_info); if (!texture) { - DrawFailedTextureInfo(desc, "Failed to demand texture"); + DrawFailedTextureInfo(texture_binding, "Failed to demand texture"); return; } @@ -599,7 +603,7 @@ void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) { // show viewer } ImGui::NextColumn(); - ImGui::Text("Fetch Slot: %d", desc.fetch_slot); + ImGui::Text("Fetch Slot: %u", texture_binding.fetch_constant); ImGui::Text("Guest Address: %.8X", texture_info.guest_address); switch (texture_info.dimension) { case Dimension::k1D: @@ -628,21 +632,21 @@ void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) { ImGui::Columns(1); } -void TraceViewer::DrawFailedTextureInfo(const Shader::SamplerDesc& desc, - const char* message) { +void TraceViewer::DrawFailedTextureInfo( + const TranslatedShader::TextureBinding& texture_binding, + const char* message) { // TODO(benvanik): better error info/etc. 
ImGui::TextColored(kColorError, "ERROR: %s", message); } -void TraceViewer::DrawVertexFetcher(Shader* shader, - const Shader::BufferDesc& desc, - const xe_gpu_vertex_fetch_t* fetch) { +void TraceViewer::DrawVertexFetcher( + Shader* shader, const TranslatedShader::VertexBinding& vertex_binding, + const xe_gpu_vertex_fetch_t* fetch) { const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); - uint32_t vertex_count = (fetch->size * 4) / desc.stride_words; + uint32_t vertex_count = (fetch->size * 4) / vertex_binding.stride_words; int column_count = 0; - for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) { - const auto& el = desc.elements[el_index]; - switch (el.format) { + for (const auto& attrib : vertex_binding.attributes) { + switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_32: case VertexFormat::k_32_FLOAT: ++column_count; @@ -679,9 +683,10 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, (display_start)*ImGui::GetTextLineHeight()); ImGui::Columns(column_count); if (display_start <= 1) { - for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) { - const auto& el = desc.elements[el_index]; - switch (el.format) { + for (size_t el_index = 0; el_index < vertex_binding.attributes.size(); + ++el_index) { + const auto& attrib = vertex_binding.attributes[el_index]; + switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_32: case VertexFormat::k_32_FLOAT: ImGui::Text("e%d.x", el_index); @@ -729,13 +734,13 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, ImGui::Separator(); } for (int i = display_start; i < display_end; ++i) { - const uint8_t* vstart = addr + i * desc.stride_words * 4; - for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) { - const auto& el = desc.elements[el_index]; -#define LOADEL(type, wo) \ - GpuSwap(xe::load(vstart + (el.offset_words + wo) * 4), \ + const uint8_t* vstart = addr + i * vertex_binding.stride_words * 4; + for (const 
auto& attrib : vertex_binding.attributes) { +#define LOADEL(type, wo) \ + GpuSwap(xe::load(vstart + \ + (attrib.fetch_instr.attributes.offset + wo) * 4), \ Endian(fetch->endian)) - switch (el.format) { + switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); ImGui::NextColumn(); @@ -1406,16 +1411,15 @@ void TraceViewer::DrawStateUI() { if (ImGui::CollapsingHeader("Vertex Buffers")) { auto shader = command_processor->active_vertex_shader(); if (shader) { - const auto& buffer_inputs = shader->buffer_inputs(); - for (uint32_t buffer_index = 0; buffer_index < buffer_inputs.count; - ++buffer_index) { - const auto& desc = buffer_inputs.descs[buffer_index]; - int r = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; + const auto& vertex_bindings = + shader->translated_shader()->vertex_bindings(); + for (const auto& vertex_binding : vertex_bindings) { + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (vertex_binding.fetch_constant / 3) * 6; const auto group = reinterpret_cast(&regs.values[r]); const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (desc.fetch_slot % 3) { + switch (vertex_binding.fetch_constant % 3) { case 0: fetch = &group->vertex_fetch_0; break; @@ -1428,13 +1432,14 @@ void TraceViewer::DrawStateUI() { } assert_true(fetch->endian == 2); char tree_root_id[32]; - sprintf(tree_root_id, "#vertices_root_%d", desc.fetch_slot); + sprintf(tree_root_id, "#vertices_root_%d", + vertex_binding.fetch_constant); if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", - desc.fetch_slot, fetch->address << 2, + vertex_binding.fetch_constant, fetch->address << 2, fetch->size * 4, kEndiannessNames[int(fetch->endian)])) { ImGui::BeginChild("#vertices", ImVec2(0, 300)); - DrawVertexFetcher(shader, desc, fetch); + DrawVertexFetcher(shader, vertex_binding, fetch); ImGui::EndChild(); ImGui::TreePop(); } @@ -1446,10 +1451,11 @@ void TraceViewer::DrawStateUI() { if 
(ImGui::CollapsingHeader("Vertex Textures")) { auto shader = command_processor->active_vertex_shader(); if (shader) { - const auto& sampler_inputs = shader->sampler_inputs(); - if (sampler_inputs.count) { - for (size_t i = 0; i < sampler_inputs.count; ++i) { - DrawTextureInfo(sampler_inputs.descs[i]); + const auto& texture_bindings = + shader->translated_shader()->texture_bindings(); + if (!texture_bindings.empty()) { + for (const auto& texture_binding : texture_bindings) { + DrawTextureInfo(texture_binding); } } else { ImGui::Text("No vertex shader samplers"); @@ -1461,10 +1467,11 @@ void TraceViewer::DrawStateUI() { if (ImGui::CollapsingHeader("Textures")) { auto shader = command_processor->active_pixel_shader(); if (shader) { - const auto& sampler_inputs = shader->sampler_inputs(); - if (sampler_inputs.count) { - for (size_t i = 0; i < sampler_inputs.count; ++i) { - DrawTextureInfo(sampler_inputs.descs[i]); + const auto& texture_bindings = + shader->translated_shader()->texture_bindings(); + if (!texture_bindings.empty()) { + for (const auto& texture_binding : texture_bindings) { + DrawTextureInfo(texture_binding); } } else { ImGui::Text("No pixel shader samplers"); diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index f11927b4b..c76586141 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -85,11 +85,13 @@ class TraceViewer { void DrawBlendMode(uint32_t src_blend, uint32_t dest_blend, uint32_t blend_op); - void DrawTextureInfo(const Shader::SamplerDesc& desc); - void DrawFailedTextureInfo(const Shader::SamplerDesc& desc, - const char* message); + void DrawTextureInfo(const TranslatedShader::TextureBinding& texture_binding); + void DrawFailedTextureInfo( + const TranslatedShader::TextureBinding& texture_binding, + const char* message); - void DrawVertexFetcher(Shader* shader, const Shader::BufferDesc& desc, + void DrawVertexFetcher(Shader* shader, + const TranslatedShader::VertexBinding& vertex_binding, const 
xenos::xe_gpu_vertex_fetch_t* fetch); }; diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 9160cf80c..639448c76 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -548,6 +548,29 @@ struct VertexFetchInstruction { uint32_t src_swizzle() const { return data_.src_swiz; } bool is_src_relative() const { return data_.src_reg_am; } + // Returns true if the fetch actually fetches data. + // This may be false if it's used only to populate constants. + bool fetches_any_data() const { + uint32_t dst_swiz = data_.dst_swiz; /* Working copy; four 3-bit selectors are examined, lowest bits first. */ + bool fetches_any_data = false; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + // 0.0 + } else if ((dst_swiz & 0x7) == 5) { + // 1.0 + } else if ((dst_swiz & 0x7) == 6) { + // ? + } else if ((dst_swiz & 0x7) == 7) { + // Previous register value. + } else { /* Selector values 0-3 land here; this is the only case counted as fetching data. */ + fetches_any_data = true; + break; + } + dst_swiz >>= 3; /* Shift the next 3-bit selector into the low bits. */ + } + return fetches_any_data; + } + uint32_t prefetch_count() const { return data_.prefetch_count; } bool is_mini_fetch() const { return data_.is_mini_fetch == 1; } diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 98b022653..65c1f0bad 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -208,6 +208,60 @@ enum class VertexFormat : uint32_t { k_32_32_32_FLOAT = 57, }; +inline int GetVertexFormatComponentCount(VertexFormat format) { /* Returns the component count (1-4) for the vertex formats listed in the switch. */ + switch (format) { + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + return 1; + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_FLOAT: + return 2; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_32_32_32_FLOAT: + return 3; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_32_32_32_FLOAT: + return 4; + default: /* Format not listed above: passed to assert_unhandled_case, then 0 is returned. */ + assert_unhandled_case(format); + return 0; + } +} +
+inline int GetVertexFormatSizeInWords(VertexFormat format) { /* Returns the size in words (1-4) of the given vertex format. A word looks like 32 bits here (k_32 maps to 1, k_32_32 to 2) - TODO confirm. */ + switch (format) { + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + return 1; + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_FLOAT: + return 2; + case VertexFormat::k_32_32_32_FLOAT: + return 3; + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_32_32_32_FLOAT: + return 4; + default: /* Format not listed above: passed to assert_unhandled_case; the fallback size returned is 1, not 0. */ + assert_unhandled_case(format); + return 1; + } +} + namespace xenos { typedef enum { @@ -231,37 +285,6 @@ enum class CopyCommand : uint32_t { kNull = 3, // ? }; -inline int GetVertexFormatComponentCount(VertexFormat format) { - switch (format) { - case VertexFormat::k_32: - case VertexFormat::k_32_FLOAT: - return 1; - break; - case VertexFormat::k_16_16: - case VertexFormat::k_16_16_FLOAT: - case VertexFormat::k_32_32: - case VertexFormat::k_32_32_FLOAT: - return 2; - break; - case VertexFormat::k_10_11_11: - case VertexFormat::k_11_11_10: - case VertexFormat::k_32_32_32_FLOAT: - return 3; - break; - case VertexFormat::k_8_8_8_8: - case VertexFormat::k_2_10_10_10: - case VertexFormat::k_16_16_16_16: - case VertexFormat::k_16_16_16_16_FLOAT: - case VertexFormat::k_32_32_32_32: - case VertexFormat::k_32_32_32_32_FLOAT: - return 4; - break; - default: - assert_unhandled_case(format); - return 0; - } -} - #define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \ ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))