diff --git a/src/alloy/string_buffer.cc b/src/alloy/string_buffer.cc index bb9270def..d6cebc2c4 100644 --- a/src/alloy/string_buffer.cc +++ b/src/alloy/string_buffer.cc @@ -62,6 +62,8 @@ void StringBuffer::AppendBytes(const uint8_t* buffer, size_t length) { const char* StringBuffer::GetString() const { return buffer_.data(); } +std::string StringBuffer::to_string() { return std::string(buffer_.data()); } + char* StringBuffer::ToString() { return strdup(buffer_.data()); } } // namespace alloy diff --git a/src/alloy/string_buffer.h b/src/alloy/string_buffer.h index e2b294d64..549ddfe7a 100644 --- a/src/alloy/string_buffer.h +++ b/src/alloy/string_buffer.h @@ -31,6 +31,7 @@ class StringBuffer { void AppendBytes(const uint8_t* buffer, size_t length); const char* GetString() const; + std::string to_string(); char* ToString(); char* EncodeBase64(); diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index ba67ffe8c..426a0f3dc 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -1839,82 +1839,53 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { uint32_t el_index = 0; for (uint32_t i = 0; i < desc.element_count; ++i) { const auto& el = desc.elements[i]; - GLuint comp_count; - GLuint comp_size; + auto comp_count = GetVertexFormatComponentCount(el.format); GLenum comp_type; switch (el.format) { case VertexFormat::k_8_8_8_8: - comp_count = 4; - comp_size = 1; comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE; break; case VertexFormat::k_2_10_10_10: - comp_count = 4; - comp_size = 4; comp_type = el.is_signed ? GL_INT_2_10_10_10_REV : GL_UNSIGNED_INT_2_10_10_10_REV; break; case VertexFormat::k_10_11_11: - comp_count = 3; - comp_size = 4; assert_false(el.is_signed); comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV; break; /*case VertexFormat::k_11_11_10: break;*/ case VertexFormat::k_16_16: - comp_count = 2; - comp_size = 2; comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_FLOAT: - comp_count = 2; - comp_size = 2; comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_16_16_16_16: - comp_count = 4; - comp_size = 2; comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT; break; case VertexFormat::k_16_16_16_16_FLOAT: - comp_count = 4; - comp_size = 2; comp_type = GL_HALF_FLOAT; break; case VertexFormat::k_32: - comp_count = 1; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32: - comp_count = 2; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_32_32_32: - comp_count = 4; - comp_size = 4; comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT; break; case VertexFormat::k_32_FLOAT: - comp_count = 1; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_FLOAT: - comp_count = 2; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_32_FLOAT: - comp_count = 3; - comp_size = 4; comp_type = GL_FLOAT; break; case VertexFormat::k_32_32_32_32_FLOAT: - comp_count = 4; - comp_size = 4; comp_type = GL_FLOAT; break; default: diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index d987f4a06..80702cf3a 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -10,6 +10,7 @@ #include #include +#include #include namespace xe { @@ -18,6 +19,9 @@ namespace gl4 { extern "C" GLEWContext* glewGetContext(); +// Stateful, but minimally. 
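
The `thread_local` translator declared just below is the whole concurrency story here: each thread that compiles shaders gets its own translator, so there is no locking, and the translator's large output buffer (see `kOutputCapacity` in the new header) is recycled across compiles instead of being reallocated. A minimal sketch of that reuse pattern, with `ScratchTranslator` as a hypothetical stand-in for the real class:

```cpp
#include <string>

// Hypothetical stand-in illustrating the reuse pattern: clearing the
// buffer rewinds it without releasing capacity, so repeated translations
// recycle one allocation.
class ScratchTranslator {
 public:
  std::string Translate(const char* source) {
    buffer_.clear();  // Drop contents, keep capacity.
    buffer_.append("// translated from: ");
    buffer_.append(source);
    return buffer_;
  }

 private:
  std::string buffer_;  // Grows once, then is reused on every call.
};

// One instance per thread: concurrent compiles never share a buffer.
thread_local ScratchTranslator scratch_translator;
```
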
+thread_local GL4ShaderTranslator shader_translator_; + GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count) : Shader(shader_type, data_hash, dword_ptr, dword_count), program_(0) {} @@ -106,6 +110,13 @@ bool GL4Shader::PrepareVertexShader( //" gl_Position = oPos;\n" "}\n"; + std::string translated_source = + shader_translator_.TranslateVertexShader(this, program_cntl); + if (translated_source.empty()) { + PLOGE("Vertex shader failed translation"); + return false; + } + if (!CompileProgram(source)) { return false; } @@ -133,6 +144,13 @@ bool GL4Shader::PreparePixelShader( //" gl_FragDepth = 0.0;\n" "}\n"; + std::string translated_source = shader_translator_.TranslatePixelShader( + this, program_cntl, vertex_shader->alloc_counts()); + if (translated_source.empty()) { + PLOGE("Pixel shader failed translation"); + return false; + } + if (!CompileProgram(source)) { return false; } diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc new file mode 100644 index 000000000..d61437d49 --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc @@ -0,0 +1,1662 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +using namespace xe::gpu::ucode; +using namespace xe::gpu::xenos; + +static const char chan_names[] = { + 'x', 'y', 'z', 'w', + // these only apply to FETCH dst's, and we shouldn't be using them: + '0', '1', '?', '_', +}; + +const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) { + switch (el.format) { + case VertexFormat::k_32: + return el.is_signed ? "int" : "uint"; + case VertexFormat::k_32_FLOAT: + return "float"; + case VertexFormat::k_16_16: + case VertexFormat::k_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float2" : "unorm float2"; + } else { + return el.is_signed ? "int2" : "uint2"; + } + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32_FLOAT: + return "float2"; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + return "int3"; // ? + case VertexFormat::k_32_32_32_FLOAT: + return "float3"; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_32_32_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float4" : "unorm float4"; + } else { + return el.is_signed ? 
"int4" : "uint4"; + } + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + return "float4"; + default: + XELOGE("Unknown vertex format: %d", el.format); + assert_always(); + return "float4"; + } +} + +GL4ShaderTranslator::GL4ShaderTranslator() + : output_(kOutputCapacity), tex_fetch_index_(0), dwords_(nullptr) {} + +GL4ShaderTranslator::~GL4ShaderTranslator() = default; + +void GL4ShaderTranslator::Reset(GL4Shader* shader) { + output_.Reset(); + shader_type_ = shader->type(); + tex_fetch_index_ = 0; + dwords_ = shader->data(); +} + +std::string GL4ShaderTranslator::TranslateVertexShader( + GL4Shader* vertex_shader, const xe_gpu_program_cntl_t& program_cntl) { + Reset(vertex_shader); + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + Append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(vertex_shader->sampler_inputs()); + + // Transform utilities. We adjust the output position in various ways + // as we can't do this via D3D11 APIs. + Append( + "cbuffer vs_consts : register(b3) {\n" + " float4 window;\n" // x,y,w,h + " float4 viewport_z_enable;\n" // min,(max - min),?,enabled + " float4 viewport_size;\n" // x,y,w,h + "};" + "float4 applyViewport(float4 pos) {\n" + " if (viewport_z_enable.w) {\n" + //" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n" + //" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n" + //" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n" + // w? + " } else {\n" + " pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + " + "float2(-1.0, 1.0);\n" + " pos.zw = float2(0.0, 1.0);\n" + " }\n" + " pos.xy += window.xy;\n" + " return pos;\n" + "}\n"); + + // Add vertex shader input. + Append("struct VS_INPUT {\n"); + uint32_t el_index = 0; + const auto& buffer_inputs = vertex_shader->buffer_inputs(); + for (uint32_t n = 0; n < buffer_inputs.count; n++) { + const auto& input = buffer_inputs.descs[n]; + for (uint32_t m = 0; m < input.element_count; m++) { + const auto& el = input.elements[m]; + const char* type_name = GetVertexFormatTypeName(el); + const auto& fetch = el.vtx_fetch; + uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel; + Append(" %s vf%u_%d : XE_VF%u;\n", type_name, fetch_slot, fetch.offset, + el_index); + el_index++; + } + } + Append("};\n"); + + // Add vertex shader output (pixel shader input). + const auto& alloc_counts = vertex_shader->alloc_counts(); + Append("struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + assert_true(alloc_counts.positions == 1); + Append(" float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators); + } + if (alloc_counts.point_size) { + Append(" float4 oPointSize : PSIZE;\n"); + } + Append("};\n"); + + // Vertex shader main() header. + Append( + "VS_OUTPUT main(VS_INPUT i) {\n" + " VS_OUTPUT o;\n"); + + // Always write position, as some shaders seem to only write certain values. 
+ if (alloc_counts.positions) { + Append(" o.oPos = float4(0.0, 0.0, 0.0, 1.0);\n"); + } + if (alloc_counts.point_size) { + Append(" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); + } + + // TODO(benvanik): remove this, if possible (though the compiler may be smart + // enough to do it for us). + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + Append(" o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n); + } + } + + // Add temporaries for any registers we may use. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= temp_regs; n++) { + Append(" float4 r%d = c[%d];\n", n, n); + } + Append(" float4 t;\n"); + + // Execute blocks. + const auto& execs = vertex_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (!TranslateExec(cf)) { + return ""; + } + } + + // main footer. + if (alloc_counts.positions) { + Append(" o.oPos = applyViewport(o.oPos);\n"); + } + Append( + " return o;\n" + "};\n"); + + return output_.to_string(); +} + +std::string GL4ShaderTranslator::TranslatePixelShader( + GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl, + const GL4Shader::AllocCounts& alloc_counts) { + Reset(pixel_shader); + + // We need an input VS to make decisions here. + // TODO(benvanik): do we need to pair VS/PS up and store the combination? + // If the same PS is used with different VS that output different amounts + // (and less than the number of required registers), things may die. + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + Append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(pixel_shader->sampler_inputs()); + + // Add vertex shader output (pixel shader input). + Append("struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + assert_true(alloc_counts.positions == 1); + Append(" float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators); + } + Append("};\n"); + + // Add pixel shader output. + Append("struct PS_OUTPUT {\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + Append(" float4 oC%d : SV_TARGET%d;\n", n, n); + if (program_cntl.ps_export_depth) { + // Is this per render-target? + Append(" float oD%d : SV_DEPTH%d;\n", n, n); + } + } + Append("};\n"); + + // Pixel shader main() header. + Append( + "PS_OUTPUT main(VS_OUTPUT i) {\n" + " PS_OUTPUT o;\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + Append(" o.oC%d = float4(1.0, 0.0, 0.0, 1.0);\n", n); + } + + // Add temporary registers. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) { + Append(" float4 r%d = c[%d];\n", n, n + 256); + } + Append(" float4 t;\n"); + Append(" float s;\n"); // scalar result (used for RETAIN_PREV) + + // Bring registers local. + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + Append(" r%d = i.o[%d];\n", n, n); + } + } + + // Execute blocks. 
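
Each exec record carries a packed `serialize` field that the loop below hands to `TranslateExec` (further down), which consumes it two bits at a time: bit 0 selects fetch versus ALU decoding for the slot, bit 1 marks a sync point. A self-contained sketch of that walk:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the clause walk TranslateExec performs: two bits per
// instruction slot, shifted out low to high. The real code also reads a
// 3-dword instruction record at (cf.address + i) for each slot.
void WalkClause(uint32_t serialize, uint32_t count) {
  uint32_t sequence = serialize;
  for (uint32_t i = 0; i < count; ++i) {
    bool is_fetch = (sequence & 0x1) != 0;
    bool sync = (sequence & 0x2) != 0;
    std::printf("slot %u: %s%s\n", i, sync ? "(S) " : "",
                is_fetch ? "FETCH" : "ALU");
    sequence >>= 2;
  }
}

int main() {
  WalkClause(0x1, 2);  // slot 0: FETCH, slot 1: ALU
  return 0;
}
```
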
+ const auto& execs = pixel_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (!TranslateExec(cf)) { + return ""; + } + } + + // main footer. + Append( + " return o;\n" + "}\n"); + + return output_.to_string(); +} + +void GL4ShaderTranslator::AppendTextureHeader( + const GL4Shader::SamplerInputs& sampler_inputs) { + bool fetch_setup[32] = {false}; + + // 1 texture per constant slot, 1 sampler per fetch. + for (uint32_t n = 0; n < sampler_inputs.count; n++) { + const auto& input = sampler_inputs.descs[n]; + const auto& fetch = input.tex_fetch; + + // Add texture, if needed. + if (!fetch_setup[fetch.const_idx]) { + fetch_setup[fetch.const_idx] = true; + const char* texture_type = nullptr; + switch (fetch.dimension) { + case DIMENSION_1D: + texture_type = "Texture1D"; + break; + default: + case DIMENSION_2D: + texture_type = "Texture2D"; + break; + case DIMENSION_3D: + texture_type = "Texture3D"; + break; + case DIMENSION_CUBE: + texture_type = "TextureCube"; + break; + } + Append("%s x_texture_%d;\n", texture_type, fetch.const_idx); + } + + // Add sampler. + Append("SamplerState x_sampler_%d;\n", n); + } +} + +void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs_constants) { + if (negate) { + Append("-"); + } + if (type) { + // Register. + if (num & 0x80) { + Append("abs("); + } + Append("r%u", num & 0x7F); + if (num & 0x80) { + Append(")"); + } + } else { + // Constant. + if (abs_constants) { + Append("abs("); + } + Append("c[%u]", is_pixel_shader() ? num + 256 : num); + if (abs_constants) { + Append(")"); + } + } + if (swiz) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } +} + +void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { + if (!dst_exp) { + // Register. + Append("r%u", num); + } else { + // Export. + switch (shader_type_) { + case ShaderType::kVertex: + switch (num) { + case 62: + Append("o.oPos"); + break; + case 63: + Append("o.oPointSize"); + break; + default: + // Varying. + Append("o.o[%u]", num); + ; + break; + } + break; + case ShaderType::kPixel: + switch (num) { + case 0: + Append("o.oC0"); + break; + default: + // TODO(benvanik): other render targets? + // TODO(benvanik): depth? + assert_always(); + break; + } + break; + } + } +} + +void GL4ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // If masking, store to a temporary variable and clean it up later. + Append("t"); + } else { + // Store directly to output. + AppendDestRegName(num, dst_exp); + } +} + +void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // Masking. + Append(" "); + AppendDestRegName(num, dst_exp); + Append(" = float4("); + for (int i = 0; i < 4; i++) { + // TODO(benvanik): mask out values? mix in old value as temp? + // Append("%c", (mask & 0x1) ? 
chan_names[i] : 'w'); + if (!(mask & 0x1)) { + AppendDestRegName(num, dst_exp); + } else { + Append("t"); + } + Append(".%c", chan_names[i]); + mask >>= 1; + if (i < 3) { + Append(", "); + } + } + Append(");\n"); + } +} + +void GL4ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs_constants) { + if (negate) { + Append("-"); + } + if (type) { + if (num & 0x80) { + Append("|"); + } + Append("R%u", num & 0x7F); + if (num & 0x80) { + Append("|"); + } + } else { + if (abs_constants) { + Append("|"); + } + num += is_pixel_shader() ? 256 : 0; + Append("C%u", num); + if (abs_constants) { + Append("|"); + } + } + if (swiz) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } +} + +void GL4ShaderTranslator::PrintDstReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + Append("%s%u", dst_exp ? "export" : "R", num); + if (mask != 0xf) { + Append("."); + for (int i = 0; i < 4; i++) { + Append("%c", (mask & 0x1) ? chan_names[i] : '_'); + mask >>= 1; + } + } +} + +void GL4ShaderTranslator::PrintExportComment(uint32_t num) { + const char* name = nullptr; + switch (shader_type_) { + case ShaderType::kVertex: + switch (num) { + case 62: + name = "gl_Position"; + break; + case 63: + name = "gl_PointSize"; + break; + } + break; + case ShaderType::kPixel: + switch (num) { + case 0: + name = "gl_FragColor"; + break; + } + break; + } + /* if we had a symbol table here, we could look + * up the name of the varying.. + */ + if (name) { + Append("\t; %s", name); + } +} + +bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(" + "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(" * "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && + alu.src1_swiz == alu.src2_swiz && + alu.src1_reg_negate == alu.src2_reg_negate) { + // This is a mov. 
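
Since `max(a, a) == a`, a MAXv whose two source descriptors agree in every field is the microcode idiom for a plain move, and the emitter collapses it to the single source reference appended by the continuation below. The equivalence test, pulled out as a standalone sketch (field names mirror `instr_alu_t`):

```cpp
#include <cstdint>

// Mirrors the condition above: two ALU source operands are identical only
// if register, select, swizzle, and negate all agree.
struct SrcOperand {
  uint32_t reg;
  uint32_t sel;
  uint32_t swiz;
  uint32_t negate;
};

inline bool IsAluMov(const SrcOperand& a, const SrcOperand& b) {
  return a.reg == b.reg && a.sel == b.sel && a.swiz == b.swiz &&
         a.negate == b.negate;
}
```
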
+ AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + } else { + Append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + } + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("min("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu, + const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").x %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").x ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").y %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").y ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").z %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").z ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").w %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").w ? 
1.0 : 0.0)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SETEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_SETGTv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">"); +} +bool GL4ShaderTranslator::TranslateALU_SETGTEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_SETNEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "!="); +} + +bool GL4ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("frac("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("trunc("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("floor("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("mad("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu, + const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as + // values. + Append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").x %s 0.0 ? 
(", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").x : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").x, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").y %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").y : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").y, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").z %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").z : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").z, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").w %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").w : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(").w)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_CNDEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_CNDGTEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_CNDGTv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">"); +} + +bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").xyz, float4("); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").xyz)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(").xy, float4("); + 
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, + alu.abs_constants); + Append(").xy) + "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +// CUBEv + +bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + Append(" = "); + if (alu.vector_clamp) { + Append("saturate("); + } + Append("max("); + Append("max("); + Append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".y), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".z), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, + alu.abs_constants); + Append(".w)"); + if (alu.vector_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return true; +} + +// ... + +bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { + // This is a mov. + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + } else { + Append("max("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".y).xxxx"); + } + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("min("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".y).xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu, + const char* op) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("(("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(".x %s 0.0) ? 
1.0 : 0.0).xxxx", op); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SETEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "=="); +} +bool GL4ShaderTranslator::TranslateALU_SETGTs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">"); +} +bool GL4ShaderTranslator::TranslateALU_SETGTEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">="); +} +bool GL4ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "!="); +} + +bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + Append("(1.0 / "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, + alu.abs_constants); + Append(")"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c * ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) { + return TranslateALU_MUL_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c + ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) { + return TranslateALU_ADD_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) { + AppendDestReg(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + Append(" = "); + if (alu.scalar_clamp) { + Append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t 
swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = + (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + Append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c - ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.abs_constants); + Append(".%c", chan_names[swiz_b]); + Append(").xxxx"); + if (alu.scalar_clamp) { + Append(")"); + } + Append(";\n"); + AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, + alu.export_data); + return true; +} +bool GL4ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) { + return TranslateALU_SUB_CONST_0(alu); +} + +bool GL4ShaderTranslator::TranslateALU_RETAIN_PREV(const instr_alu_t& alu) { + // TODO(benvanik): pull out prev value in s. + return false; +} + +typedef bool (GL4ShaderTranslator::*TranslateFn)(const instr_alu_t& alu); +typedef struct { + uint32_t num_srcs; + const char* name; + TranslateFn fn; +} TranslateInfo; +#define ALU_INSTR(opc, num_srcs) \ + { num_srcs, #opc, nullptr } +#define ALU_INSTR_IMPL(opc, num_srcs) \ + { num_srcs, #opc, &GL4ShaderTranslator::TranslateALU_##opc } + +bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { + static TranslateInfo vector_alu_instrs[0x20] = { + ALU_INSTR_IMPL(ADDv, 2), // 0 + ALU_INSTR_IMPL(MULv, 2), // 1 + ALU_INSTR_IMPL(MAXv, 2), // 2 + ALU_INSTR_IMPL(MINv, 2), // 3 + ALU_INSTR_IMPL(SETEv, 2), // 4 + ALU_INSTR_IMPL(SETGTv, 2), // 5 + ALU_INSTR_IMPL(SETGTEv, 2), // 6 + ALU_INSTR_IMPL(SETNEv, 2), // 7 + ALU_INSTR_IMPL(FRACv, 1), // 8 + ALU_INSTR_IMPL(TRUNCv, 1), // 9 + ALU_INSTR_IMPL(FLOORv, 1), // 10 + ALU_INSTR_IMPL(MULADDv, 3), // 11 + ALU_INSTR_IMPL(CNDEv, 3), // 12 + ALU_INSTR_IMPL(CNDGTEv, 3), // 13 + ALU_INSTR_IMPL(CNDGTv, 3), // 14 + ALU_INSTR_IMPL(DOT4v, 2), // 15 + ALU_INSTR_IMPL(DOT3v, 2), // 16 + ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ??? 
+ ALU_INSTR(CUBEv, 2), // 18 + ALU_INSTR_IMPL(MAX4v, 1), // 19 + ALU_INSTR(PRED_SETE_PUSHv, 2), // 20 + ALU_INSTR(PRED_SETNE_PUSHv, 2), // 21 + ALU_INSTR(PRED_SETGT_PUSHv, 2), // 22 + ALU_INSTR(PRED_SETGTE_PUSHv, 2), // 23 + ALU_INSTR(KILLEv, 2), // 24 + ALU_INSTR(KILLGTv, 2), // 25 + ALU_INSTR(KILLGTEv, 2), // 26 + ALU_INSTR(KILLNEv, 2), // 27 + ALU_INSTR(DSTv, 2), // 28 + ALU_INSTR(MOVAv, 1), // 29 + }; + static TranslateInfo scalar_alu_instrs[0x40] = { + ALU_INSTR(ADDs, 1), // 0 + ALU_INSTR(ADD_PREVs, 1), // 1 + ALU_INSTR(MULs, 1), // 2 + ALU_INSTR(MUL_PREVs, 1), // 3 + ALU_INSTR(MUL_PREV2s, 1), // 4 + ALU_INSTR_IMPL(MAXs, 1), // 5 + ALU_INSTR_IMPL(MINs, 1), // 6 + ALU_INSTR_IMPL(SETEs, 1), // 7 + ALU_INSTR_IMPL(SETGTs, 1), // 8 + ALU_INSTR_IMPL(SETGTEs, 1), // 9 + ALU_INSTR_IMPL(SETNEs, 1), // 10 + ALU_INSTR(FRACs, 1), // 11 + ALU_INSTR(TRUNCs, 1), // 12 + ALU_INSTR(FLOORs, 1), // 13 + ALU_INSTR(EXP_IEEE, 1), // 14 + ALU_INSTR(LOG_CLAMP, 1), // 15 + ALU_INSTR(LOG_IEEE, 1), // 16 + ALU_INSTR(RECIP_CLAMP, 1), // 17 + ALU_INSTR(RECIP_FF, 1), // 18 + ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 + ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 + ALU_INSTR(RECIPSQ_FF, 1), // 21 + ALU_INSTR(RECIPSQ_IEEE, 1), // 22 + ALU_INSTR(MOVAs, 1), // 23 + ALU_INSTR(MOVA_FLOORs, 1), // 24 + ALU_INSTR(SUBs, 1), // 25 + ALU_INSTR(SUB_PREVs, 1), // 26 + ALU_INSTR(PRED_SETEs, 1), // 27 + ALU_INSTR(PRED_SETNEs, 1), // 28 + ALU_INSTR(PRED_SETGTs, 1), // 29 + ALU_INSTR(PRED_SETGTEs, 1), // 30 + ALU_INSTR(PRED_SET_INVs, 1), // 31 + ALU_INSTR(PRED_SET_POPs, 1), // 32 + ALU_INSTR(PRED_SET_CLRs, 1), // 33 + ALU_INSTR(PRED_SET_RESTOREs, 1), // 34 + ALU_INSTR(KILLEs, 1), // 35 + ALU_INSTR(KILLGTs, 1), // 36 + ALU_INSTR(KILLGTEs, 1), // 37 + ALU_INSTR(KILLNEs, 1), // 38 + ALU_INSTR(KILLONEs, 1), // 39 + ALU_INSTR(SQRT_IEEE, 1), // 40 + {0, 0, false}, + ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 + ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 + ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 + ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 + ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 + ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 + ALU_INSTR(SIN, 1), // 48 + ALU_INSTR(COS, 1), // 49 + ALU_INSTR(RETAIN_PREV, 1), // 50 + }; +#undef ALU_INSTR +#undef ALU_INSTR_IMPL + + if (!alu->scalar_write_mask && !alu->vector_write_mask) { + Append(" // \n"); + return true; + } + + if (alu->vector_write_mask) { + // Disassemble vector op. + const auto& iv = vector_alu_instrs[alu->vector_opc]; + Append(" // %sALU:\t", sync ? "(S)" : " "); + Append("%s", iv.name); + if (alu->pred_select & 0x2) { + // seems to work similar to conditional execution in ARM instruction + // set, so let's use a similar syntax for now: + Append((alu->pred_select & 0x1) ? "EQ" : "NE"); + } + Append("\t"); + PrintDstReg(alu->vector_dest, alu->vector_write_mask, alu->export_data); + Append(" = "); + if (iv.num_srcs == 3) { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->abs_constants); + Append(", "); + } + PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, + alu->src1_reg_negate, alu->abs_constants); + if (iv.num_srcs > 1) { + Append(", "); + PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, + alu->src2_reg_negate, alu->abs_constants); + } + if (alu->vector_clamp) { + Append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(alu->vector_dest); + } + Append("\n"); + + // Translate vector op. 
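
Translation proper then dispatches through the tables above: rows built with `ALU_INSTR` leave `fn` null and fall back to a placeholder comment, while `ALU_INSTR_IMPL` rows invoke the matching `TranslateALU_*` member. A compact sketch of that pointer-to-member dispatch:

```cpp
#include <cstdio>

// Sketch of member-pointer dispatch with null rows standing in for known
// but unimplemented opcodes, as in the vector/scalar tables above.
class Dispatcher {
 public:
  bool Translate(unsigned opc) {
    using Fn = bool (Dispatcher::*)();
    static const Fn table[3] = {&Dispatcher::Add, nullptr, &Dispatcher::Mul};
    if (opc >= 3 || !table[opc]) {
      std::printf("  // unimplemented op %u\n", opc);
      return true;  // Emit a placeholder and keep translating.
    }
    return (this->*table[opc])();
  }

 private:
  bool Add() { std::printf("  dst = a + b;\n"); return true; }
  bool Mul() { std::printf("  dst = a * b;\n"); return true; }
};

int main() {
  Dispatcher d;
  d.Translate(0);  // "  dst = a + b;"
  d.Translate(1);  // "  // unimplemented op 1"
  return 0;
}
```
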
+ if (iv.fn) { + Append(" "); + if (!(this->*iv.fn)(*alu)) { + return false; + } + } else { + Append(" // \n"); + } + } + + if (alu->scalar_write_mask || !alu->vector_write_mask) { + // 2nd optional scalar op: + + // Disassemble scalar op. + const auto& is = scalar_alu_instrs[alu->scalar_opc]; + Append(" // "); + Append("\t"); + if (is.name) { + Append("\t \t%s\t", is.name); + } else { + Append("\t \tOP(%u)\t", alu->scalar_opc); + } + PrintDstReg(get_alu_scalar_dest(*alu), alu->scalar_write_mask, + alu->export_data); + Append(" = "); + if (is.num_srcs == 2) { + // ADD_CONST_0 dest, [const], [reg] + uint32_t src3_swiz = alu->src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + PrintSrcReg(alu->src3_reg, 0, 0, alu->src3_reg_negate, + alu->abs_constants); + Append(".%c", chan_names[swiz_a]); + Append(", "); + uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | + (alu->src3_sel << 1); + PrintSrcReg(reg2, 1, 0, alu->src3_reg_negate, alu->abs_constants); + Append(".%c", chan_names[swiz_b]); + } else { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->abs_constants); + } + if (alu->scalar_clamp) { + Append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(get_alu_scalar_dest(*alu)); + } + Append("\n"); + + // Translate scalar op. + if (is.fn) { + Append(" "); + if (!(this->*is.fn)(*alu)) { + return false; + } + } else { + Append(" // \n"); + } + } + + return true; +} + +void GL4ShaderTranslator::PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz) { + Append("\tR%u.", dst_reg); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +void GL4ShaderTranslator::AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz) { + Append("r%u.", dst_reg); + for (int i = 0; i < 4; i++) { + Append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +bool GL4ShaderTranslator::TranslateExec(const instr_cf_exec_t& cf) { + static const struct { + const char* name; + } cf_instructions[] = { +#define INSTR(opc, fxn) \ + { #opc } + INSTR(NOP, print_cf_nop), INSTR(EXEC, print_cf_exec), + INSTR(EXEC_END, print_cf_exec), INSTR(COND_EXEC, print_cf_exec), + INSTR(COND_EXEC_END, print_cf_exec), INSTR(COND_PRED_EXEC, print_cf_exec), + INSTR(COND_PRED_EXEC_END, print_cf_exec), + INSTR(LOOP_START, print_cf_loop), INSTR(LOOP_END, print_cf_loop), + INSTR(COND_CALL, print_cf_jmp_call), INSTR(RETURN, print_cf_jmp_call), + INSTR(COND_JMP, print_cf_jmp_call), INSTR(ALLOC, print_cf_alloc), + INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), + INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), + INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? 
+#undef INSTR + }; + + Append(" // %s ADDR(0x%x) CNT(0x%x)", cf_instructions[cf.opc].name, + cf.address, cf.count); + if (cf.yeild) { + Append(" YIELD"); + } + uint8_t vc = cf.vc_hi | (cf.vc_lo << 2); + if (vc) { + Append(" VC(0x%x)", vc); + } + if (cf.bool_addr) { + Append(" BOOL_ADDR(0x%x)", cf.bool_addr); + } + if (cf.address_mode == ABSOLUTE_ADDR) { + Append(" ABSOLUTE_ADDR"); + } + if (cf.is_cond_exec()) { + Append(" COND(%d)", cf.condition); + } + Append("\n"); + + uint32_t sequence = cf.serialize; + for (uint32_t i = 0; i < cf.count; i++) { + uint32_t alu_off = (cf.address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + const instr_fetch_t* fetch = + (const instr_fetch_t*)(dwords_ + alu_off * 3); + switch (fetch->opc) { + case VTX_FETCH: + if (!TranslateVertexFetch(&fetch->vtx, sync)) { + return false; + } + break; + case TEX_FETCH: + if (!TranslateTextureFetch(&fetch->tex, sync)) { + return false; + } + break; + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + assert_always(); + break; + } + } else { + const instr_alu_t* alu = (const instr_alu_t*)(dwords_ + alu_off * 3); + if (!TranslateALU(alu, sync)) { + return false; + } + } + sequence >>= 2; + } + + return true; +} + +bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx, + int sync) { + static const struct { + const char* name; + } fetch_types[0xff] = { +#define TYPE(id) \ + { #id } + TYPE(FMT_1_REVERSE), // 0 + {0}, + TYPE(FMT_8), // 2 + {0}, + {0}, + {0}, + TYPE(FMT_8_8_8_8), // 6 + TYPE(FMT_2_10_10_10), // 7 + {0}, + {0}, + TYPE(FMT_8_8), // 10 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_16), // 24 + TYPE(FMT_16_16), // 25 + TYPE(FMT_16_16_16_16), // 26 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32), // 33 + TYPE(FMT_32_32), // 34 + TYPE(FMT_32_32_32_32), // 35 + TYPE(FMT_32_FLOAT), // 36 + TYPE(FMT_32_32_FLOAT), // 37 + TYPE(FMT_32_32_32_32_FLOAT), // 38 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32_32_32_FLOAT), // 57 +#undef TYPE + }; + + // Disassemble. + Append(" // %sFETCH:\t", sync ? "(S)" : " "); + if (vtx->pred_select) { + Append(vtx->pred_condition ? "EQ" : "NE"); + } + PrintDestFecth(vtx->dst_reg, vtx->dst_swiz); + Append(" = R%u.", vtx->src_reg); + Append("%c", chan_names[vtx->src_swiz & 0x3]); + if (fetch_types[vtx->format].name) { + Append(" %s", fetch_types[vtx->format].name); + } else { + Append(" TYPE(0x%x)", vtx->format); + } + Append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED"); + if (!vtx->num_format_all) { + Append(" NORMALIZED"); + } + Append(" STRIDE(%u)", vtx->stride); + if (vtx->offset) { + Append(" OFFSET(%u)", vtx->offset); + } + Append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); + if (true) { + // XXX + Append(" src_reg_am=%u", vtx->src_reg_am); + Append(" dst_reg_am=%u", vtx->dst_reg_am); + Append(" num_format_all=%u", vtx->num_format_all); + Append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); + Append(" exp_adjust_all=%u", vtx->exp_adjust_all); + } + Append("\n"); + + // Translate. + Append(" "); + Append("r%u.xyzw", vtx->dst_reg); + Append(" = float4("); + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + // TODO(benvanik): detect xyzw = xyzw, etc. 
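
The destination swizzle decoded in the loop below packs three bits per output channel: values 0 through 3 select a source component, 4 forces 0.0, 5 forces 1.0, 7 keeps the register's previous contents, and 6 is still unidentified here. The same decode as a standalone sketch:

```cpp
#include <cstdint>
#include <cstdio>

// Standalone version of the per-channel fetch swizzle decode: three bits
// per destination channel, consumed low to high.
void PrintFetchSwizzle(uint32_t dst_swiz) {
  static const char chan_names[] = {'x', 'y', 'z', 'w'};
  for (int i = 0; i < 4; ++i) {
    uint32_t sel = dst_swiz & 0x7;
    if (sel == 4) {
      std::printf("0.0");
    } else if (sel == 5) {
      std::printf("1.0");
    } else if (sel == 6) {
      std::printf("?");  // Unknown encoding; the emitter prints "?" too.
    } else if (sel == 7) {
      std::printf("keep");  // Previous register value is preserved.
    } else {
      std::printf("src.%c", chan_names[sel & 0x3]);
    }
    std::printf(i < 3 ? ", " : "\n");
    dst_swiz >>= 3;
  }
}

int main() {
  PrintFetchSwizzle(0xB08);  // Prints: src.x, src.y, 0.0, 1.0
  return 0;
}
```
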
+  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
+  uint32_t component_count =
+      GetVertexFormatComponentCount(static_cast<VertexFormat>(vtx->format));
+  uint32_t dst_swiz = vtx->dst_swiz;
+  for (int i = 0; i < 4; i++) {
+    if ((dst_swiz & 0x7) == 4) {
+      Append("0.0");
+    } else if ((dst_swiz & 0x7) == 5) {
+      Append("1.0");
+    } else if ((dst_swiz & 0x7) == 6) {
+      // ?
+      Append("?");
+    } else if ((dst_swiz & 0x7) == 7) {
+      Append("r%u.%c", vtx->dst_reg, chan_names[i]);
+    } else {
+      Append("i.vf%u_%d.%c", fetch_slot, vtx->offset,
+             chan_names[dst_swiz & 0x3]);
+    }
+    if (i < 3) {
+      Append(", ");
+    }
+    dst_swiz >>= 3;
+  }
+  Append(");\n");
+  return true;
+}
+
+bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
+                                                int sync) {
+  int src_component_count = 0;
+  switch (tex->dimension) {
+    case DIMENSION_1D:
+      src_component_count = 1;
+      break;
+    default:
+    case DIMENSION_2D:
+      src_component_count = 2;
+      break;
+    case DIMENSION_3D:
+      src_component_count = 3;
+      break;
+    case DIMENSION_CUBE:
+      src_component_count = 3;
+      break;
+  }
+
+  // Disassemble.
+  static const char* filter[] = {
+      "POINT",    // TEX_FILTER_POINT
+      "LINEAR",   // TEX_FILTER_LINEAR
+      "BASEMAP",  // TEX_FILTER_BASEMAP
+  };
+  static const char* aniso_filter[] = {
+      "DISABLED",  // ANISO_FILTER_DISABLED
+      "MAX_1_1",   // ANISO_FILTER_MAX_1_1
+      "MAX_2_1",   // ANISO_FILTER_MAX_2_1
+      "MAX_4_1",   // ANISO_FILTER_MAX_4_1
+      "MAX_8_1",   // ANISO_FILTER_MAX_8_1
+      "MAX_16_1",  // ANISO_FILTER_MAX_16_1
+  };
+  static const char* arbitrary_filter[] = {
+      "2x4_SYM",   // ARBITRARY_FILTER_2X4_SYM
+      "2x4_ASYM",  // ARBITRARY_FILTER_2X4_ASYM
+      "4x2_SYM",   // ARBITRARY_FILTER_4X2_SYM
+      "4x2_ASYM",  // ARBITRARY_FILTER_4X2_ASYM
+      "4x4_SYM",   // ARBITRARY_FILTER_4X4_SYM
+      "4x4_ASYM",  // ARBITRARY_FILTER_4X4_ASYM
+  };
+  static const char* sample_loc[] = {
+      "CENTROID",  // SAMPLE_CENTROID
+      "CENTER",    // SAMPLE_CENTER
+  };
+  uint32_t src_swiz = tex->src_swiz;
+  Append(" // %sFETCH:\t", sync ? "(S)" : " ");
+  if (tex->pred_select) {
+    Append(tex->pred_condition ? 
"EQ" : "NE"); + } + PrintDestFecth(tex->dst_reg, tex->dst_swiz); + Append(" = R%u.", tex->src_reg); + for (int i = 0; i < src_component_count; i++) { + Append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + Append(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) { + Append(" VALID_ONLY"); + } + if (tex->tx_coord_denorm) { + Append(" DENORM"); + } + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MAG(%s)", filter[tex->mag_filter]); + } + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MIN(%s)", filter[tex->min_filter]); + } + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" MIP(%s)", filter[tex->mip_filter]); + } + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { + Append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + } + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { + Append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + } + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + } + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { + Append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + } + if (!tex->use_comp_lod) { + Append(" LOD(%u)", tex->use_comp_lod); + Append(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_lod) { + Append(" REG_LOD(%u)", tex->use_reg_lod); + } + if (tex->use_reg_gradients) { + Append(" USE_REG_GRADIENTS"); + } + Append(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) { + Append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); + } + Append("\n"); + + // Translate. + Append(" t = "); + Append("x_texture_%d.Sample(x_sampler_%d, r%u.", tex->const_idx, + tex_fetch_index_++, // hacky way to line up to tex buffers + tex->src_reg); + src_swiz = tex->src_swiz; + for (int i = 0; i < src_component_count; i++) { + Append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + Append(");\n"); + + Append(" r%u.xyzw = float4(", tex->dst_reg); + uint32_t dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + if (i) { + Append(", "); + } + if ((dst_swiz & 0x7) == 4) { + Append("0.0"); + } else if ((dst_swiz & 0x7) == 5) { + Append("1.0"); + } else if ((dst_swiz & 0x7) == 6) { + // ? + Append("?"); + } else if ((dst_swiz & 0x7) == 7) { + Append("r%u.%c", tex->dst_reg, chan_names[i]); + } else { + Append("t.%c", chan_names[dst_swiz & 0x3]); + } + dst_swiz >>= 3; + } + Append(");\n"); + return true; +} + +} // namespace gl4 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h new file mode 100644 index 000000000..22a9cdfbb --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader_translator.h @@ -0,0 +1,123 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ +#define XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ + +#include + +#include +#include +#include +#include +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +class GL4ShaderTranslator { + public: + static const uint32_t kMaxInterpolators = 16; + + GL4ShaderTranslator(); + ~GL4ShaderTranslator(); + + std::string TranslateVertexShader( + GL4Shader* vertex_shader, + const xenos::xe_gpu_program_cntl_t& program_cntl); + std::string TranslatePixelShader( + GL4Shader* pixel_shader, const xenos::xe_gpu_program_cntl_t& program_cntl, + const GL4Shader::AllocCounts& alloc_counts); + + protected: + ShaderType shader_type_; + uint32_t tex_fetch_index_; + const uint32_t* dwords_; + + static const int kOutputCapacity = 64 * 1024; + alloy::StringBuffer output_; + + bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } + bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } + + void Reset(GL4Shader* shader); + void Append(const char* format, ...) { + va_list args; + va_start(args, format); + output_.AppendVarargs(format, args); + va_end(args); + } + + void AppendTextureHeader(const GL4Shader::SamplerInputs& sampler_inputs); + + void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void AppendDestRegName(uint32_t num, uint32_t dst_exp); + void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintExportComment(uint32_t num); + + bool TranslateALU(const ucode::instr_alu_t* alu, int sync); + bool TranslateALU_ADDv(const ucode::instr_alu_t& alu); + bool TranslateALU_MULv(const ucode::instr_alu_t& alu); + bool TranslateALU_MAXv(const ucode::instr_alu_t& alu); + bool TranslateALU_MINv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETXXv(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_SETEv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTEv(const ucode::instr_alu_t& alu); + bool TranslateALU_SETNEv(const ucode::instr_alu_t& alu); + bool TranslateALU_FRACv(const ucode::instr_alu_t& alu); + bool TranslateALU_TRUNCv(const ucode::instr_alu_t& alu); + bool TranslateALU_FLOORv(const ucode::instr_alu_t& alu); + bool TranslateALU_MULADDv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDXXv(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_CNDEv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDGTEv(const ucode::instr_alu_t& alu); + bool TranslateALU_CNDGTv(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu); + bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu); + // CUBEv + bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu); + // ... 
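
One nonobvious encoding the scalar translators declared below have to deal with: the two-source const ops (`MUL_CONST_*`, `ADD_CONST_*`, `SUB_CONST_*`) have no second source field, so the extra register index is reassembled from the opcode's low bit plus bits borrowed from `src3_swiz` and `src3_sel`. The unpack, isolated from the `.cc` as a sketch:

```cpp
#include <cstdint>

// Isolated version of the bit twiddling in TranslateALU_*_CONST_0/1.
struct ScalarConstOperands {
  uint32_t swiz_a;  // Channel of the src3 (constant) operand.
  uint32_t swiz_b;  // Channel of the reassembled register operand.
  uint32_t reg2;    // Second register index, rebuilt from spare bits.
};

inline ScalarConstOperands UnpackScalarConst(uint32_t scalar_opc,
                                             uint32_t src3_swiz,
                                             uint32_t src3_sel) {
  uint32_t masked = src3_swiz & ~0x3Cu;
  ScalarConstOperands ops;
  ops.swiz_a = ((masked >> 6) - 1) & 0x3;
  ops.swiz_b = masked & 0x3;
  ops.reg2 = (scalar_opc & 1) | (src3_swiz & 0x3C) | (src3_sel << 1);
  return ops;
}
```
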
+ bool TranslateALU_MAXs(const ucode::instr_alu_t& alu); + bool TranslateALU_MINs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETXXs(const ucode::instr_alu_t& alu, const char* op); + bool TranslateALU_SETEs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu); + bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); + bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_ADD_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_SUB_CONST_0(const ucode::instr_alu_t& alu); + bool TranslateALU_SUB_CONST_1(const ucode::instr_alu_t& alu); + bool TranslateALU_RETAIN_PREV(const ucode::instr_alu_t& alu); + + void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz); + void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz); + + bool TranslateExec(const ucode::instr_cf_exec_t& cf); + bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync); + bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync); +}; + +} // namespace gl4 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_ diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index 1bba6e3ba..3f0c349ce 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -12,6 +12,8 @@ 'gl4_graphics_system.h', 'gl4_shader.cc', 'gl4_shader.h', + 'gl4_shader_translator.cc', + 'gl4_shader_translator.h', 'gl_context.cc', 'gl_context.h', ], diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 05438657c..820080133 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -30,6 +30,8 @@ class Shader { return translated_disassembly_; } + const uint32_t* data() const { return data_.data(); } + struct BufferDescElement { ucode::instr_fetch_vtx_t vtx_fetch; xenos::VertexFormat format; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 668f94aae..f23ec50f4 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -140,6 +140,36 @@ enum class VertexFormat : uint32_t { k_32_32_32_32_FLOAT = 38, k_32_32_32_FLOAT = 57, }; +inline int GetVertexFormatComponentCount(VertexFormat format) { + switch (format) { + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + return 1; + break; + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_FLOAT: + return 2; + break; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_32_32_32_FLOAT: + return 3; + break; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_32_32_32_FLOAT: + return 4; + break; + default: + assert_unhandled_case(format); + return 0; + } +} #define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \