diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index a26f0ab9d..e4b19a7de 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -122,7 +122,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel, auto in = reinterpret_cast(samples[j]); // Raw samples sometimes aren't within [-1, 1] - float scaled_sample = xe::saturate_signed(in[i]) * scale; + float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale; // Convert the sample and output it in big endian. auto sample = static_cast(scaled_sample); diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index 5d81bee9a..25aa89ede 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -72,20 +72,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) { return (value + multiple - 1) / multiple * multiple; } -// Using the same conventions as in shading languages, returning 0 for NaN. -// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is -// always returned. Also -0 is not < +0, so +0 is also chosen for it. +// For NaN, returns min_value (or, if it's NaN too, max_value). +// If either of the boundaries is zero, and if the value is at that boundary or +// exceeds it, the result will have the sign of that boundary. If both +// boundaries are zero, which sign is selected among the argument signs is not +// explicitly defined. template -constexpr T saturate_unsigned(T value) { - return std::min(static_cast(1.0f), std::max(static_cast(0.0f), value)); +T clamp_float(T value, T min_value, T max_value) { + float clamped_to_min = std::isgreater(value, min_value) ? value : min_value; + return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value; } -// This diverges from the GPU NaN rules for signed normalized formats (NaN -// should be converted to 0, not to -1), but this expectation is not needed most -// of time, and cannot be met for free (unlike for 0...1 clamping). 
+// Using the same conventions as in shading languages, returning 0 for NaN. +// 0 is always returned as positive. template -constexpr T saturate_signed(T value) { - return std::min(static_cast(1.0f), std::max(static_cast(-1.0f), value)); +T saturate(T value) { + return clamp_float(value, static_cast(0.0f), static_cast(1.0f)); } // Gets the next power of two value that is greater than or equal to the given @@ -365,12 +367,6 @@ inline uint64_t rotate_right(uint64_t v, uint8_t sh) { } #endif // XE_PLATFORM_WIN32 -template -T clamp(T value, T min_value, T max_value) { - const T t = value < min_value ? min_value : value; - return t > max_value ? max_value : t; -} - #if XE_ARCH_AMD64 // Utilities for SSE values. template diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index f7b1398c5..bf26ce220 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -16,12 +16,37 @@ #include #include #include +#include #include "xenia/base/byte_order.h" namespace xe { namespace memory { +// For variable declarations (not return values or `this` pointer). +// Not propagated. +#define XE_RESTRICT_VAR __restrict + +// Aliasing-safe bit reinterpretation. +// For more complex cases such as non-trivially-copyable types, write copying +// code respecting the requirements for them externally instead of using these +// functions. 
+ +template +void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) { + static_assert(sizeof(Dst) == sizeof(Src)); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + std::memcpy(&dst, &src, sizeof(Dst)); +} + +template +Dst Reinterpret(const Src& XE_RESTRICT_VAR src) { + Dst dst; + Reinterpret(dst, src); + return dst; +} + #if XE_PLATFORM_ANDROID void AndroidInitialize(); void AndroidShutdown(); diff --git a/src/xenia/base/testing/chrono_test.cc b/src/xenia/base/testing/chrono_test.cc index a63aac53c..f35f17ed8 100644 --- a/src/xenia/base/testing/chrono_test.cc +++ b/src/xenia/base/testing/chrono_test.cc @@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") { auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch(); auto error3 = wsys - wxsys; - REQUIRE(error1 < 10ms); - REQUIRE(error1 > -10ms); - REQUIRE(error2 < 10ms); - REQUIRE(error2 > -10ms); + // In AppVeyor, the difference often can be as large as roughly 16ms. 
+ REQUIRE(error1 < 20ms); + REQUIRE(error1 > -20ms); + REQUIRE(error2 < 20ms); + REQUIRE(error2 > -20ms); REQUIRE(error3 < duration); REQUIRE(error3 > -duration); } diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index 89c606769..eb10a5fa7 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { function_pane_width += io.MouseDelta.x; - function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX); + function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##source_pane", @@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { source_pane_width += io.MouseDelta.x; - source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX); + source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##registers_pane", @@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { registers_pane_width += io.MouseDelta.x; - registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX); + registers_pane_width = + xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true); @@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth)); if (ImGui::IsItemActive()) { bottom_panes_height -= io.MouseDelta.y; - bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX); + bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX); } ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height), true); @@ -245,7 +246,8 @@ 
void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, bottom_panes_height)); if (ImGui::IsItemActive()) { breakpoints_pane_width -= io.MouseDelta.x; - breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX); + breakpoints_pane_width = + xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true); diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 3aee2ba87..8338d0dd2 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -455,9 +455,9 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, // Scratch register writeback. if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; - if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) { + if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) { // Enabled - write to address. - uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32; + uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR]; uint32_t mem_addr = scratch_addr + (scratch_reg * 4); xe::store_and_swap(memory_->TranslatePhysical(mem_addr), value); } @@ -467,7 +467,7 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, // This will block the command processor the next time it WAIT_MEM_REGs // and allow us to synchronize the memory. case XE_GPU_REG_COHER_STATUS_HOST: { - regs.values[index].u32 |= UINT32_C(0x80000000); + regs.values[index] |= UINT32_C(0x80000000); } break; case XE_GPU_REG_DC_LUT_RW_INDEX: { @@ -478,12 +478,12 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_SEQ_COLOR: { // Should be in the 256-entry table writing mode. 
- assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write // enable mask is blue, green, red. bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = @@ -505,7 +505,11 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, } if (++gamma_ramp_rw_component_ >= 3) { gamma_ramp_rw_component_ = 0; - ++gamma_ramp_rw_index.rw_index; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; + ++new_gamma_ramp_rw_index.rw_index; + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); } if (write_gamma_ramp_component) { OnGammaRamp256EntryTableValueWritten(); @@ -514,14 +518,14 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_PWL_DATA: { // Should be in the PWL writing mode. - assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); // Bit 7 of the index is ignored for PWL. uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F; // DC_LUT_PWL_DATA is likely in the red, green, blue order because // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red. 
bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_PWL_DATA& gamma_ramp_entry = @@ -534,13 +538,17 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, } if (++gamma_ramp_rw_component_ >= 3) { gamma_ramp_rw_component_ = 0; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; // TODO(Triang3l): Should this increase beyond 7 bits for PWL? // Direct3D 9 explicitly sets rw_index to 0x80 after writing the last // PWL entry. However, the DC_LUT_RW_INDEX documentation says that for // PWL, the bit 7 is ignored. - gamma_ramp_rw_index.rw_index = + new_gamma_ramp_rw_index.rw_index = (gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) | ((gamma_ramp_rw_index_pwl + 1) & 0x7F); + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); } if (write_gamma_ramp_component) { OnGammaRampPWLValueWritten(); @@ -549,10 +557,10 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_30_COLOR: { // Should be in the 256-entry table writing mode. - assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); uint32_t gamma_ramp_write_enable_mask = - regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111; + regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111; if (gamma_ramp_write_enable_mask) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index]; @@ -567,11 +575,16 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red; } } - ++gamma_ramp_rw_index.rw_index; // TODO(Triang3l): Should this reset the component write index? 
If this // increase is assumed to behave like a full DC_LUT_RW_INDEX write, it - // probably should. + // probably should. Currently this also calls WriteRegister for + // DC_LUT_RW_INDEX, which resets gamma_ramp_rw_component_ as well. gamma_ramp_rw_component_ = 0; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; + ++new_gamma_ramp_rw_index.rw_index; + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); if (gamma_ramp_write_enable_mask) { OnGammaRamp256EntryTableValueWritten(); } @@ -583,7 +596,7 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { // chrispy: rearrange check order, place set after checks if (XE_LIKELY(index < RegisterFile::kRegisterCount)) { - register_file_->values[index].u32 = value; + register_file_->values[index] = value; // quick pre-test // todo: figure out just how unlikely this is. if very (it ought to be, @@ -708,10 +721,11 @@ void CommandProcessor::MakeCoherent() { // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 - RegisterFile* regs = register_file_; - auto& status_host = regs->Get(); - auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + volatile uint32_t* regs_volatile = register_file_->values; + auto status_host = xe::memory::Reinterpret( + uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST])); + uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST]; + uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST]; if (!status_host.status) { return; @@ -731,7 +745,7 @@ void CommandProcessor::MakeCoherent() { base_host + size_host, size_host, action); // Mark coherent. 
- status_host.status = 0; + regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0; } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } @@ -752,4 +766,4 @@ void CommandProcessor::InitializeTrace() { #define COMMAND_PROCESSOR CommandProcessor #include "pm4_command_processor_implement.h" } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 1830cd0c1..5c2787bf3 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1768,7 +1768,7 @@ void D3D12CommandProcessor::WriteRegisterForceinline(uint32_t index, __m128i is_above_lower = _mm_cmpgt_epi16(to_rangecheck, lower_bounds); __m128i is_below_upper = _mm_cmplt_epi16(to_rangecheck, upper_bounds); __m128i is_within_range = _mm_and_si128(is_above_lower, is_below_upper); - register_file_->values[index].u32 = value; + register_file_->values[index] = value; uint32_t movmask = static_cast(_mm_movemask_epi8(is_within_range)); @@ -2047,7 +2047,7 @@ void D3D12CommandProcessor::WritePossiblySpecialRegistersFromMem( for (uint32_t index = start_index; index < end; ++index, ++base) { uint32_t value = xe::load_and_swap(base); - register_file_->values[index].u32 = value; + register_file_->values[index] = value; unsigned expr = 0; @@ -2780,8 +2780,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { vfetch_bits_remaining = xe::clear_lowest_bit(vfetch_bits_remaining); uint32_t vfetch_index = i * 32 + j; - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -3554,10 +3554,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState( 
// Blend factor. float blend_factor[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; // std::memcmp instead of != so in case of NaN, every draw won't be // invalidating it. @@ -3599,7 +3599,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3753,10 +3753,10 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( // Tessellation factor range, plus 1.0 according to the images in // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 - float tessellation_factor_min = - regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f; - float tessellation_factor_max = - regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f; + auto tessellation_factor_min = + regs.Get(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f; + auto tessellation_factor_max = + regs.Get(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f; update_dirty_floatmask(system_constants_.tessellation_factor_range_min, tessellation_factor_min); @@ -3804,12 +3804,12 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( &user_clip_plane_index)) { user_clip_planes_remaining = xe::clear_lowest_bit(user_clip_planes_remaining); - const float* user_clip_plane = - ®s[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32; - if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane, + const void* user_clip_plane_regs = + ®s[XE_GPU_REG_PA_CL_UCP_0_X + 
user_clip_plane_index * 4]; + if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float))) { dirty = true; - std::memcpy(user_clip_plane_write_ptr, user_clip_plane, + std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float)); } user_clip_plane_write_ptr += 4; @@ -3974,9 +3974,8 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( color_exp_bias -= 5; } } - float color_exp_bias_scale; - *reinterpret_cast(&color_exp_bias_scale) = - 0x3F800000 + (color_exp_bias << 23); + auto color_exp_bias_scale = xe::memory::Reinterpret( + int32_t(0x3F800000 + (color_exp_bias << 23))); update_dirty_floatmask(system_constants_.color_exp_bias[i], color_exp_bias_scale); @@ -4028,7 +4027,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( #endif uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; update_dirty_uint32_cmp(system_constants_.edram_rt_blend_factors_ops[i], blend_factors_ops); @@ -4060,22 +4059,22 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + 
regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ -4153,26 +4152,26 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( } } update_dirty_floatmask(system_constants_.edram_blend_constant[0], - regs[XE_GPU_REG_RB_BLEND_RED].f32); + regs.Get(XE_GPU_REG_RB_BLEND_RED)); system_constants_.edram_blend_constant[0] = - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); update_dirty_floatmask(system_constants_.edram_blend_constant[1], - regs[XE_GPU_REG_RB_BLEND_GREEN].f32); + regs.Get(XE_GPU_REG_RB_BLEND_GREEN)); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); update_dirty_floatmask(system_constants_.edram_blend_constant[2], - regs[XE_GPU_REG_RB_BLEND_BLUE].f32); + regs.Get(XE_GPU_REG_RB_BLEND_BLUE)); system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); update_dirty_floatmask(system_constants_.edram_blend_constant[3], - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA)); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } dirty |= ArchFloatMaskSignbit(dirty_float_mask); @@ -4266,10 +4265,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). 
- assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -4344,8 +4343,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, xe::clear_lowest_bit(float_constant_map_entry); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -4376,8 +4374,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, xe::clear_lowest_bit(float_constant_map_entry); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -4397,8 +4394,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, return false; } xe::smallcpy_const( - bool_loop_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32); + bool_loop_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]); cbuffer_binding_bool_loop_.up_to_date = true; current_graphics_root_up_to_date_ &= @@ -4414,7 +4410,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, return false; } xe::smallcpy_const( - fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32); + fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); cbuffer_binding_fetch_.up_to_date = 
true; current_graphics_root_up_to_date_ &= @@ -5152,4 +5148,4 @@ void D3D12CommandProcessor::WriteGammaRampSRV( #undef COMMAND_PROCESSOR } // namespace d3d12 } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index f4770e9fa..ac63881a7 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -679,8 +679,8 @@ void D3D12TextureCache::PrefetchSamplerParameters( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -1160,8 +1160,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture( D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/draw_extent_estimator.cc b/src/xenia/gpu/draw_extent_estimator.cc index 31e94dcbb..86e528639 100644 --- a/src/xenia/gpu/draw_extent_estimator.cc +++ b/src/xenia/gpu/draw_extent_estimator.cc @@ -15,6 +15,7 @@ #include "xenia/base/assert.h" #include "xenia/base/cvar.h" +#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/gpu/registers.h" #include "xenia/gpu/ucode.h" @@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export( point_size_ = value[0]; } if (value_mask & 0b0100) { - vertex_kill_ = *reinterpret_cast(&value[2]); + vertex_kill_ = 
xe::memory::Reinterpret(value[2]); } } } @@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { xenos::Endian index_endian = vgt_dma_size.swap_mode; if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) { xenos::IndexFormat index_format = vgt_draw_initiator.index_size; - uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32; + uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE]; uint32_t index_buffer_read_count = std::min(uint32_t(vgt_draw_initiator.num_indices), uint32_t(vgt_dma_size.num_words)); @@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { auto pa_cl_vte_cntl = regs.Get(); float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) : 1.0f; - float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : 0.0f; + float viewport_y_offset = + pa_cl_vte_cntl.vport_y_offset_ena + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f; int32_t point_vertex_min_diameter_float = 0; int32_t point_vertex_max_diameter_float = 0; float point_constant_radius_y = 0.0f; if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { auto pa_su_point_minmax = regs.Get(); - *reinterpret_cast(&point_vertex_min_diameter_float) = - float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); - *reinterpret_cast(&point_vertex_max_diameter_float) = - float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); + point_vertex_min_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.min_size) * (2.0f / 16.0f)); + point_vertex_max_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.max_size) * (2.0f / 16.0f)); point_constant_radius_y = float(regs.Get().height) * (1.0f / 16.0f); } @@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { // Vertex-specified diameter. 
Clamped effectively as a signed integer in // the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN // to the maximum. - point_radius_y = position_y_export_sink.point_size().value(); - *reinterpret_cast(&point_radius_y) = std::min( - point_vertex_max_diameter_float, - std::max(point_vertex_min_diameter_float, - *reinterpret_cast(&point_radius_y))); - point_radius_y *= 0.5f; + point_radius_y = + 0.5f * + xe::memory::Reinterpret(std::min( + point_vertex_max_diameter_float, + std::max(point_vertex_min_diameter_float, + xe::memory::Reinterpret( + position_y_export_sink.point_size().value())))); } else { // Constant radius. point_radius_y = point_constant_radius_y; @@ -329,7 +332,7 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, float window_y_offset_f = float(window_y_offset); - float yoffset = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; + float yoffset = regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET); // First calculate all the integer.0 or integer.5 offsetting exactly at full // precision. @@ -347,11 +350,10 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, sm3 = yoffset; } sm4 = pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) + ? 
std::abs(regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE)) : 1.0f; viewport_bottom = sm1 + sm2 + sm3 + sm4; - // Using floor, or, rather, truncation (because maxing with zero anyway) // similar to how viewport scissoring behaves on real AMD, Intel and Nvidia // GPUs on Direct3D 12 (but not WARP), also like in @@ -366,4 +368,4 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, } } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index e3cd3acc9..97e6807b4 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -9,8 +9,6 @@ #include "xenia/gpu/draw_util.h" -#include - #include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" @@ -93,22 +91,21 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs, // ones that are rendered (except for shadow volumes). if (pa_su_sc_mode_cntl.poly_offset_front_enable && !pa_su_sc_mode_cntl.cull_front) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); scale = roundToNearestOrderOfMagnitude(scale); } if (pa_su_sc_mode_cntl.poly_offset_back_enable && !pa_su_sc_mode_cntl.cull_back && !scale && !offset) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { // Non-triangle primitives use the front offset, but it's toggled via // poly_offset_para_enable. 
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } } scale_out = scale; @@ -143,7 +140,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader, } // Check if a color target is actually written. - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t rts_remaining = shader.writes_color_targets(); uint32_t rt_index; while (xe::bit_scan_forward(rts_remaining, &rt_index)) { @@ -306,7 +303,6 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, // Obtain the original viewport values in a normalized way. float scale_xy[] = { - pa_cl_vte_cntl.vport_x_scale_ena ? args->PA_CL_VPORT_XSCALE : 1.0f, pa_cl_vte_cntl.vport_y_scale_ena ? args->PA_CL_VPORT_YSCALE : 1.0f, }; @@ -392,16 +388,11 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, float offset_axis = offset_base_xy[i] + offset_add_xy[i]; float scale_axis = scale_xy[i]; float scale_axis_abs = std::abs(scale_xy[i]); - float axis_0 = offset_axis - scale_axis_abs; - float axis_1 = offset_axis + scale_axis_abs; float axis_max_unscaled_float = float(xy_max_unscaled[i]); - // max(0.0f, xy) drops NaN and < 0 - max picks the first argument in the - // !(a < b) case (always for NaN), min as float (axis_max_unscaled_float - // is well below 2^24) to safely drop very large values. 
- uint32_t axis_0_int = - uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_0))); - uint32_t axis_1_int = - uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_1))); + uint32_t axis_0_int = uint32_t(xe::clamp_float( + offset_axis - scale_axis_abs, 0.0f, axis_max_unscaled_float)); + uint32_t axis_1_int = uint32_t(xe::clamp_float( + offset_axis + scale_axis_abs, 0.0f, axis_max_unscaled_float)); uint32_t axis_extent_int = axis_1_int - axis_0_int; viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale; viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale; @@ -507,8 +498,8 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, // extension. But cases when this really matters are yet to be found - // trying to fix this will result in more correct depth values, but // incorrect clipping. - z_min = xe::saturate_unsigned(host_clip_offset_z); - z_max = xe::saturate_unsigned(host_clip_offset_z + host_clip_scale_z); + z_min = xe::saturate(host_clip_offset_z); + z_max = xe::saturate(host_clip_offset_z + host_clip_scale_z); // Direct3D 12 doesn't allow reverse depth range - on some drivers it // works, on some drivers it doesn't, actually, but it was never // explicitly allowed by the specification. @@ -730,7 +721,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs, return 0; } uint32_t normalized_color_mask = 0; - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { // Exclude the render targets not statically written to by the pixel shader. // If the shader doesn't write to a render target, it shouldn't be written @@ -776,10 +767,16 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, ? 
regs.Get().base : regs.Get().base; for (uint32_t constant_index : shader.memexport_stream_constants()) { - const auto& stream = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_000_X + - (float_constants_base + constant_index) * 4); - if (!stream.index_count) { + xenos::xe_gpu_memexport_stream_t stream = + regs.GetMemExportStream(float_constants_base + constant_index); + // Safety checks for stream constants potentially not set up if the export + // isn't done on the control flow path taken by the shader (not checking the + // Y component because the index is more likely to be constructed + // arbitrarily). + // The hardware validates the upper bits of eA according to the + // IPR2015-00325 sequencer specification. + if (stream.const_0x1 != 0x1 || stream.const_0x4b0 != 0x4B0 || + stream.const_0x96 != 0x96 || !stream.index_count) { continue; } const FormatInfo& format_info = @@ -821,7 +818,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, } // Add a new range if haven't expanded an existing one. if (!range_reused) { - ranges_out.emplace_back(stream.base_address, stream_size_bytes); + ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes); } } } @@ -943,8 +940,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, // Get the extent of pixels covered by the resolve rectangle, according to the // top-left rasterization rule. // D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU. 
- auto fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0); if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) { XELOGE("Unsupported resolve vertex buffer format"); assert_always(); @@ -997,10 +993,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, GetScissor(regs, scissor, false); int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]); int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]); - x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right); - y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); - x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right); - y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); + x0 = std::clamp(x0, int32_t(scissor.offset[0]), scissor_right); + y0 = std::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); + x1 = std::clamp(x1, int32_t(scissor.offset[0]), scissor_right); + y1 = std::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); assert_true(x0 <= x1 && y0 <= y1); @@ -1114,7 +1110,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // Calculate the destination memory extent. 
- uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; uint32_t copy_dest_base_adjusted = rb_copy_dest_base; uint32_t copy_dest_extent_start, copy_dest_extent_end; auto rb_copy_dest_pitch = regs.Get(); @@ -1284,9 +1280,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, info_out.copy_dest_info.copy_dest_swap = false; } - info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; + info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR]; + info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR]; + info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO]; + #if 0 XELOGD( "Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially " @@ -1377,4 +1374,4 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader( } // namespace draw_util } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 08c710e6c..131d174e8 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -373,12 +373,12 @@ struct GetViewportInfoArgs { pa_cl_vte_cntl = regs.Get(); pa_su_sc_mode_cntl = regs.Get(); pa_su_vtx_cntl = regs.Get(); - PA_CL_VPORT_XSCALE = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; - PA_CL_VPORT_YSCALE = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; - PA_CL_VPORT_ZSCALE = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; - PA_CL_VPORT_XOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; - PA_CL_VPORT_YOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; - PA_CL_VPORT_ZOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; + PA_CL_VPORT_XSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE); + PA_CL_VPORT_YSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE); + PA_CL_VPORT_ZSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE); + PA_CL_VPORT_XOFFSET = 
regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET); + PA_CL_VPORT_YOFFSET = regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET); + PA_CL_VPORT_ZOFFSET = regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET); pa_sc_window_offset = regs.Get(); depth_format = regs.Get().depth_format; } @@ -767,4 +767,4 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // namespace gpu } // namespace xe -#endif // XENIA_GPU_DRAW_UTIL_H_ +#endif // XENIA_GPU_DRAW_UTIL_H_ \ No newline at end of file diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index e1587a7a5..ca3dfd1e3 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -17,6 +17,7 @@ #include "xenia/base/assert.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" namespace xe { namespace gpu { @@ -1103,10 +1104,10 @@ struct Src : OperandAddress { } static Src LI(int32_t x) { return LI(x, x, x, x); } static Src LF(float x, float y, float z, float w) { - return LU(*reinterpret_cast(&x), - *reinterpret_cast(&y), - *reinterpret_cast(&z), - *reinterpret_cast(&w)); + return LU(xe::memory::Reinterpret(x), + xe::memory::Reinterpret(y), + xe::memory::Reinterpret(z), + xe::memory::Reinterpret(w)); } static Src LF(float x) { return LF(x, x, x, x); } static Src LP(const uint32_t* xyzw) { @@ -1223,12 +1224,10 @@ struct Src : OperandAddress { bool negate) { if (is_integer) { if (absolute) { - *reinterpret_cast(&value) = - std::abs(*reinterpret_cast(&value)); + value = uint32_t(std::abs(int32_t(value))); } if (negate) { - *reinterpret_cast(&value) = - -*reinterpret_cast(&value); + value = uint32_t(-int32_t(value)); } } else { if (absolute) { diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 5a9d8ac64..0f1ec6f8d 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -258,7 +258,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) { } assert_true(r < RegisterFile::kRegisterCount); - return register_file()->values[r].u32; + return 
register_file()->values[r]; } void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { @@ -276,7 +276,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { } assert_true(r < RegisterFile::kRegisterCount); - this->register_file()->values[r].u32 = value; + this->register_file()->values[r] = value; } void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) { @@ -379,4 +379,4 @@ bool GraphicsSystem::Restore(ByteStream* stream) { } } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/packet_disassembler.h b/src/xenia/gpu/packet_disassembler.h index 8aa60a4a2..26aa91371 100644 --- a/src/xenia/gpu/packet_disassembler.h +++ b/src/xenia/gpu/packet_disassembler.h @@ -67,7 +67,7 @@ struct PacketAction { union { struct { uint32_t index; - RegisterFile::RegisterValue value; + uint32_t value; } register_write; struct { uint64_t value; @@ -194,7 +194,7 @@ struct PacketAction { PacketAction action; action.type = Type::kRegisterWrite; action.register_write.index = index; - action.register_write.value.u32 = value; + action.register_write.value = value; return action; } diff --git a/src/xenia/gpu/pm4_command_processor_implement.h b/src/xenia/gpu/pm4_command_processor_implement.h index 3ec3e71b7..739599dde 100644 --- a/src/xenia/gpu/pm4_command_processor_implement.h +++ b/src/xenia/gpu/pm4_command_processor_implement.h @@ -706,23 +706,27 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_WAIT_REG_MEM( uint32_t ref = reader_.ReadAndSwap(); uint32_t mask = reader_.ReadAndSwap(); uint32_t wait = reader_.ReadAndSwap(); + + bool is_memory = (wait_info & 0x10) != 0; + assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount); + const volatile uint32_t& value_ref = + is_memory ? 
*reinterpret_cast(memory_->TranslatePhysical( + poll_reg_addr & ~uint32_t(0x3))) + : register_file_->values[poll_reg_addr]; + bool matched = false; + do { - uint32_t value; - if (wait_info & 0x10) { - // Memory. - auto endianness = static_cast(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); - value = GpuSwap(value, endianness); - trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); + uint32_t value = value_ref; + if (is_memory) { + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)), + sizeof(uint32_t)); + value = xenos::GpuSwap(value, + static_cast(poll_reg_addr & 0x3)); } else { - // Register. - assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { MakeCoherent(); - value = register_file_->values[poll_reg_addr].u32; + value = value_ref; } } matched = MatchValueAndRef(value & mask, ref, wait_info); @@ -758,17 +762,17 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_RMW(uint32_t packet, uint32_t rmw_info = reader_.ReadAndSwap(); uint32_t and_mask = reader_.ReadAndSwap(); uint32_t or_mask = reader_.ReadAndSwap(); - uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; + uint32_t value = register_file_->values[rmw_info & 0x1FFF]; if ((rmw_info >> 31) & 0x1) { // & reg - value &= register_file_->values[and_mask & 0x1FFF].u32; + value &= register_file_->values[and_mask & 0x1FFF]; } else { // & imm value &= and_mask; } if ((rmw_info >> 30) & 0x1) { // | reg - value |= register_file_->values[or_mask & 0x1FFF].u32; + value |= register_file_->values[or_mask & 0x1FFF]; } else { // | imm value |= or_mask; @@ -788,7 +792,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_TO_MEM( uint32_t reg_val; assert_true(reg_addr < RegisterFile::kRegisterCount); - reg_val = register_file_->values[reg_addr].u32; + reg_val = register_file_->values[reg_addr]; auto endianness = 
static_cast(mem_addr & 0x3); mem_addr &= ~0x3; @@ -836,7 +840,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE( } else { // Register. assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; + value = register_file_->values[poll_reg_addr]; } bool matched = MatchValueAndRef(value & mask, ref, wait_info); @@ -858,7 +862,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE( } XE_FORCEINLINE void COMMAND_PROCESSOR::WriteEventInitiator(uint32_t value) XE_RESTRICT { - register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR].u32 = value; + register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR] = value; } bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE( uint32_t packet, uint32_t count) XE_RESTRICT { @@ -898,10 +902,8 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_SHD( data_value = GpuSwap(data_value, endianness); uint8_t* write_destination = memory_->TranslatePhysical(address); if (address > 0x1FFFFFFF) { - uint32_t writeback_base = - register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32; - uint32_t writeback_size = - register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32; + uint32_t writeback_base = register_file_->values[XE_GPU_REG_WRITEBACK_BASE]; + uint32_t writeback_size = register_file_->values[XE_GPU_REG_WRITEBACK_SIZE]; uint32_t writeback_offset = address - writeback_base; // check whether the guest has written writeback base. if they haven't, skip // the offset check @@ -967,7 +969,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_ZPD( if (fake_sample_count >= 0) { auto* pSampleCounts = memory_->TranslatePhysical( - register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); + register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]); // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END // and used to detect a finished query. 
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && @@ -1003,7 +1005,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( vgt_draw_initiator.value = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR].u32 = + register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR] = vgt_draw_initiator.value; bool draw_succeeded = true; // TODO(Triang3l): Remove IndexBufferInfo and replace handling of all this @@ -1025,7 +1027,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( } uint32_t vgt_dma_base = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DMA_BASE].u32 = vgt_dma_base; + register_file_->values[XE_GPU_REG_VGT_DMA_BASE] = vgt_dma_base; reg::VGT_DMA_SIZE vgt_dma_size; assert_not_zero(count_remaining); if (!count_remaining) { @@ -1034,7 +1036,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( } vgt_dma_size.value = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DMA_SIZE].u32 = vgt_dma_size.value; + register_file_->values[XE_GPU_REG_VGT_DMA_SIZE] = vgt_dma_size.value; uint32_t index_size_bytes = vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16 @@ -1341,10 +1343,10 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_VIZ_QUERY( // The scan converter writes the internal result back to the register here. // We just fake it and say it was visible in case it is read back. 
if (id < 32) { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |= - uint32_t(1) << id; + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1) + << id; } else { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |= + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |= uint32_t(1) << (id - 32); } } @@ -1423,4 +1425,4 @@ void COMMAND_PROCESSOR::ExecutePacket(uint32_t ptr, uint32_t count) { } } while (reader_.read_count()); reader_ = old_reader; -} +} \ No newline at end of file diff --git a/src/xenia/gpu/primitive_processor.cc b/src/xenia/gpu/primitive_processor.cc index 827fb7b4e..9e20be2c4 100644 --- a/src/xenia/gpu/primitive_processor.cc +++ b/src/xenia/gpu/primitive_processor.cc @@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. - guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || @@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. 
- guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index 11eebd8c5..8128bdcc9 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -12,8 +12,12 @@ #include #include +#include +#include "xenia/base/assert.h" +#include "xenia/base/memory.h" #include "xenia/gpu/registers.h" +#include "xenia/gpu/xenos.h" namespace xe { namespace gpu { @@ -34,39 +38,53 @@ class RegisterFile { static const RegisterInfo* GetRegisterInfo(uint32_t index); static bool IsValidRegister(uint32_t index); static constexpr size_t kRegisterCount = 0x5003; - union RegisterValue { - uint32_t u32; - float f32; - }; - RegisterValue values[kRegisterCount]; + uint32_t values[kRegisterCount]; + + const uint32_t& operator[](uint32_t reg) const { return values[reg]; } + uint32_t& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](uint32_t reg) const { return values[reg]; } - RegisterValue& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](Register reg) const { return values[reg]; } - RegisterValue& operator[](Register reg) { return values[reg]; } template - const T& Get(uint32_t reg) const { - return *reinterpret_cast(&values[reg]); + T Get(uint32_t reg) const { + return xe::memory::Reinterpret(values[reg]); } template - T& Get(uint32_t reg) { - return *reinterpret_cast(&values[reg]); + T Get(Register reg) const { + return Get(static_cast(reg)); } template - const T& Get(Register reg) const { - return *reinterpret_cast(&values[reg]); + T Get() const { + return Get(T::register_index); } - template - T& Get(Register reg) { - return *reinterpret_cast(&values[reg]); + + 
xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const { + assert_true(index < 96); + xenos::xe_gpu_vertex_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - const T& Get() const { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const { + assert_true(index < 32); + xenos::xe_gpu_texture_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - T& Get() { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_memexport_stream_t GetMemExportStream( + uint32_t float_constant_index) const { + assert_true(float_constant_index < 512); + xenos::xe_gpu_memexport_stream_t stream; + std::memcpy( + &stream, + &values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index], + sizeof(stream)); + return stream; } }; diff --git a/src/xenia/gpu/shader_interpreter.cc b/src/xenia/gpu/shader_interpreter.cc index 6eda12f42..ec7cf9a02 100644 --- a/src/xenia/gpu/shader_interpreter.cc +++ b/src/xenia/gpu/shader_interpreter.cc @@ -21,10 +21,7 @@ void ShaderInterpreter::Execute() { state_.Reset(); const uint32_t* bool_constants = - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; - const xenos::LoopConstant* loop_constants = - reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32); + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]; bool exec_ended = false; uint32_t cf_index_next = 1; @@ -133,8 +130,8 @@ void ShaderInterpreter::Execute() { cf_index_next = cf_loop_start.address(); continue; } - xenos::LoopConstant loop_constant = - loop_constants[cf_loop_start.loop_id()]; + auto loop_constant = register_file_.Get( + XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id()); 
state_.loop_constants[state_.loop_stack_depth] = loop_constant; uint32_t& loop_iterator_ref = state_.loop_iterators[state_.loop_stack_depth]; @@ -163,8 +160,11 @@ void ShaderInterpreter::Execute() { &cf_instr); xenos::LoopConstant loop_constant = state_.loop_constants[state_.loop_stack_depth - 1]; - assert_true(loop_constant.value == - loop_constants[cf_loop_end.loop_id()].value); + assert_zero( + std::memcmp(&loop_constant, + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + + cf_loop_end.loop_id()], + sizeof(loop_constant))); uint32_t loop_iterator = ++state_.loop_iterators[state_.loop_stack_depth - 1]; if (loop_iterator < loop_constant.count && @@ -250,28 +250,31 @@ void ShaderInterpreter::Execute() { } } -const float* ShaderInterpreter::GetFloatConstant( +const std::array ShaderInterpreter::GetFloatConstant( uint32_t address, bool is_relative, bool relative_address_is_a0) const { - static const float zero[4] = {}; int32_t index = int32_t(address); if (is_relative) { index += relative_address_is_a0 ? state_.address_register : state_.GetLoopAddress(); } if (index < 0) { - return zero; + return std::array(); } auto base_and_size_minus_1 = register_file_.Get( shader_type_ == xenos::ShaderType::kVertex ? 
XE_GPU_REG_SQ_VS_CONST : XE_GPU_REG_SQ_PS_CONST); if (uint32_t(index) > base_and_size_minus_1.size) { - return zero; + return std::array(); } index += base_and_size_minus_1.base; if (index >= 512) { - return zero; + return std::array(); } - return ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32; + std::array value; + std::memcpy(value.data(), + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index], + sizeof(float) * 4); + return value; } void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { @@ -290,6 +293,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { const float* vector_src_ptr; uint32_t vector_src_register = instr.src_reg(1 + i); bool vector_src_absolute = false; + std::array vector_src_float_constant; if (instr.src_is_temp(1 + i)) { vector_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(vector_src_register), @@ -297,9 +301,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( vector_src_register); } else { - vector_src_ptr = GetFloatConstant( + vector_src_float_constant = GetFloatConstant( vector_src_register, instr.src_const_is_addressed(1 + i), instr.is_const_address_register_relative()); + vector_src_ptr = vector_src_float_constant.data(); } uint32_t vector_src_absolute_mask = ~(uint32_t(vector_src_absolute) << 31); @@ -334,16 +339,18 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kMax: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isgreaterequal(vector_operands[0][i], vector_operands[1][i]) + ? 
vector_operands[0][i] + : vector_operands[1][i]; } } break; case ucode::AluVectorOpcode::kMin: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] < vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isless(vector_operands[0][i], vector_operands[1][i]) + ? vector_operands[0][i] + : vector_operands[1][i]; } } break; case ucode::AluVectorOpcode::kSeq: { @@ -354,14 +361,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kSgt: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = - float(vector_operands[0][i] > vector_operands[1][i]); + vector_result[i] = float( + std::isgreater(vector_operands[0][i], vector_operands[1][i])); } } break; case ucode::AluVectorOpcode::kSge: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = - float(vector_operands[0][i] >= vector_operands[1][i]); + vector_result[i] = float(std::isgreaterequal(vector_operands[0][i], + vector_operands[1][i])); } } break; case ucode::AluVectorOpcode::kSne: { @@ -407,14 +414,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kCndGe: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= 0.0f + vector_result[i] = std::isgreaterequal(vector_operands[0][i], 0.0f) ? vector_operands[1][i] : vector_operands[2][i]; } } break; case ucode::AluVectorOpcode::kCndGt: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] > 0.0f + vector_result[i] = std::isgreater(vector_operands[0][i], 0.0f) ? vector_operands[1][i] : vector_operands[2][i]; } @@ -466,32 +473,38 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z); // Result is T coordinate, S coordinate, 2 * major axis, face ID. 
if (z_abs >= x_abs && z_abs >= y_abs) { + bool z_negative = std::isless(z, 0.0f); vector_result[0] = -y; - vector_result[1] = z < 0.0f ? -x : x; + vector_result[1] = z_negative ? -x : x; vector_result[2] = z; - vector_result[3] = z < 0.0f ? 5.0f : 4.0f; + vector_result[3] = z_negative ? 5.0f : 4.0f; } else if (y_abs >= x_abs) { - vector_result[0] = y < 0.0f ? -z : z; + bool y_negative = std::isless(y, 0.0f); + vector_result[0] = y_negative ? -z : z; vector_result[1] = x; vector_result[2] = y; - vector_result[3] = y < 0.0f ? 3.0f : 2.0f; + vector_result[3] = y_negative ? 3.0f : 2.0f; } else { + bool x_negative = std::isless(x, 0.0f); vector_result[0] = -y; - vector_result[1] = x < 0.0f ? z : -z; + vector_result[1] = x_negative ? z : -z; vector_result[2] = x; - vector_result[3] = x < 0.0f ? 1.0f : 0.0f; + vector_result[3] = x_negative ? 1.0f : 0.0f; } vector_result[2] *= 2.0f; } break; case ucode::AluVectorOpcode::kMax4: { - if (vector_operands[0][0] >= vector_operands[0][1] && - vector_operands[0][0] >= vector_operands[0][2] && - vector_operands[0][0] >= vector_operands[0][3]) { + if (std::isgreaterequal(vector_operands[0][0], vector_operands[0][1]) && + std::isgreaterequal(vector_operands[0][0], vector_operands[0][2]) && + std::isgreaterequal(vector_operands[0][0], vector_operands[0][3])) { vector_result[0] = vector_operands[0][0]; - } else if (vector_operands[0][1] >= vector_operands[0][2] && - vector_operands[0][1] >= vector_operands[0][3]) { + } else if (std::isgreaterequal(vector_operands[0][1], + vector_operands[0][2]) && + std::isgreaterequal(vector_operands[0][1], + vector_operands[0][3])) { vector_result[0] = vector_operands[0][1]; - } else if (vector_operands[0][2] >= vector_operands[0][3]) { + } else if (std::isgreaterequal(vector_operands[0][2], + vector_operands[0][3])) { vector_result[0] = vector_operands[0][2]; } else { vector_result[0] = vector_operands[0][3]; @@ -517,21 +530,21 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction 
instr) { replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kSetpGtPush: { - state_.predicate = - vector_operands[0][3] == 0.0f && vector_operands[1][3] > 0.0f; - vector_result[0] = - (vector_operands[0][0] == 0.0f && vector_operands[1][0] > 0.0f) - ? 0.0f - : vector_operands[0][0] + 1.0f; + state_.predicate = vector_operands[0][3] == 0.0f && + std::isgreater(vector_operands[1][3], 0.0f); + vector_result[0] = (vector_operands[0][0] == 0.0f && + std::isgreater(vector_operands[1][0], 0.0f)) + ? 0.0f + : vector_operands[0][0] + 1.0f; replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kSetpGePush: { - state_.predicate = - vector_operands[0][3] == 0.0f && vector_operands[1][3] >= 0.0f; - vector_result[0] = - (vector_operands[0][0] == 0.0f && vector_operands[1][0] >= 0.0f) - ? 0.0f - : vector_operands[0][0] + 1.0f; + state_.predicate = vector_operands[0][3] == 0.0f && + std::isgreaterequal(vector_operands[1][3], 0.0f); + vector_result[0] = (vector_operands[0][0] == 0.0f && + std::isgreaterequal(vector_operands[1][0], 0.0f)) + ? 
0.0f + : vector_operands[0][0] + 1.0f; replicate_vector_result_x = true; } break; // Not implementing pixel kill currently, the interpreter is currently @@ -545,19 +558,19 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillGt: { - vector_result[0] = - float(vector_operands[0][0] > vector_operands[1][0] || - vector_operands[0][1] > vector_operands[1][1] || - vector_operands[0][2] > vector_operands[1][2] || - vector_operands[0][3] > vector_operands[1][3]); + vector_result[0] = float( + std::isgreater(vector_operands[0][0], vector_operands[1][0]) || + std::isgreater(vector_operands[0][1], vector_operands[1][1]) || + std::isgreater(vector_operands[0][2], vector_operands[1][2]) || + std::isgreater(vector_operands[0][3], vector_operands[1][3])); replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillGe: { - vector_result[0] = - float(vector_operands[0][0] >= vector_operands[1][0] || - vector_operands[0][1] >= vector_operands[1][1] || - vector_operands[0][2] >= vector_operands[1][2] || - vector_operands[0][3] >= vector_operands[1][3]); + vector_result[0] = float( + std::isgreaterequal(vector_operands[0][0], vector_operands[1][0]) || + std::isgreaterequal(vector_operands[0][1], vector_operands[1][1]) || + std::isgreaterequal(vector_operands[0][2], vector_operands[1][2]) || + std::isgreaterequal(vector_operands[0][3], vector_operands[1][3])); replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillNe: { @@ -578,14 +591,13 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { vector_result[3] = vector_operands[1][3]; } break; case ucode::AluVectorOpcode::kMaxA: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. 
state_.address_register = int32_t(std::floor( - std::min(255.0f, std::max(-256.0f, vector_operands[0][3])) + 0.5f)); + xe::clamp_float(vector_operands[0][3], -256.0f, 255.0f) + 0.5f)); for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isgreaterequal(vector_operands[0][i], vector_operands[1][i]) + ? vector_operands[0][i] + : vector_operands[1][i]; } } break; default: { @@ -611,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { // r#/c#.w or r#/c#.wx. const float* scalar_src_ptr; uint32_t scalar_src_register = instr.src_reg(3); + std::array scalar_src_float_constant; if (instr.src_is_temp(3)) { scalar_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(scalar_src_register), @@ -618,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( scalar_src_register); } else { - scalar_src_ptr = GetFloatConstant( + scalar_src_float_constant = GetFloatConstant( scalar_src_register, instr.src_const_is_addressed(3), instr.is_const_address_register_relative()); + scalar_src_ptr = scalar_src_float_constant.data(); } uint32_t scalar_src_swizzle = instr.src_swizzle(3); scalar_operand_component_count = @@ -688,7 +702,8 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { case ucode::AluScalarOpcode::kMulsPrev2: { if (state_.previous_scalar == -FLT_MAX || !std::isfinite(state_.previous_scalar) || - !std::isfinite(scalar_operands[1]) || scalar_operands[1] <= 0.0f) { + !std::isfinite(scalar_operands[1]) || + std::islessequal(scalar_operands[1], 0.0f)) { state_.previous_scalar = -FLT_MAX; } else { // Direct3D 9 behavior (0 or denormal * anything = +0). 
@@ -699,23 +714,26 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } } break; case ucode::AluScalarOpcode::kMaxs: { - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kMins: { - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + state_.previous_scalar = + std::isless(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kSeqs: { state_.previous_scalar = float(scalar_operands[0] == 0.0f); } break; case ucode::AluScalarOpcode::kSgts: { - state_.previous_scalar = float(scalar_operands[0] > 0.0f); + state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kSges: { - state_.previous_scalar = float(scalar_operands[0] >= 0.0f); + state_.previous_scalar = + float(std::isgreaterequal(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kSnes: { state_.previous_scalar = float(scalar_operands[0] != 0.0f); @@ -781,22 +799,20 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]); } break; case ucode::AluScalarOpcode::kMaxAs: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. state_.address_register = int32_t(std::floor( - std::min(255.0f, std::max(-256.0f, scalar_operands[0])) + 0.5f)); - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + xe::clamp_float(scalar_operands[0], -256.0f, 255.0f) + 0.5f)); + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? 
scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kMaxAsf: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. state_.address_register = int32_t( - std::floor(std::min(255.0f, std::max(-256.0f, scalar_operands[0])))); - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + std::floor(xe::clamp_float(scalar_operands[0], -256.0f, 255.0f))); + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kSubs: case ucode::AluScalarOpcode::kSubsc0: @@ -815,11 +831,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpGt: { - state_.predicate = scalar_operands[0] > 0.0f; + state_.predicate = std::isgreater(scalar_operands[0], 0.0f); state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpGe: { - state_.predicate = scalar_operands[0] >= 0.0f; + state_.predicate = std::isgreaterequal(scalar_operands[0], 0.0f); state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpInv: { @@ -831,7 +847,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluScalarOpcode::kSetpPop: { float new_counter = scalar_operands[0] - 1.0f; - state_.predicate = new_counter <= 0.0f; + state_.predicate = std::islessequal(new_counter, 0.0f); state_.previous_scalar = state_.predicate ? 
0.0f : new_counter; } break; case ucode::AluScalarOpcode::kSetpClr: { @@ -848,10 +864,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = float(scalar_operands[0] == 0.0f); } break; case ucode::AluScalarOpcode::kKillsGt: { - state_.previous_scalar = float(scalar_operands[0] > 0.0f); + state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kKillsGe: { - state_.previous_scalar = float(scalar_operands[0] >= 0.0f); + state_.previous_scalar = + float(std::isgreaterequal(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kKillsNe: { state_.previous_scalar = float(scalar_operands[0] != 0.0f); @@ -877,11 +894,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { if (instr.vector_clamp()) { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = xe::saturate_unsigned(vector_result[i]); + vector_result[i] = xe::saturate(vector_result[i]); } } float scalar_result = instr.scalar_clamp() - ? xe::saturate_unsigned(state_.previous_scalar) + ? xe::saturate(state_.previous_scalar) : state_.previous_scalar; uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask(); @@ -977,10 +994,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction( state_.vfetch_full_last = instr; } - xenos::xe_gpu_vertex_fetch_t fetch_constant = - *reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - state_.vfetch_full_last.fetch_constant_index()]); + xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch( + state_.vfetch_full_last.fetch_constant_index()); if (!instr.is_mini_fetch()) { // Get the part of the address that depends on vfetch_full data. 
diff --git a/src/xenia/gpu/shader_interpreter.h b/src/xenia/gpu/shader_interpreter.h index 759f606eb..46808562e 100644 --- a/src/xenia/gpu/shader_interpreter.h +++ b/src/xenia/gpu/shader_interpreter.h @@ -11,6 +11,7 @@ #define XENIA_GPU_SHADER_INTERPRETER_H_ #include +#include #include #include @@ -117,8 +118,8 @@ class ShaderInterpreter { float* GetTempRegister(uint32_t address, bool is_relative) { return temp_registers_[GetTempRegisterIndex(address, is_relative)]; } - const float* GetFloatConstant(uint32_t address, bool is_relative, - bool relative_address_is_a0) const; + const std::array<float, 4> GetFloatConstant( + uint32_t address, bool is_relative, bool relative_address_is_a0) const; void ExecuteAluInstruction(ucode::AluInstruction instr); void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle, diff --git a/src/xenia/gpu/spirv_builder.cc b/src/xenia/gpu/spirv_builder.cc index 2ba9446bc..fc2e92850 100644 --- a/src/xenia/gpu/spirv_builder.cc +++ b/src/xenia/gpu/spirv_builder.cc @@ -13,6 +13,8 @@ #include #include +#include "xenia/base/assert.h" namespace xe { namespace gpu { @@ -101,5 +103,195 @@ spv::Id SpirvBuilder::createTriBuiltinCall(spv::Id result_type, return result; } +SpirvBuilder::IfBuilder::IfBuilder(spv::Id condition, unsigned int control, + SpirvBuilder& builder, + unsigned int thenWeight, + unsigned int elseWeight) + : builder(builder), + condition(condition), + control(control), + thenWeight(thenWeight), + elseWeight(elseWeight), + function(builder.getBuildPoint()->getParent()) { + // Make the blocks, but only put the then-block into the function, the + // else-block and merge-block will be added later, in order, after earlier + // code is emitted. + thenBlock = new spv::Block(builder.getUniqueId(), function); + elseBlock = nullptr; + mergeBlock = new spv::Block(builder.getUniqueId(), function); + + // Save the current block, so that we can add in the flow control split when + // makeEndIf is called. 
+ headerBlock = builder.getBuildPoint(); + + spv::Id headerBlockId = headerBlock->getId(); + thenPhiParent = headerBlockId; + elsePhiParent = headerBlockId; + + function.addBlock(thenBlock); + builder.setBuildPoint(thenBlock); +} + +void SpirvBuilder::IfBuilder::makeBeginElse(bool branchToMerge) { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kThen); +#endif + + if (branchToMerge) { + // Close out the "then" by having it jump to the mergeBlock. + thenPhiParent = builder.getBuildPoint()->getId(); + builder.createBranch(mergeBlock); + } + + // Make the first else block and add it to the function. + elseBlock = new spv::Block(builder.getUniqueId(), function); + function.addBlock(elseBlock); + + // Start building the else block. + builder.setBuildPoint(elseBlock); + +#ifndef NDEBUG + currentBranch = Branch::kElse; +#endif +} + +void SpirvBuilder::IfBuilder::makeEndIf(bool branchToMerge) { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kThen || currentBranch == Branch::kElse); +#endif + + if (branchToMerge) { + // Jump to the merge block. + (elseBlock ? elsePhiParent : thenPhiParent) = + builder.getBuildPoint()->getId(); + builder.createBranch(mergeBlock); + } + + // Go back to the headerBlock and make the flow control split. + builder.setBuildPoint(headerBlock); + builder.createSelectionMerge(mergeBlock, control); + { + spv::Block* falseBlock = elseBlock ? elseBlock : mergeBlock; + std::unique_ptr branch = + std::make_unique(spv::OpBranchConditional); + branch->addIdOperand(condition); + branch->addIdOperand(thenBlock->getId()); + branch->addIdOperand(falseBlock->getId()); + if (thenWeight || elseWeight) { + branch->addImmediateOperand(thenWeight); + branch->addImmediateOperand(elseWeight); + } + builder.getBuildPoint()->addInstruction(std::move(branch)); + thenBlock->addPredecessor(builder.getBuildPoint()); + falseBlock->addPredecessor(builder.getBuildPoint()); + } + + // Add the merge block to the function. 
+ function.addBlock(mergeBlock); + builder.setBuildPoint(mergeBlock); + +#ifndef NDEBUG + currentBranch = Branch::kMerge; +#endif +} + +spv::Id SpirvBuilder::IfBuilder::createMergePhi(spv::Id then_variable, + spv::Id else_variable) const { + assert_true(builder.getBuildPoint() == mergeBlock); + return builder.createQuadOp(spv::OpPhi, builder.getTypeId(then_variable), + then_variable, getThenPhiParent(), else_variable, + getElsePhiParent()); +} + +SpirvBuilder::SwitchBuilder::SwitchBuilder(spv::Id selector, + unsigned int selection_control, + SpirvBuilder& builder) + : builder_(builder), + selector_(selector), + selection_control_(selection_control), + function_(builder.getBuildPoint()->getParent()), + header_block_(builder.getBuildPoint()), + default_phi_parent_(builder.getBuildPoint()->getId()) { + merge_block_ = new spv::Block(builder_.getUniqueId(), function_); +} + +void SpirvBuilder::SwitchBuilder::makeBeginDefault() { + assert_null(default_block_); + + endSegment(); + + default_block_ = new spv::Block(builder_.getUniqueId(), function_); + function_.addBlock(default_block_); + default_block_->addPredecessor(header_block_); + builder_.setBuildPoint(default_block_); + + current_branch_ = Branch::kDefault; +} + +void SpirvBuilder::SwitchBuilder::makeBeginCase(unsigned int literal) { + endSegment(); + + auto case_block = new spv::Block(builder_.getUniqueId(), function_); + function_.addBlock(case_block); + cases_.emplace_back(literal, case_block->getId()); + case_block->addPredecessor(header_block_); + builder_.setBuildPoint(case_block); + + current_branch_ = Branch::kCase; +} + +void SpirvBuilder::SwitchBuilder::addCurrentCaseLiteral(unsigned int literal) { + assert_true(current_branch_ == Branch::kCase); + + cases_.emplace_back(literal, cases_.back().second); +} + +void SpirvBuilder::SwitchBuilder::makeEndSwitch() { + endSegment(); + + builder_.setBuildPoint(header_block_); + + builder_.createSelectionMerge(merge_block_, selection_control_); + + std::unique_ptr 
switch_instruction = + std::make_unique(spv::OpSwitch); + switch_instruction->addIdOperand(selector_); + if (default_block_) { + switch_instruction->addIdOperand(default_block_->getId()); + } else { + switch_instruction->addIdOperand(merge_block_->getId()); + merge_block_->addPredecessor(header_block_); + } + for (const std::pair& case_pair : cases_) { + switch_instruction->addImmediateOperand(case_pair.first); + switch_instruction->addIdOperand(case_pair.second); + } + builder_.getBuildPoint()->addInstruction(std::move(switch_instruction)); + + function_.addBlock(merge_block_); + builder_.setBuildPoint(merge_block_); + + current_branch_ = Branch::kMerge; +} + +void SpirvBuilder::SwitchBuilder::endSegment() { + assert_true(current_branch_ == Branch::kSelection || + current_branch_ == Branch::kDefault || + current_branch_ == Branch::kCase); + + if (current_branch_ == Branch::kSelection) { + return; + } + + if (!builder_.getBuildPoint()->isTerminated()) { + builder_.createBranch(merge_block_); + if (current_branch_ == Branch::kDefault) { + default_phi_parent_ = builder_.getBuildPoint()->getId(); + } + } + + current_branch_ = Branch::kSelection; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_builder.h b/src/xenia/gpu/spirv_builder.h index 0496aa7c4..7422d7c63 100644 --- a/src/xenia/gpu/spirv_builder.h +++ b/src/xenia/gpu/spirv_builder.h @@ -10,7 +10,13 @@ #ifndef XENIA_GPU_SPIRV_BUILDER_H_ #define XENIA_GPU_SPIRV_BUILDER_H_ +#include +#include +#include +#include + #include "third_party/glslang/SPIRV/SpvBuilder.h" +#include "xenia/base/assert.h" namespace xe { namespace gpu { @@ -42,6 +48,104 @@ class SpirvBuilder : public spv::Builder { spv::Id createTriBuiltinCall(spv::Id result_type, spv::Id builtins, int entry_point, spv::Id operand1, spv::Id operand2, spv::Id operand3); + + // Helper to use for building nested control flow with if-then-else with + // additions over SpvBuilder::If. 
+ class IfBuilder { + public: + IfBuilder(spv::Id condition, unsigned int control, SpirvBuilder& builder, + unsigned int thenWeight = 0, unsigned int elseWeight = 0); + + ~IfBuilder() { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kMerge); +#endif + } + + void makeBeginElse(bool branchToMerge = true); + void makeEndIf(bool branchToMerge = true); + + // If there's no then/else block that branches to the merge block, the phi + // parent is the header block - this simplifies then-only usage. + spv::Id getThenPhiParent() const { return thenPhiParent; } + spv::Id getElsePhiParent() const { return elsePhiParent; } + + spv::Id createMergePhi(spv::Id then_variable, spv::Id else_variable) const; + + private: + enum class Branch { + kThen, + kElse, + kMerge, + }; + + IfBuilder(const IfBuilder& ifBuilder) = delete; + IfBuilder& operator=(const IfBuilder& ifBuilder) = delete; + + SpirvBuilder& builder; + spv::Id condition; + unsigned int control; + unsigned int thenWeight; + unsigned int elseWeight; + + spv::Function& function; + + spv::Block* headerBlock; + spv::Block* thenBlock; + spv::Block* elseBlock; + spv::Block* mergeBlock; + + spv::Id thenPhiParent; + spv::Id elsePhiParent; + +#ifndef NDEBUG + Branch currentBranch = Branch::kThen; +#endif + }; + + // Simpler and more flexible (such as multiple cases pointing to the same + // block) compared to makeSwitch. + class SwitchBuilder { + public: + SwitchBuilder(spv::Id selector, unsigned int selection_control, + SpirvBuilder& builder); + ~SwitchBuilder() { assert_true(current_branch_ == Branch::kMerge); } + + void makeBeginDefault(); + void makeBeginCase(unsigned int literal); + void addCurrentCaseLiteral(unsigned int literal); + void makeEndSwitch(); + + // If there's no default block that branches to the merge block, the phi + // parent is the header block - this simplifies case-only usage. 
+ spv::Id getDefaultPhiParent() const { return default_phi_parent_; } + + private: + enum class Branch { + kSelection, + kDefault, + kCase, + kMerge, + }; + + void endSegment(); + + SpirvBuilder& builder_; + spv::Id selector_; + unsigned int selection_control_; + + spv::Function& function_; + + spv::Block* header_block_; + spv::Block* merge_block_; + spv::Block* default_block_ = nullptr; + + std::vector<std::pair<unsigned int, spv::Id>> cases_; + + spv::Id default_phi_parent_; + + Branch current_branch_ = Branch::kSelection; + }; }; } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 8bcaa19fd..399b7079f 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -30,30 +30,35 @@ namespace gpu { SpirvShaderTranslator::Features::Features(bool all) : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)), + full_draw_index_uint32(all), + vertex_pipeline_stores_and_atomics(all), + fragment_stores_and_atomics(all), clip_distance(all), cull_distance(all), - demote_to_helper_invocation(all), - fragment_shader_sample_interlock(all), - full_draw_index_uint32(all), image_view_format_swizzle(all), signed_zero_inf_nan_preserve_float32(all), denorm_flush_to_zero_float32(all), - rounding_mode_rte_float32(all) {} + rounding_mode_rte_float32(all), + fragment_shader_sample_interlock(all), + demote_to_helper_invocation(all) {} SpirvShaderTranslator::Features::Features( const ui::vulkan::VulkanProvider::DeviceInfo& device_info) : max_storage_buffer_range(device_info.maxStorageBufferRange), + full_draw_index_uint32(device_info.fullDrawIndexUint32), + vertex_pipeline_stores_and_atomics( + device_info.vertexPipelineStoresAndAtomics), + fragment_stores_and_atomics(device_info.fragmentStoresAndAtomics), clip_distance(device_info.shaderClipDistance), cull_distance(device_info.shaderCullDistance), - 
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation), - fragment_shader_sample_interlock( - device_info.fragmentShaderSampleInterlock), - full_draw_index_uint32(device_info.fullDrawIndexUint32), image_view_format_swizzle(device_info.imageViewFormatSwizzle), signed_zero_inf_nan_preserve_float32( device_info.shaderSignedZeroInfNanPreserveFloat32), denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32), - rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) { + rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32), + fragment_shader_sample_interlock( + device_info.fragmentShaderSampleInterlock), + demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation) { if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { spirv_version = spv::Spv_1_5; } else if (device_info.ext_1_2_VK_KHR_spirv_1_4) { @@ -117,6 +122,14 @@ void SpirvShaderTranslator::Reset() { main_interface_.clear(); var_main_registers_ = spv::NoResult; + var_main_memexport_address_ = spv::NoResult; + for (size_t memexport_eM_index = 0; + memexport_eM_index < xe::countof(var_main_memexport_data_); + ++memexport_eM_index) { + var_main_memexport_data_[memexport_eM_index] = spv::NoResult; + } + var_main_memexport_data_written_ = spv::NoResult; + main_memexport_allowed_ = spv::NoResult; var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult; var_main_kill_pixel_ = spv::NoResult; var_main_fsi_color_written_ = spv::NoResult; @@ -310,6 +323,8 @@ void SpirvShaderTranslator::StartTranslation() { main_interface_.push_back(uniform_system_constants_); } + bool memexport_used = IsMemoryExportUsed(); + if (!is_depth_only_fragment_shader_) { // Common uniform buffer - float constants. 
uint32_t float_constant_count = @@ -420,9 +435,10 @@ void SpirvShaderTranslator::StartTranslation() { builder_->addMemberName(type_shared_memory, 0, "shared_memory"); builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationRestrict); - // TODO(Triang3l): Make writable when memexport is implemented. - builder_->addMemberDecoration(type_shared_memory, 0, - spv::DecorationNonWritable); + if (!memexport_used) { + builder_->addMemberDecoration(type_shared_memory, 0, + spv::DecorationNonWritable); + } builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, 0); builder_->addDecoration(type_shared_memory, @@ -509,6 +525,24 @@ void SpirvShaderTranslator::StartTranslation() { builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_register_array, "xe_var_registers"); } + if (memexport_used) { + var_main_memexport_address_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float4_, + "xe_var_memexport_address", const_float4_0_); + uint8_t memexport_eM_remaining = current_shader().memexport_eM_written(); + uint32_t memexport_eM_index; + while ( + xe::bit_scan_forward(memexport_eM_remaining, &memexport_eM_index)) { + memexport_eM_remaining &= ~(uint8_t(1) << memexport_eM_index); + var_main_memexport_data_[memexport_eM_index] = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float4_, + fmt::format("xe_var_memexport_data_{}", memexport_eM_index).c_str(), + const_float4_0_); + } + var_main_memexport_data_written_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_uint_, + "xe_var_memexport_data_written", const_uint_0_); + } } // Write the execution model-specific prologue with access to variables in the @@ -647,6 +681,10 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { builder_->setBuildPoint(main_loop_merge_); } + // Write data for the last memexport. 
+ ExportToMemory( + current_shader().memexport_eM_potentially_written_before_end()); + if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); } else if (is_pixel_shader()) { @@ -1077,6 +1115,34 @@ void SpirvShaderTranslator::ProcessJumpInstruction( builder_->createBranch(main_loop_continue_); } +void SpirvShaderTranslator::ProcessAllocInstruction( + const ParsedAllocInstruction& instr, uint8_t export_eM) { + bool start_memexport = instr.type == ucode::AllocType::kMemory && + current_shader().memexport_eM_written(); + if (export_eM || start_memexport) { + CloseExecConditionals(); + } + + if (export_eM) { + ExportToMemory(export_eM); + // Reset which eM# elements have been written. + builder_->createStore(const_uint_0_, var_main_memexport_data_written_); + // Break dependencies from the previous memexport. + uint8_t export_eM_remaining = export_eM; + uint32_t eM_index; + while (xe::bit_scan_forward(export_eM_remaining, &eM_index)) { + export_eM_remaining &= ~(uint8_t(1) << eM_index); + builder_->createStore(const_float4_0_, + var_main_memexport_data_[eM_index]); + } + } + + if (start_memexport) { + // Initialize eA to an invalid address. + builder_->createStore(const_float4_0_, var_main_memexport_address_); + } +} + spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( spv::Id scalar, spv::Id vector_type) { bool is_constant = builder_->isConstant(scalar); @@ -1205,6 +1271,8 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { } void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { + Modification shader_modification = GetSpirvShaderModification(); + // The edge flag isn't used for any purpose by the translator. 
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) { id_vector_temp_.clear(); @@ -1244,11 +1312,40 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } } - Modification shader_modification = GetSpirvShaderModification(); - // TODO(Triang3l): For HostVertexShaderType::kRectangeListAsTriangleStrip, // start the vertex loop, and load the index there. + // Check if memory export should be allowed for this host vertex of the guest + // primitive to make sure export is done only once for each guest vertex. + if (IsMemoryExportUsed()) { + spv::Id memexport_allowed_for_host_vertex_of_guest_primitive = + spv::NoResult; + if (shader_modification.vertex.host_vertex_shader_type == + Shader::HostVertexShaderType::kPointListAsTriangleStrip) { + // Only for one host vertex for the point. + memexport_allowed_for_host_vertex_of_guest_primitive = + builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createLoad(input_vertex_index_, + spv::NoPrecision)), + builder_->makeUintConstant(3)), + const_uint_0_); + } + + if (memexport_allowed_for_host_vertex_of_guest_primitive != spv::NoResult) { + main_memexport_allowed_ = + main_memexport_allowed_ != spv::NoResult + ? builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, main_memexport_allowed_, + memexport_allowed_for_host_vertex_of_guest_primitive) + : memexport_allowed_for_host_vertex_of_guest_primitive; + } + } + // Load the vertex index or the tessellation parameters. if (register_count()) { // TODO(Triang3l): Barycentric coordinates and patch index. 
@@ -1272,89 +1369,70 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { builder_->makeUintConstant(static_cast( kSysFlag_ComputeOrPrimitiveVertexIndexLoad))), const_uint_0_); - spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); - spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); - spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_load_vertex_index_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(load_vertex_index, - &block_load_vertex_index_start, - &block_load_vertex_index_merge); - builder_->setBuildPoint(&block_load_vertex_index_start); - // Check if the index is 32-bit. - spv::Id vertex_index_is_32bit = builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, - builder_->makeUintConstant(static_cast( - kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))), - const_uint_0_); - // Calculate the vertex index address in the shared memory. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); - spv::Id vertex_index_address = builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, - id_vector_temp_), - spv::NoPrecision), - builder_->createBinOp( - spv::OpShiftLeftLogical, type_uint_, vertex_index, - builder_->createTriOp(spv::OpSelect, type_uint_, - vertex_index_is_32bit, const_uint_2, - builder_->makeUintConstant(1)))); - // Load the 32 bits containing the whole vertex index or two 16-bit - // vertex indices. - // TODO(Triang3l): Bounds checking. 
- spv::Id loaded_vertex_index = - LoadUint32FromSharedMemory(builder_->createUnaryOp( - spv::OpBitcast, type_int_, - builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, - vertex_index_address, const_uint_2))); - // Extract the 16-bit index from the loaded 32 bits if needed. - loaded_vertex_index = builder_->createTriOp( - spv::OpSelect, type_uint_, vertex_index_is_32bit, - loaded_vertex_index, - builder_->createTriOp( - spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index, - builder_->createBinOp( - spv::OpShiftLeftLogical, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - vertex_index_address, const_uint_2), - builder_->makeUintConstant(4 - 1)), - builder_->makeUintConstant(16))); - // Endian-swap the loaded index. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); - loaded_vertex_index = EndianSwap32Uint( - loaded_vertex_index, - builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, - id_vector_temp_), - spv::NoPrecision)); - // Get the actual build point for phi. - spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_load_vertex_index_merge); - // Select between the loaded index and the original index from Vulkan. 
- builder_->setBuildPoint(&block_load_vertex_index_merge); + SpirvBuilder::IfBuilder load_vertex_index_if( + load_vertex_index, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id loaded_vertex_index; { - std::unique_ptr loaded_vertex_index_phi_op = - std::make_unique(builder_->getUniqueId(), - type_uint_, spv::OpPhi); - loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_end.getId()); - loaded_vertex_index_phi_op->addIdOperand(vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_pre.getId()); - vertex_index = loaded_vertex_index_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(loaded_vertex_index_phi_op)); + // Check if the index is 32-bit. + spv::Id vertex_index_is_32bit = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant(static_cast( + kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))), + const_uint_0_); + // Calculate the vertex index address in the shared memory. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); + spv::Id vertex_index_address = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_), + spv::NoPrecision), + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, vertex_index, + builder_->createTriOp(spv::OpSelect, type_uint_, + vertex_index_is_32bit, const_uint_2, + builder_->makeUintConstant(1)))); + // Load the 32 bits containing the whole vertex index or two 16-bit + // vertex indices. + // TODO(Triang3l): Bounds checking. 
+ loaded_vertex_index = + LoadUint32FromSharedMemory(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + vertex_index_address, const_uint_2))); + // Extract the 16-bit index from the loaded 32 bits if needed. + loaded_vertex_index = builder_->createTriOp( + spv::OpSelect, type_uint_, vertex_index_is_32bit, + loaded_vertex_index, + builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index, + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + vertex_index_address, const_uint_2), + builder_->makeUintConstant(4 - 1)), + builder_->makeUintConstant(16))); + // Endian-swap the loaded index. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); + loaded_vertex_index = EndianSwap32Uint( + loaded_vertex_index, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_), + spv::NoPrecision)); } + load_vertex_index_if.makeEndIf(); + // Select between the loaded index and the original index from Vulkan. + vertex_index = load_vertex_index_if.createMergePhi(loaded_vertex_index, + vertex_index); } else { // TODO(Triang3l): Close line loop primitive. 
// Load the unswapped index as uint for swapping, or for indirect @@ -1368,53 +1446,35 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { builder_->makeUintConstant( static_cast(kSysFlag_VertexIndexLoad))), const_uint_0_); - spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); - spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); - spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_load_vertex_index_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(load_vertex_index, - &block_load_vertex_index_start, - &block_load_vertex_index_merge); - builder_->setBuildPoint(&block_load_vertex_index_start); - // Load the 32-bit index. - // TODO(Triang3l): Bounds checking. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); - spv::Id loaded_vertex_index = - LoadUint32FromSharedMemory(builder_->createUnaryOp( - spv::OpBitcast, type_int_, - builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createBinOp( - spv::OpShiftRightLogical, type_uint_, - builder_->createLoad( - builder_->createAccessChain( - spv::StorageClassUniform, - uniform_system_constants_, id_vector_temp_), - spv::NoPrecision), - builder_->makeUintConstant(2)), - vertex_index))); - // Get the actual build point for phi. - spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_load_vertex_index_merge); - // Select between the loaded index and the original index from Vulkan. 
- builder_->setBuildPoint(&block_load_vertex_index_merge); + SpirvBuilder::IfBuilder load_vertex_index_if( + load_vertex_index, spv::SelectionControlDontFlattenMask, + *builder_); + spv::Id loaded_vertex_index; { - std::unique_ptr loaded_vertex_index_phi_op = - std::make_unique(builder_->getUniqueId(), - type_uint_, spv::OpPhi); - loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_end.getId()); - loaded_vertex_index_phi_op->addIdOperand(vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_pre.getId()); - vertex_index = loaded_vertex_index_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(loaded_vertex_index_phi_op)); + // Load the 32-bit index. + // TODO(Triang3l): Bounds checking. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant( + kSystemConstantVertexIndexLoadAddress)); + loaded_vertex_index = + LoadUint32FromSharedMemory(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, + builder_->createLoad( + builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision), + builder_->makeUintConstant(2)), + vertex_index))); } + load_vertex_index_if.makeEndIf(); + // Select between the loaded index and the original index from Vulkan. + vertex_index = load_vertex_index_if.createMergePhi( + loaded_vertex_index, vertex_index); } // Endian-swap the index. 
id_vector_temp_.clear(); @@ -1864,6 +1924,13 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { + // TODO(Triang3l): Allow memory export with resolution scaling only for the + // center host pixel, with sample shading (for depth format conversion) only + // for the bottom-right sample (unlike in Direct3D, the sample mask input + // doesn't include covered samples of the primitive that correspond to other + // invocations, so use the sample that's the most friendly to the half-pixel + // offset). + // Set up pixel killing from within the translated shader without affecting // the control flow (unlike with OpKill), similarly to how pixel killing works // on the Xenos, and also keeping a single critical section exit and return @@ -2497,6 +2564,26 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, var_main_fsi_color_written_); } } break; + case InstructionStorageTarget::kExportAddress: { + // spv::NoResult if memory export usage is unsupported or invalid. + target_pointer = var_main_memexport_address_; + } break; + case InstructionStorageTarget::kExportData: { + // spv::NoResult if memory export usage is unsupported or invalid. + target_pointer = var_main_memexport_data_[result.storage_index]; + if (target_pointer != spv::NoResult) { + // Mark that the eM# has been written to and needs to be exported. + assert_true(var_main_memexport_data_written_ != spv::NoResult); + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createLoad(var_main_memexport_data_written_, + spv::NoPrecision), + builder_->makeUintConstant(uint32_t(1) + << result.storage_index)), + var_main_memexport_data_written_); + } + } break; default: // TODO(Triang3l): All storage targets. 
break; @@ -2808,40 +2895,25 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { static_cast(xenos::Endian::k8in32))); spv::Id is_8in16_or_8in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); - spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); - assert_false(block_pre_8in16.isTerminated()); - spv::Block& block_8in16 = builder_->makeNewBlock(); - spv::Block& block_8in16_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_8in16_merge, - spv::SelectionControlMaskNone); - builder_->createConditionalBranch(is_8in16_or_8in32, &block_8in16, - &block_8in16_merge); - builder_->setBuildPoint(&block_8in16); - spv::Id swapped_8in16 = builder_->createBinOp( - spv::OpBitwiseOr, type, - builder_->createBinOp( - spv::OpBitwiseAnd, type, - builder_->createBinOp(spv::OpShiftRightLogical, type, value, - const_uint_8_typed), - const_uint_00ff00ff_typed), - builder_->createBinOp( - spv::OpShiftLeftLogical, type, - builder_->createBinOp(spv::OpBitwiseAnd, type, value, - const_uint_00ff00ff_typed), - const_uint_8_typed)); - builder_->createBranch(&block_8in16_merge); - builder_->setBuildPoint(&block_8in16_merge); + SpirvBuilder::IfBuilder if_8in16(is_8in16_or_8in32, + spv::SelectionControlMaskNone, *builder_); + spv::Id swapped_8in16; { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), type, - spv::OpPhi); - phi_op->addIdOperand(swapped_8in16); - phi_op->addIdOperand(block_8in16.getId()); - phi_op->addIdOperand(value); - phi_op->addIdOperand(block_pre_8in16.getId()); - value = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + swapped_8in16 = builder_->createBinOp( + spv::OpBitwiseOr, type, + builder_->createBinOp( + spv::OpBitwiseAnd, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_8_typed), + const_uint_00ff00ff_typed), + builder_->createBinOp( + spv::OpShiftLeftLogical, type, + 
builder_->createBinOp(spv::OpBitwiseAnd, type, value, + const_uint_00ff00ff_typed), + const_uint_8_typed)); } + if_8in16.makeEndIf(); + value = if_8in16.createMergePhi(swapped_8in16, value); // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). spv::Id is_16in32 = builder_->createBinOp( @@ -2850,46 +2922,75 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { static_cast(xenos::Endian::k16in32))); spv::Id is_8in32_or_16in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); - spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); - spv::Block& block_16in32 = builder_->makeNewBlock(); - spv::Block& block_16in32_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_16in32_merge, - spv::SelectionControlMaskNone); - builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32, - &block_16in32_merge); - builder_->setBuildPoint(&block_16in32); - spv::Id swapped_16in32 = builder_->createQuadOp( - spv::OpBitFieldInsert, type, - builder_->createBinOp(spv::OpShiftRightLogical, type, value, - const_uint_16_typed), - value, builder_->makeIntConstant(16), builder_->makeIntConstant(16)); - builder_->createBranch(&block_16in32_merge); - builder_->setBuildPoint(&block_16in32_merge); + SpirvBuilder::IfBuilder if_16in32(is_8in32_or_16in32, + spv::SelectionControlMaskNone, *builder_); + spv::Id swapped_16in32; { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), type, - spv::OpPhi); - phi_op->addIdOperand(swapped_16in32); - phi_op->addIdOperand(block_16in32.getId()); - phi_op->addIdOperand(value); - phi_op->addIdOperand(block_pre_16in32.getId()); - value = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + swapped_16in32 = builder_->createQuadOp( + spv::OpBitFieldInsert, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_16_typed), + value, builder_->makeIntConstant(16), builder_->makeIntConstant(16)); 
} + if_16in32.makeEndIf(); + value = if_16in32.createMergePhi(swapped_16in32, value); return value; } +spv::Id SpirvShaderTranslator::EndianSwap128Uint4(spv::Id value, + spv::Id endian) { + // Change 8-in-64 and 8-in-128 to 8-in-32, and then swap within 32 bits. + + spv::Id is_8in64 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in64))); + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(3); + uint_vector_temp_.push_back(2); + value = builder_->createTriOp( + spv::OpSelect, type_uint4_, is_8in64, + builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, + uint_vector_temp_), + value); + + spv::Id is_8in128 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in128))); + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(3); + uint_vector_temp_.push_back(2); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + value = builder_->createTriOp( + spv::OpSelect, type_uint4_, is_8in128, + builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, + uint_vector_temp_), + value); + + endian = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in64, is_8in128), + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in32)), + endian); + + return EndianSwap32Uint(value, endian); +} + spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::Id address_dwords_int) { - spv::Block& head_block = *builder_->getBuildPoint(); - assert_false(head_block.isTerminated()); - spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform; - uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2(); - if (!buffer_count_log2) { + + uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); + + if (!binding_count_log2) { // Single binding - load directly. id_vector_temp_.clear(); // The only SSBO struct member. @@ -2903,8 +3004,10 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( // The memory is split into multiple bindings - check which binding to load // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the - // array with the variable itself because it needs VK_EXT_descriptor_indexing. - uint32_t binding_address_bits = (29 - 2) - buffer_count_log2; + // array with the variable itself because it needs non-uniform storage buffer + // indexing. + + uint32_t binding_address_bits = (29 - 2) - binding_count_log2; spv::Id binding_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), @@ -2913,51 +3016,119 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::OpBitwiseAnd, type_int_, address_dwords_int, builder_->makeIntConstant( int((uint32_t(1) << binding_address_bits) - 1))); - uint32_t buffer_count = 1 << buffer_count_log2; - spv::Block* switch_case_blocks[512 / 128]; - for (uint32_t i = 0; i < buffer_count; ++i) { - switch_case_blocks[i] = &builder_->makeNewBlock(); - } - spv::Block& switch_merge_block = builder_->makeNewBlock(); - spv::Id value_phi_result = builder_->getUniqueId(); - std::unique_ptr value_phi_op = - std::make_unique(value_phi_result, type_uint_, - spv::OpPhi); - builder_->createSelectionMerge(&switch_merge_block, - spv::SelectionControlDontFlattenMask); - { - std::unique_ptr switch_op = - std::make_unique(spv::OpSwitch); - switch_op->addIdOperand(binding_index); - // Highest binding index is the default case. 
- switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId()); - switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block); - for (uint32_t i = 0; i < buffer_count - 1; ++i) { - switch_op->addImmediateOperand(int(i)); - switch_op->addIdOperand(switch_case_blocks[i]->getId()); - switch_case_blocks[i]->addPredecessor(&head_block); - } - builder_->getBuildPoint()->addInstruction(std::move(switch_op)); - } - for (uint32_t i = 0; i < buffer_count; ++i) { - builder_->setBuildPoint(switch_case_blocks[i]); - id_vector_temp_.clear(); - id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); - // The only SSBO struct member. - id_vector_temp_.push_back(const_int_0_); - id_vector_temp_.push_back(binding_address); + + auto value_phi_op = std::make_unique( + builder_->getUniqueId(), type_uint_, spv::OpPhi); + // Zero if out of bounds. + value_phi_op->addIdOperand(const_uint_0_); + value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); + + SpirvBuilder::SwitchBuilder binding_switch( + binding_index, spv::SelectionControlDontFlattenMask, *builder_); + uint32_t binding_count = uint32_t(1) << binding_count_log2; + + id_vector_temp_.clear(); + id_vector_temp_.push_back(spv::NoResult); + // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + + for (uint32_t i = 0; i < binding_count; ++i) { + binding_switch.makeBeginCase(i); + id_vector_temp_[0] = builder_->makeIntConstant(int(i)); value_phi_op->addIdOperand(builder_->createLoad( builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision)); - value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); - builder_->createBranch(&switch_merge_block); + value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); } - builder_->setBuildPoint(&switch_merge_block); + + binding_switch.makeEndSwitch(); + + spv::Id value_phi_result = value_phi_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); return value_phi_result; } +void SpirvShaderTranslator::StoreUint32ToSharedMemory( + spv::Id value, spv::Id address_dwords_int, spv::Id replace_mask) { + spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 + ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform; + + spv::Id keep_mask = spv::NoResult; + if (replace_mask != spv::NoResult) { + keep_mask = builder_->createUnaryOp(spv::OpNot, type_uint_, replace_mask); + value = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, value, + replace_mask); + } + + auto store = [&](spv::Id pointer) { + if (replace_mask != spv::NoResult) { + // Don't touch the other bits in the buffer, just modify the needed bits + // in the most up to date uint32 at the address. 
+ spv::Id const_scope_device = builder_->makeUintConstant( + static_cast(spv::ScopeDevice)); + spv::Id const_semantics_relaxed = const_uint_0_; + builder_->createQuadOp(spv::OpAtomicAnd, type_uint_, pointer, + const_scope_device, const_semantics_relaxed, + keep_mask); + builder_->createQuadOp(spv::OpAtomicOr, type_uint_, pointer, + const_scope_device, const_semantics_relaxed, + value); + } else { + builder_->createStore(value, pointer); + } + }; + + uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); + + if (!binding_count_log2) { + // Single binding - store directly. + id_vector_temp_.clear(); + // The only SSBO struct member. + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(address_dwords_int); + store(builder_->createAccessChain(storage_class, buffers_shared_memory_, + id_vector_temp_)); + return; + } + + // The memory is split into multiple bindings - check which binding to store + // to. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the + // array with the variable itself because it needs non-uniform storage buffer + // indexing. + + uint32_t binding_address_bits = (29 - 2) - binding_count_log2; + spv::Id binding_index = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), + builder_->makeUintConstant(binding_address_bits)); + spv::Id binding_address = builder_->createBinOp( + spv::OpBitwiseAnd, type_int_, address_dwords_int, + builder_->makeIntConstant( + int((uint32_t(1) << binding_address_bits) - 1))); + + SpirvBuilder::SwitchBuilder binding_switch( + binding_index, spv::SelectionControlDontFlattenMask, *builder_); + uint32_t binding_count = uint32_t(1) << binding_count_log2; + + id_vector_temp_.clear(); + id_vector_temp_.push_back(spv::NoResult); + // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + + for (uint32_t i = 0; i < binding_count; ++i) { + binding_switch.makeBeginCase(i); + id_vector_temp_[0] = builder_->makeIntConstant(int(i)); + store(builder_->createAccessChain(storage_class, buffers_shared_memory_, + id_vector_temp_)); + } + + binding_switch.makeEndSwitch(); +} + spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated) { spv::Id value_type = builder_->getTypeId(gamma); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 0ed368ae4..aefb00bf6 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -323,17 +323,28 @@ class SpirvShaderTranslator : public ShaderTranslator { explicit Features( const ui::vulkan::VulkanProvider::DeviceInfo& device_info); explicit Features(bool all = false); + unsigned int spirv_version; + uint32_t max_storage_buffer_range; + + bool full_draw_index_uint32; + + bool vertex_pipeline_stores_and_atomics; + bool fragment_stores_and_atomics; + bool clip_distance; bool cull_distance; - bool demote_to_helper_invocation; - bool fragment_shader_sample_interlock; - bool full_draw_index_uint32; + bool image_view_format_swizzle; + bool signed_zero_inf_nan_preserve_float32; bool denorm_flush_to_zero_float32; bool rounding_mode_rte_float32; + + bool fragment_shader_sample_interlock; + + bool demote_to_helper_invocation; }; SpirvShaderTranslator(const Features& features, @@ -424,6 +435,8 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; + void ProcessAllocInstruction(const ParsedAllocInstruction& instr, + uint8_t export_eM) override; void ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) override; @@ -470,6 +483,11 @@ class 
SpirvShaderTranslator : public ShaderTranslator { Shader::IsHostVertexShaderTypeDomain( GetSpirvShaderModification().vertex.host_vertex_shader_type); } + bool IsSpirvComputeShader() const { + return is_vertex_shader() && + GetSpirvShaderModification().vertex.host_vertex_shader_type == + Shader::HostVertexShaderType::kMemExportCompute; + } bool IsExecutionModeEarlyFragmentTests() const { return is_pixel_shader() && @@ -567,24 +585,48 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs, spv::Id operand_1_abs); // Conditionally discard the current fragment. Changes the build point. - void KillPixel(spv::Id condition); + void KillPixel(spv::Id condition, + uint8_t memexport_eM_potentially_written_before); // Return type is a xe::bit_count(result.GetUsedResultComponents())-component // float vector or a single float, depending on whether it's a reduction // instruction (check getTypeId of the result), or returns spv::NoResult if // nothing to store. - spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, - bool& predicate_written); + spv::Id ProcessVectorAluOperation( + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written); // Returns a float value to write to the previous scalar register and to the // destination. If the return value is ps itself (in the retain_prev case), // returns spv::NoResult (handled as a special case, so if it's retain_prev, // but don't need to write to anywhere, no OpLoad(ps) will be done). - spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr, - bool& predicate_written); + spv::Id ProcessScalarAluOperation( + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written); // Perform endian swap of a uint scalar or vector. spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian); + // Perform endian swap of a uint4 vector. 
+ spv::Id EndianSwap128Uint4(spv::Id value, spv::Id endian); spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); + // If `replace_mask` is provided, the bits specified in the mask will be + // replaced with those from the value via OpAtomicAnd/Or. + // Bits of `value` not in `replace_mask` will be ignored. + void StoreUint32ToSharedMemory(spv::Id value, spv::Id address_dwords_int, + spv::Id replace_mask = spv::NoResult); + + bool IsMemoryExportSupported() const { + if (is_pixel_shader()) { + return features_.fragment_stores_and_atomics; + } + return features_.vertex_pipeline_stores_and_atomics || + IsSpirvComputeShader(); + } + + bool IsMemoryExportUsed() const { + return current_shader().memexport_eM_written() && IsMemoryExportSupported(); + } + + void ExportToMemory(uint8_t export_eM); // The source may be a floating-point scalar or a vector. spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated); @@ -605,7 +647,7 @@ class SpirvShaderTranslator : public ShaderTranslator { void SampleTexture(spv::Builder::TextureParameters& texture_parameters, spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, spv::Id image_signed, - spv::Id sampler, spv::Id is_all_signed, + spv::Id sampler, spv::Id is_any_unsigned, spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id& result_signed_out, spv::Id lerp_factor = spv::NoResult, @@ -872,6 +914,21 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id var_main_tfetch_gradients_v_; // float4[register_count()]. spv::Id var_main_registers_; + // Memory export variables are created only when needed. + // float4. + spv::Id var_main_memexport_address_; + // Each is float4. + spv::Id var_main_memexport_data_[ucode::kMaxMemExportElementCount]; + // Bit field of which eM# elements have been written so far by the invocation + // since the last memory write - uint. 
+ spv::Id var_main_memexport_data_written_; + // If memory export is disabled in certain invocations or (if emulating some + // primitive types without a geometry shader) at specific guest vertex loop + // iterations because the translated shader is executed multiple times for the + // same guest vertex or pixel, this contains whether memory export is allowed + // in the current execution of the translated code. + // bool. + spv::Id main_memexport_allowed_; // VS only - float3 (special exports). spv::Id var_main_point_size_edge_flag_kill_vertex_; // PS, only when needed - bool. diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 05e41d5ab..1e7580e34 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -39,31 +39,23 @@ spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value, const_float_vectors_0_[num_components - 1], value); } -void SpirvShaderTranslator::KillPixel(spv::Id condition) { - // Same calls as in spv::Builder::If. - spv::Function& function = builder_->getBuildPoint()->getParent(); - spv::Block* kill_block = new spv::Block(builder_->getUniqueId(), function); - spv::Block* merge_block = new spv::Block(builder_->getUniqueId(), function); - spv::Block& header_block = *builder_->getBuildPoint(); - - function.addBlock(kill_block); - builder_->setBuildPoint(kill_block); - // Kill without influencing the control flow in the translated shader. - if (var_main_kill_pixel_ != spv::NoResult) { - builder_->createStore(builder_->makeBoolConstant(true), - var_main_kill_pixel_); +void SpirvShaderTranslator::KillPixel( + spv::Id condition, uint8_t memexport_eM_potentially_written_before) { + SpirvBuilder::IfBuilder kill_if(condition, spv::SelectionControlMaskNone, + *builder_); + { + // Perform outstanding memory exports before the invocation becomes inactive + // and storage writes are disabled. 
+ ExportToMemory(memexport_eM_potentially_written_before); + if (var_main_kill_pixel_ != spv::NoResult) { + builder_->createStore(builder_->makeBoolConstant(true), + var_main_kill_pixel_); + } + if (features_.demote_to_helper_invocation) { + builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); + } } - if (features_.demote_to_helper_invocation) { - builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); - } - builder_->createBranch(merge_block); - - builder_->setBuildPoint(&header_block); - builder_->createSelectionMerge(merge_block, spv::SelectionControlMaskNone); - builder_->createConditionalBranch(condition, kill_block, merge_block); - - function.addBlock(merge_block); - builder_->setBuildPoint(merge_block); + kill_if.makeEndIf(); } void SpirvShaderTranslator::ProcessAluInstruction( @@ -89,12 +81,12 @@ void SpirvShaderTranslator::ProcessAluInstruction( // Whether the instruction has changed the predicate, and it needs to be // checked again later. bool predicate_written_vector = false; - spv::Id vector_result = - ProcessVectorAluOperation(instr, predicate_written_vector); + spv::Id vector_result = ProcessVectorAluOperation( + instr, memexport_eM_potentially_written_before, predicate_written_vector); bool predicate_written_scalar = false; - spv::Id scalar_result = - ProcessScalarAluOperation(instr, predicate_written_scalar); + spv::Id scalar_result = ProcessScalarAluOperation( + instr, memexport_eM_potentially_written_before, predicate_written_scalar); if (scalar_result != spv::NoResult) { EnsureBuildPointAvailable(); builder_->createStore(scalar_result, var_main_previous_scalar_); @@ -118,7 +110,8 @@ void SpirvShaderTranslator::ProcessAluInstruction( } spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( - const ParsedAluInstruction& instr, bool& predicate_written) { + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written) { predicate_written = false; uint32_t used_result_components = 
@@ -564,7 +557,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Id ma_z_result[4] = {}, ma_yx_result[4] = {}; // Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)). - spv::Builder::If ma_z_if( + SpirvBuilder::IfBuilder ma_z_if( builder_->createBinOp( spv::OpLogicalAnd, type_bool_, builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, @@ -596,14 +589,13 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_z_end_block = *builder_->getBuildPoint(); ma_z_if.makeBeginElse(); { spv::Id ma_y_result[4] = {}, ma_x_result[4] = {}; // The major axis is not Z - create an inner conditional to check if the // major axis is Y (abs(y) >= abs(x)). - spv::Builder::If ma_y_if( + SpirvBuilder::IfBuilder ma_y_if( builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, operand_abs[1], operand_abs[0]), spv::SelectionControlMaskNone, *builder_); @@ -629,7 +621,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_y_end_block = *builder_->getBuildPoint(); ma_y_if.makeBeginElse(); { // The major axis is X. 
@@ -654,7 +645,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_x_end_block = *builder_->getBuildPoint(); ma_y_if.makeEndIf(); // The major axis is Y or X - choose the options of the result from Y @@ -663,18 +653,10 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (!(used_result_components & (1 << i))) { continue; } - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(ma_y_result[i]); - phi_op->addIdOperand(ma_y_end_block.getId()); - phi_op->addIdOperand(ma_x_result[i]); - phi_op->addIdOperand(ma_x_end_block.getId()); - ma_yx_result[i] = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + ma_yx_result[i] = + ma_y_if.createMergePhi(ma_y_result[i], ma_x_result[i]); } } - spv::Block& ma_yx_end_block = *builder_->getBuildPoint(); ma_z_if.makeEndIf(); // Choose the result options from Z and YX cases. @@ -683,15 +665,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (!(used_result_components & (1 << i))) { continue; } - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(ma_z_result[i]); - phi_op->addIdOperand(ma_z_end_block.getId()); - phi_op->addIdOperand(ma_yx_result[i]); - phi_op->addIdOperand(ma_yx_end_block.getId()); - id_vector_temp_.push_back(phi_op->getResultId()); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + id_vector_temp_.push_back( + ma_z_if.createMergePhi(ma_z_result[i], ma_yx_result[i])); } assert_true(id_vector_temp_.size() == used_result_component_count); if (used_result_components & 0b0100) { @@ -799,14 +774,16 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( case ucode::AluVectorOpcode::kKillGt: case ucode::AluVectorOpcode::kKillGe: case ucode::AluVectorOpcode::kKillNe: { - KillPixel(builder_->createUnaryOp( - spv::OpAny, type_bool_, - builder_->createBinOp( - 
spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, - GetOperandComponents(operand_storage[0], instr.vector_operands[0], - 0b1111), - GetOperandComponents(operand_storage[1], instr.vector_operands[1], - 0b1111)))); + KillPixel( + builder_->createUnaryOp( + spv::OpAny, type_bool_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, + GetOperandComponents(operand_storage[0], + instr.vector_operands[0], 0b1111), + GetOperandComponents(operand_storage[1], + instr.vector_operands[1], 0b1111))), + memexport_eM_potentially_written_before); return const_float_0_; } @@ -892,7 +869,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( - const ParsedAluInstruction& instr, bool& predicate_written) { + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written) { predicate_written = false; spv::Id operand_storage[2] = {}; @@ -1044,10 +1022,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( spv::OpLogicalAnd, type_bool_, condition, builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b, const_float_0_)); - spv::Block& pre_multiply_if_block = *builder_->getBuildPoint(); + SpirvBuilder::IfBuilder multiply_if( + condition, spv::SelectionControlMaskNone, *builder_); spv::Id product; - spv::Builder::If multiply_if(condition, spv::SelectionControlMaskNone, - *builder_); { // Multiplication case. spv::Id a = instr.scalar_operands[0].GetComponent(0) != @@ -1061,21 +1038,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( product = ZeroIfAnyOperandIsZero( product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs); } - spv::Block& multiply_end_block = *builder_->getBuildPoint(); multiply_if.makeEndIf(); // Merge - choose between the product and -FLT_MAX. 
- { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(product); - phi_op->addIdOperand(multiply_end_block.getId()); - phi_op->addIdOperand(const_float_max_neg); - phi_op->addIdOperand(pre_multiply_if_block.getId()); - spv::Id phi_result = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); - return phi_result; - } + return multiply_if.createMergePhi(product, const_float_max_neg); } case ucode::AluScalarOpcode::kMaxs: @@ -1300,12 +1265,13 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( case ucode::AluScalarOpcode::kKillsNe: case ucode::AluScalarOpcode::kKillsOne: { KillPixel(builder_->createBinOp( - spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, - GetOperandComponents(operand_storage[0], instr.scalar_operands[0], - 0b0001), - instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne - ? const_float_1_ - : const_float_0_)); + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001), + instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne + ? 
const_float_1_ + : const_float_0_), + memexport_eM_potentially_written_before); return const_float_0_; } diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 265082ba1..8f5a74690 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -1145,31 +1145,18 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( z_coordinate_ref = builder_->createNoContractionBinOp( spv::OpFAdd, type_float_, z_coordinate_ref, z_offset); } - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - spv::Block& block_dimension_3d = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, &block_dimension_3d, - &block_dimension_merge); - builder_->setBuildPoint(&block_dimension_3d); - assert_true(z_size != spv::NoResult); - spv::Id z_3d = builder_->createNoContractionBinOp( - spv::OpFDiv, type_float_, z_coordinate_ref, z_size); - builder_->createBranch(&block_dimension_merge); - builder_->setBuildPoint(&block_dimension_merge); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id z_3d; { - std::unique_ptr z_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - z_phi_op->addIdOperand(z_3d); - z_phi_op->addIdOperand(block_dimension_3d.getId()); - z_phi_op->addIdOperand(z_coordinate_ref); - z_phi_op->addIdOperand(block_dimension_head.getId()); - z_coordinate_ref = z_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(z_phi_op)); + assert_true(z_size != spv::NoResult); + z_3d = builder_->createNoContractionBinOp(spv::OpFDiv, type_float_, + z_coordinate_ref, z_size); } + if_data_is_3d.makeEndIf(); + z_coordinate_ref = + 
if_data_is_3d.createMergePhi(z_3d, z_coordinate_ref); } else { // Denormalize the Z coordinate for a stacked texture, and apply the // offset. @@ -1394,63 +1381,39 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( // OpSampledImage must be in the same block as where its result is used. if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Check if the texture is 3D or stacked. - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); - spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, - &block_dimension_3d_start, - &block_dimension_stacked_start); - - // 3D. - builder_->setBuildPoint(&block_dimension_3d_start); - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 3; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); - spv::Id lod_3d = QueryTextureLod(texture_parameters, - image_3d_unsigned, image_3d_signed, - sampler, swizzled_signs_all_signed); - // Get the actual build point for phi. - spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // 2D stacked. - builder_->setBuildPoint(&block_dimension_stacked_start); - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 2; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float2_, id_vector_temp_); - spv::Id lod_stacked = QueryTextureLod( - texture_parameters, image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, - swizzled_signs_all_signed); - // Get the actual build point for phi. 
- spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // Choose between the 3D and the stacked result based on the actual - // data dimensionality. - builder_->setBuildPoint(&block_dimension_merge); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id lod_3d; { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - dimension_phi_op->addIdOperand(lod_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(lod_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - result[0] = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); + // 3D. + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + lod_3d = QueryTextureLod(texture_parameters, image_3d_unsigned, + image_3d_signed, sampler, + swizzled_signs_all_signed); } + if_data_is_3d.makeBeginElse(); + spv::Id lod_stacked; + { + // 2D stacked. + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float2_, id_vector_temp_); + lod_stacked = QueryTextureLod(texture_parameters, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, + sampler, swizzled_signs_all_signed); + } + if_data_is_3d.makeEndIf(); + result[0] = if_data_is_3d.createMergePhi(lod_3d, lod_stacked); } else { uint32_t lod_query_coordinate_component_count = instr.dimension == xenos::FetchOpDimension::kCube ? 
3 : 2; @@ -1512,6 +1475,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( } } } + spv::Id is_any_unsigned = builder_->createUnaryOp( + spv::OpLogicalNot, type_bool_, is_all_signed); // Load the fetch constant word 4, needed unconditionally for LOD // biasing, for result exponent biasing, and conditionally for stacked @@ -1765,273 +1730,247 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( // component, 2 gradient components, two fetches if the Z axis is // linear-filtered). - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); - spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, - &block_dimension_3d_start, - &block_dimension_stacked_start); - - // 3D. - builder_->setBuildPoint(&block_dimension_3d_start); - if (use_computed_lod) { - texture_parameters.gradX = gradients_h; - texture_parameters.gradY = gradients_v; - } - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 3; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); spv::Id sample_result_unsigned_3d, sample_result_signed_3d; - SampleTexture(texture_parameters, image_operands_mask, - image_3d_unsigned, image_3d_signed, sampler, - is_all_signed, is_any_signed, sample_result_unsigned_3d, - sample_result_signed_3d); - // Get the actual build point after the SampleTexture call for phi. - spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // 2D stacked. 
- builder_->setBuildPoint(&block_dimension_stacked_start); - if (use_computed_lod) { - // Extract 2D gradients for stacked textures which are 2D arrays. - uint_vector_temp_.clear(); - uint_vector_temp_.push_back(0); - uint_vector_temp_.push_back(1); - texture_parameters.gradX = builder_->createRvalueSwizzle( - spv::NoPrecision, type_float2_, gradients_h, uint_vector_temp_); - texture_parameters.gradY = builder_->createRvalueSwizzle( - spv::NoPrecision, type_float2_, gradients_v, uint_vector_temp_); - } - // Check if linear filtering is needed. - bool vol_mag_filter_is_fetch_const = - instr.attributes.vol_mag_filter == - xenos::TextureFilter::kUseFetchConst; - bool vol_min_filter_is_fetch_const = - instr.attributes.vol_min_filter == - xenos::TextureFilter::kUseFetchConst; - bool vol_mag_filter_is_linear = - instr.attributes.vol_mag_filter == xenos::TextureFilter::kLinear; - bool vol_min_filter_is_linear = - instr.attributes.vol_min_filter == xenos::TextureFilter::kLinear; - spv::Id vol_filter_is_linear = spv::NoResult; - if (use_computed_lod && - (vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const || - vol_mag_filter_is_linear != vol_min_filter_is_linear)) { - // Check if minifying along layers (derivative > 1 along any axis). - spv::Id layer_max_gradient = builder_->createBinBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax, - builder_->createCompositeExtract(gradients_h, type_float_, 2), - builder_->createCompositeExtract(gradients_v, type_float_, 2)); - if (!instr.attributes.unnormalized_coordinates) { - // Denormalize the gradient if provided as normalized. - assert_true(size[2] != spv::NoResult); - layer_max_gradient = builder_->createNoContractionBinOp( - spv::OpFMul, type_float_, layer_max_gradient, size[2]); + { + // 3D. + if (use_computed_lod) { + texture_parameters.gradX = gradients_h; + texture_parameters.gradY = gradients_v; } - // For NaN, considering that magnification is being done. 
- spv::Id is_minifying_z = builder_->createBinOp( - spv::OpFOrdLessThan, type_bool_, layer_max_gradient, - builder_->makeFloatConstant(1.0f)); - // Choose what filter is actually used, the minification or the - // magnification one. - spv::Id vol_mag_filter_is_linear_loaded = - vol_mag_filter_is_fetch_const - ? builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 0)), - const_uint_0_) - : builder_->makeBoolConstant(vol_mag_filter_is_linear); - spv::Id vol_min_filter_is_linear_loaded = - vol_min_filter_is_fetch_const - ? builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 1)), - const_uint_0_) - : builder_->makeBoolConstant(vol_min_filter_is_linear); - vol_filter_is_linear = - builder_->createTriOp(spv::OpSelect, type_bool_, is_minifying_z, - vol_min_filter_is_linear_loaded, - vol_mag_filter_is_linear_loaded); - } else { - // No gradients, or using the same filter overrides for magnifying - // and minifying. Assume always magnifying if no gradients (LOD 0, - // always <= 0). LOD is within 2D layers, not between them (unlike - // in 3D textures, which have mips with depth reduced), so it - // shouldn't have effect on filtering between layers. 
- if (vol_mag_filter_is_fetch_const) { - vol_filter_is_linear = builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 0)), - const_uint_0_); + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + SampleTexture(texture_parameters, image_operands_mask, + image_3d_unsigned, image_3d_signed, sampler, + is_any_unsigned, is_any_signed, + sample_result_unsigned_3d, sample_result_signed_3d); } - spv::Id layer_coordinate = coordinates[2]; - // Linear filtering may be needed either based on a dynamic condition - // (the filtering mode is taken from the fetch constant, or it's - // different for magnification and minification), or on a static one - // (with gradients - specified in the instruction for both - // magnification and minification as linear, without gradients - - // specified for magnification as linear). - // If the filter is linear, subtract 0.5 from the Z coordinate of the - // first layer in filtering because 0.5 is in the middle of it. - if (vol_filter_is_linear != spv::NoResult) { - layer_coordinate = builder_->createTriOp( - spv::OpSelect, type_float_, vol_filter_is_linear, - builder_->createNoContractionBinOp( - spv::OpFSub, type_float_, layer_coordinate, - builder_->makeFloatConstant(0.5f)), - layer_coordinate); - } else if (vol_mag_filter_is_linear) { - layer_coordinate = builder_->createNoContractionBinOp( - spv::OpFSub, type_float_, layer_coordinate, - builder_->makeFloatConstant(0.5f)); - } - // Sample the first layer, needed regardless of whether filtering is - // needed. - // Floor the array layer (Vulkan does rounding to nearest or + 0.5 and - // floor even for the layer index, but on the Xenos, addressing is - // similar to that of 3D textures). 
This is needed for both point and - // linear filtering (with linear, 0.5 was subtracted previously). - spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, - layer_coordinate); - id_vector_temp_.clear(); - id_vector_temp_.push_back(coordinates[0]); - id_vector_temp_.push_back(coordinates[1]); - id_vector_temp_.push_back(layer_0_coordinate); - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + if_data_is_3d.makeBeginElse(); spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked; - SampleTexture(texture_parameters, image_operands_mask, - image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, is_all_signed, - is_any_signed, sample_result_unsigned_stacked, - sample_result_signed_stacked); - // Sample the second layer if linear filtering is potentially needed - // (conditionally or unconditionally, depending on whether the filter - // needs to be chosen at runtime), and filter. - if (vol_filter_is_linear != spv::NoResult || - vol_mag_filter_is_linear) { - spv::Block& block_z_head = *builder_->getBuildPoint(); - spv::Block& block_z_linear = (vol_filter_is_linear != spv::NoResult) - ? builder_->makeNewBlock() - : block_z_head; - spv::Block& block_z_merge = (vol_filter_is_linear != spv::NoResult) - ? builder_->makeNewBlock() - : block_z_head; - if (vol_filter_is_linear != spv::NoResult) { - builder_->createSelectionMerge( - &block_z_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - vol_filter_is_linear, &block_z_linear, &block_z_merge); - builder_->setBuildPoint(&block_z_linear); + { + // 2D stacked. + if (use_computed_lod) { + // Extract 2D gradients for stacked textures which are 2D arrays. 
+ uint_vector_temp_.clear(); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + texture_parameters.gradX = + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + gradients_h, uint_vector_temp_); + texture_parameters.gradY = + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + gradients_v, uint_vector_temp_); } - spv::Id layer_1_coordinate = builder_->createBinOp( - spv::OpFAdd, type_float_, layer_0_coordinate, - builder_->makeFloatConstant(1.0f)); + // Check if linear filtering is needed. + bool vol_mag_filter_is_fetch_const = + instr.attributes.vol_mag_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_min_filter_is_fetch_const = + instr.attributes.vol_min_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_mag_filter_is_linear = instr.attributes.vol_mag_filter == + xenos::TextureFilter::kLinear; + bool vol_min_filter_is_linear = instr.attributes.vol_min_filter == + xenos::TextureFilter::kLinear; + spv::Id vol_filter_is_linear = spv::NoResult; + if (use_computed_lod && + (vol_mag_filter_is_fetch_const || + vol_min_filter_is_fetch_const || + vol_mag_filter_is_linear != vol_min_filter_is_linear)) { + // Check if minifying along layers (derivative > 1 along any + // axis). + spv::Id layer_max_gradient = builder_->createBinBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax, + builder_->createCompositeExtract(gradients_h, type_float_, 2), + builder_->createCompositeExtract(gradients_v, type_float_, + 2)); + if (!instr.attributes.unnormalized_coordinates) { + // Denormalize the gradient if provided as normalized. + assert_true(size[2] != spv::NoResult); + layer_max_gradient = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_, layer_max_gradient, size[2]); + } + // For NaN, considering that magnification is being done. 
+ spv::Id is_minifying_z = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, layer_max_gradient, + builder_->makeFloatConstant(1.0f)); + // Choose what filter is actually used, the minification or the + // magnification one. + spv::Id vol_mag_filter_is_linear_loaded = + vol_mag_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_) + : builder_->makeBoolConstant(vol_mag_filter_is_linear); + spv::Id vol_min_filter_is_linear_loaded = + vol_min_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 1)), + const_uint_0_) + : builder_->makeBoolConstant(vol_min_filter_is_linear); + vol_filter_is_linear = builder_->createTriOp( + spv::OpSelect, type_bool_, is_minifying_z, + vol_min_filter_is_linear_loaded, + vol_mag_filter_is_linear_loaded); + } else { + // No gradients, or using the same filter overrides for magnifying + // and minifying. Assume always magnifying if no gradients (LOD 0, + // always <= 0). LOD is within 2D layers, not between them (unlike + // in 3D textures, which have mips with depth reduced), so it + // shouldn't have effect on filtering between layers. 
+ if (vol_mag_filter_is_fetch_const) { + vol_filter_is_linear = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_); + } + } + spv::Id layer_coordinate = coordinates[2]; + // Linear filtering may be needed either based on a dynamic + // condition (the filtering mode is taken from the fetch constant, + // or it's different for magnification and minification), or on a + // static one (with gradients - specified in the instruction for + // both magnification and minification as linear, without + // gradients - specified for magnification as linear). + // If the filter is linear, subtract 0.5 from the Z coordinate of + // the first layer in filtering because 0.5 is in the middle of it. + if (vol_filter_is_linear != spv::NoResult) { + layer_coordinate = builder_->createTriOp( + spv::OpSelect, type_float_, vol_filter_is_linear, + builder_->createNoContractionBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)), + layer_coordinate); + } else if (vol_mag_filter_is_linear) { + layer_coordinate = builder_->createNoContractionBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + } + // Sample the first layer, needed regardless of whether filtering is + // needed. + // Floor the array layer (Vulkan does rounding to nearest or + 0.5 + // and floor even for the layer index, but on the Xenos, addressing + // is similar to that of 3D textures). This is needed for both point + // and linear filtering (with linear, 0.5 was subtracted + // previously). 
+ spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, + layer_coordinate); id_vector_temp_.clear(); id_vector_temp_.push_back(coordinates[0]); id_vector_temp_.push_back(coordinates[1]); - id_vector_temp_.push_back(layer_1_coordinate); + id_vector_temp_.push_back(layer_0_coordinate); texture_parameters.coords = builder_->createCompositeConstruct( type_float3_, id_vector_temp_); - spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract, - layer_coordinate); - spv::Id sample_result_unsigned_stacked_filtered; - spv::Id sample_result_signed_stacked_filtered; SampleTexture( texture_parameters, image_operands_mask, image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed, - sampler, is_all_signed, is_any_signed, - sample_result_unsigned_stacked_filtered, - sample_result_signed_stacked_filtered, layer_lerp_factor, + sampler, is_any_unsigned, is_any_signed, sample_result_unsigned_stacked, sample_result_signed_stacked); - if (vol_filter_is_linear != spv::NoResult) { - // Get the actual build point after the SampleTexture call for - // phi. 
- spv::Block& block_z_linear_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_z_merge); - builder_->setBuildPoint(&block_z_merge); - { - std::unique_ptr filter_phi_op = - std::make_unique( - builder_->getUniqueId(), type_float4_, spv::OpPhi); - filter_phi_op->addIdOperand( - sample_result_unsigned_stacked_filtered); - filter_phi_op->addIdOperand(block_z_linear_end.getId()); - filter_phi_op->addIdOperand(sample_result_unsigned_stacked); - filter_phi_op->addIdOperand(block_z_head.getId()); - sample_result_unsigned_stacked = filter_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(filter_phi_op)); + // Sample the second layer if linear filtering is potentially needed + // (conditionally or unconditionally, depending on whether the + // filter needs to be chosen at runtime), and filter. + if (vol_filter_is_linear != spv::NoResult || + vol_mag_filter_is_linear) { + spv::Block& block_z_head = *builder_->getBuildPoint(); + spv::Block& block_z_linear = + (vol_filter_is_linear != spv::NoResult) + ? builder_->makeNewBlock() + : block_z_head; + spv::Block& block_z_merge = + (vol_filter_is_linear != spv::NoResult) + ? 
builder_->makeNewBlock() + : block_z_head; + if (vol_filter_is_linear != spv::NoResult) { + builder_->createSelectionMerge( + &block_z_merge, spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch( + vol_filter_is_linear, &block_z_linear, &block_z_merge); + builder_->setBuildPoint(&block_z_linear); } - { - std::unique_ptr filter_phi_op = - std::make_unique( - builder_->getUniqueId(), type_float4_, spv::OpPhi); - filter_phi_op->addIdOperand( - sample_result_signed_stacked_filtered); - filter_phi_op->addIdOperand(block_z_linear_end.getId()); - filter_phi_op->addIdOperand(sample_result_signed_stacked); - filter_phi_op->addIdOperand(block_z_head.getId()); - sample_result_signed_stacked = filter_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(filter_phi_op)); + spv::Id layer_1_coordinate = builder_->createBinOp( + spv::OpFAdd, type_float_, layer_0_coordinate, + builder_->makeFloatConstant(1.0f)); + id_vector_temp_.clear(); + id_vector_temp_.push_back(coordinates[0]); + id_vector_temp_.push_back(coordinates[1]); + id_vector_temp_.push_back(layer_1_coordinate); + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract, + layer_coordinate); + spv::Id sample_result_unsigned_stacked_filtered; + spv::Id sample_result_signed_stacked_filtered; + SampleTexture( + texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, is_any_unsigned, + is_any_signed, sample_result_unsigned_stacked_filtered, + sample_result_signed_stacked_filtered, layer_lerp_factor, + sample_result_unsigned_stacked, sample_result_signed_stacked); + if (vol_filter_is_linear != spv::NoResult) { + // Get the actual build point after the SampleTexture call for + // phi. 
+ spv::Block& block_z_linear_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_z_merge); + builder_->setBuildPoint(&block_z_merge); + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_unsigned_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_unsigned_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_unsigned_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_signed_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_signed_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_signed_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + } else { + sample_result_unsigned_stacked = + sample_result_unsigned_stacked_filtered; + sample_result_signed_stacked = + sample_result_signed_stacked_filtered; } - } else { - sample_result_unsigned_stacked = - sample_result_unsigned_stacked_filtered; - sample_result_signed_stacked = - sample_result_signed_stacked_filtered; } } - // Get the actual build point for phi. - spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); + if_data_is_3d.makeEndIf(); - // Choose between the 3D and the stacked result based on the actual - // data dimensionality. 
- builder_->setBuildPoint(&block_dimension_merge); - { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - dimension_phi_op->addIdOperand(sample_result_unsigned_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(sample_result_unsigned_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - sample_result_unsigned = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); - } - { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - dimension_phi_op->addIdOperand(sample_result_signed_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(sample_result_signed_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - sample_result_signed = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); - } + sample_result_unsigned = if_data_is_3d.createMergePhi( + sample_result_unsigned_3d, sample_result_unsigned_stacked); + sample_result_signed = if_data_is_3d.createMergePhi( + sample_result_signed_3d, sample_result_signed_stacked); } else { if (use_computed_lod) { texture_parameters.gradX = gradients_h; @@ -2045,7 +1984,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( builder_->createCompositeConstruct(type_float3_, id_vector_temp_); SampleTexture(texture_parameters, image_operands_mask, image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, is_all_signed, + image_2d_array_or_cube_signed, sampler, is_any_unsigned, is_any_signed, sample_result_unsigned, sample_result_signed); } @@ -2095,26 +2034,20 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( spv::OpBitwiseAnd, type_uint_, swizzle_word, builder_->makeUintConstant(swizzle_bit_0_value << 2)), 
const_uint_0_); - spv::Block& block_swizzle_head = *builder_->getBuildPoint(); - spv::Block& block_swizzle_constant = builder_->makeNewBlock(); - spv::Block& block_swizzle_component = builder_->makeNewBlock(); - spv::Block& block_swizzle_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_swizzle_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(swizzle_bit_2, - &block_swizzle_constant, - &block_swizzle_component); - // Constant values. - builder_->setBuildPoint(&block_swizzle_constant); - // Bit 0 - 0 or 1. - spv::Id swizzle_result_constant = - builder_->createTriOp(spv::OpSelect, type_float_, swizzle_bit_0, - const_float_1, const_float_0_); - builder_->createBranch(&block_swizzle_merge); - // Fetched components. + SpirvBuilder::IfBuilder if_swizzle_constant( + swizzle_bit_2, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id swizzle_result_constant; + { + // Constant values. + // Bit 0 - 0 or 1. + swizzle_result_constant = builder_->createTriOp( + spv::OpSelect, type_float_, swizzle_bit_0, const_float_1, + const_float_0_); + } + if_swizzle_constant.makeBeginElse(); spv::Id swizzle_result_component; { - builder_->setBuildPoint(&block_swizzle_component); + // Fetched components. // Select whether the result is signed or unsigned (or biased or // gamma-corrected) based on the post-swizzle signedness. spv::Id swizzle_sample_result = builder_->createTriOp( @@ -2146,22 +2079,11 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( swizzle_result_component = builder_->createTriOp( spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w, swizzle_x_or_y); - builder_->createBranch(&block_swizzle_merge); } + if_swizzle_constant.makeEndIf(); // Select between the constants and the fetched components. 
- builder_->setBuildPoint(&block_swizzle_merge); - { - std::unique_ptr swizzle_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - swizzle_phi_op->addIdOperand(swizzle_result_constant); - swizzle_phi_op->addIdOperand(block_swizzle_constant.getId()); - swizzle_phi_op->addIdOperand(swizzle_result_component); - swizzle_phi_op->addIdOperand(block_swizzle_component.getId()); - result[result_component_index] = swizzle_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(swizzle_phi_op)); - } + result[result_component_index] = if_swizzle_constant.createMergePhi( + swizzle_result_constant, swizzle_result_component); } } @@ -2441,58 +2363,43 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding( void SpirvShaderTranslator::SampleTexture( spv::Builder::TextureParameters& texture_parameters, spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, - spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed, + spv::Id image_signed, spv::Id sampler, spv::Id is_any_unsigned, spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id& result_signed_out, spv::Id lerp_factor, spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) { for (uint32_t i = 0; i < 2; ++i) { - spv::Block& block_sign_head = *builder_->getBuildPoint(); - spv::Block& block_sign = builder_->makeNewBlock(); - spv::Block& block_sign_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sign_merge, - spv::SelectionControlDontFlattenMask); - // Unsigned (i == 0) - if there are any non-signed components. - // Signed (i == 1) - if there are any signed components. - builder_->createConditionalBranch(i ? is_any_signed : is_all_signed, - i ? &block_sign : &block_sign_merge, - i ? &block_sign_merge : &block_sign); - builder_->setBuildPoint(&block_sign); - spv::Id image = i ? image_signed : image_unsigned; - // OpSampledImage must be in the same block as where its result is used. 
- texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image)), image, - sampler); - spv::Id result = builder_->createTextureCall( - spv::NoPrecision, type_float4_, false, false, false, false, false, - texture_parameters, image_operands_mask); - if (lerp_factor != spv::NoResult) { - spv::Id lerp_first = i ? lerp_first_signed : lerp_first_unsigned; - if (lerp_first != spv::NoResult) { - spv::Id lerp_difference = builder_->createNoContractionBinOp( - spv::OpVectorTimesScalar, type_float4_, - builder_->createNoContractionBinOp(spv::OpFSub, type_float4_, - result, lerp_first), - lerp_factor); - result = builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_, - result, lerp_difference); + SpirvBuilder::IfBuilder sign_if(i ? is_any_signed : is_any_unsigned, + spv::SelectionControlDontFlattenMask, + *builder_); + spv::Id sign_result; + { + spv::Id image = i ? image_signed : image_unsigned; + // OpSampledImage must be in the same block as where its result is used. + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image)), image, + sampler); + sign_result = builder_->createTextureCall( + spv::NoPrecision, type_float4_, false, false, false, false, false, + texture_parameters, image_operands_mask); + if (lerp_factor != spv::NoResult) { + spv::Id lerp_first = i ? 
lerp_first_signed : lerp_first_unsigned; + if (lerp_first != spv::NoResult) { + spv::Id lerp_difference = builder_->createNoContractionBinOp( + spv::OpVectorTimesScalar, type_float4_, + builder_->createNoContractionBinOp(spv::OpFSub, type_float4_, + sign_result, lerp_first), + lerp_factor); + sign_result = builder_->createNoContractionBinOp( + spv::OpFAdd, type_float4_, sign_result, lerp_difference); + } } } - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_merge); - { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - phi_op->addIdOperand(result); - phi_op->addIdOperand(block_sign.getId()); - phi_op->addIdOperand(const_float4_0_); - phi_op->addIdOperand(block_sign_head.getId()); - // This may overwrite the first lerp endpoint for the sign (such usage of - // this function is allowed). - (i ? result_signed_out : result_unsigned_out) = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); - } + sign_if.makeEndIf(); + // This may overwrite the first lerp endpoint for the sign (such usage of + // this function is allowed). + (i ? result_signed_out : result_unsigned_out) = + sign_if.createMergePhi(sign_result, const_float4_0_); } } @@ -2500,48 +2407,33 @@ spv::Id SpirvShaderTranslator::QueryTextureLod( spv::Builder::TextureParameters& texture_parameters, spv::Id image_unsigned, spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed) { // OpSampledImage must be in the same block as where its result is used. 
- spv::Block& block_sign_head = *builder_->getBuildPoint(); - spv::Block& block_sign_signed = builder_->makeNewBlock(); - spv::Block& block_sign_unsigned = builder_->makeNewBlock(); - spv::Block& block_sign_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sign_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_all_signed, &block_sign_signed, - &block_sign_unsigned); - builder_->setBuildPoint(&block_sign_signed); - texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image_signed)), - image_signed, sampler); - spv::Id lod_signed = builder_->createCompositeExtract( - builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, - false), - type_float_, 1); - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_unsigned); - texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)), - image_unsigned, sampler); - spv::Id lod_unsigned = builder_->createCompositeExtract( - builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, - false), - type_float_, 1); - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_merge); - spv::Id result; + SpirvBuilder::IfBuilder if_signed( + is_all_signed, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id lod_signed; { - std::unique_ptr sign_phi_op = - std::make_unique(builder_->getUniqueId(), type_float_, - spv::OpPhi); - sign_phi_op->addIdOperand(lod_signed); - sign_phi_op->addIdOperand(block_sign_signed.getId()); - sign_phi_op->addIdOperand(lod_unsigned); - sign_phi_op->addIdOperand(block_sign_unsigned.getId()); - result = sign_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op)); + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + 
builder_->makeSampledImageType(builder_->getTypeId(image_signed)), + image_signed, sampler); + lod_signed = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, + texture_parameters, false), + type_float_, 1); } - return result; + if_signed.makeBeginElse(); + spv::Id lod_unsigned; + { + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)), + image_unsigned, sampler); + lod_unsigned = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, + texture_parameters, false), + type_float_, 1); + } + if_signed.makeEndIf(); + return if_signed.createMergePhi(lod_signed, lod_unsigned); } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator_memexport.cc b/src/xenia/gpu/spirv_shader_translator_memexport.cc new file mode 100644 index 000000000..94c0adf54 --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_memexport.cc @@ -0,0 +1,950 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2024 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include +#include +#include +#include +#include +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/gpu/ucode.h" + +namespace xe { +namespace gpu { + +void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) { + if (!export_eM) { + return; + } + + assert_zero(export_eM & ~current_shader().memexport_eM_written()); + + if (!IsMemoryExportSupported()) { + return; + } + + // Check if memory export is allowed in this guest shader invocation. + std::optional if_memexport_allowed; + if (main_memexport_allowed_ != spv::NoResult) { + if_memexport_allowed.emplace(main_memexport_allowed_, + spv::SelectionControlDontFlattenMask, + *builder_); + } + + // If the pixel was killed (but the actual killing on the SPIR-V side has not + // been performed yet because the device doesn't support demotion to helper + // invocation that doesn't interfere with control flow), the current + // invocation is not considered active anymore. + std::optional if_pixel_not_killed; + if (var_main_kill_pixel_ != spv::NoResult) { + if_pixel_not_killed.emplace( + builder_->createUnaryOp( + spv::OpLogicalNot, type_bool_, + builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision)), + spv::SelectionControlDontFlattenMask, *builder_); + } + + // Check if the address with the correct sign and exponent was written, and + // that the index doesn't overflow the mantissa bits. 
+ // all((eA_vector >> uvec4(30, 23, 23, 23)) == uvec4(0x1, 0x96, 0x96, 0x96)) + spv::Id eA_vector = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, + builder_->createLoad(var_main_memexport_address_, spv::NoPrecision)); + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeUintConstant(30)); + id_vector_temp_.push_back(builder_->makeUintConstant(23)); + id_vector_temp_.push_back(id_vector_temp_.back()); + id_vector_temp_.push_back(id_vector_temp_.back()); + spv::Id address_validation_shift = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeUintConstant(0x1)); + id_vector_temp_.push_back(builder_->makeUintConstant(0x96)); + id_vector_temp_.push_back(id_vector_temp_.back()); + id_vector_temp_.push_back(id_vector_temp_.back()); + spv::Id address_validation_value = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); + SpirvBuilder::IfBuilder if_address_valid( + builder_->createUnaryOp( + spv::OpAll, type_bool_, + builder_->createBinOp( + spv::OpIEqual, type_bool4_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint4_, + eA_vector, address_validation_shift), + address_validation_value)), + spv::SelectionControlDontFlattenMask, *builder_, 2, 1); + + using EMIdArray = std::array; + + auto for_each_eM = [&](std::function fn) { + uint8_t eM_remaining = export_eM; + uint32_t eM_index; + while (xe::bit_scan_forward(eM_remaining, &eM_index)) { + eM_remaining &= ~(uint8_t(1) << eM_index); + fn(eM_index); + } + }; + + // Load the original eM. + EMIdArray eM_original; + for_each_eM([&](uint32_t eM_index) { + eM_original[eM_index] = builder_->createLoad( + var_main_memexport_data_[eM_index], spv::NoPrecision); + }); + + // Swap red and blue if needed. 
+ spv::Id format_info = + builder_->createCompositeExtract(eA_vector, type_uint_, 2); + spv::Id swap_red_blue = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 19)), + const_uint_0_); + EMIdArray eM_swapped; + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(2); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(3); + for_each_eM([&](uint32_t eM_index) { + eM_swapped[eM_index] = builder_->createTriOp( + spv::OpSelect, type_float4_, swap_red_blue, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float4_, + eM_original[eM_index], uint_vector_temp_), + eM_original[eM_index]); + }); + + // Extract the numeric format. + spv::Id is_signed = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 16)), + const_uint_0_); + spv::Id is_norm = builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 17)), + const_uint_0_); + + // Perform format packing. 
+ + auto flush_nan = [&](const EMIdArray& eM) -> EMIdArray { + EMIdArray eM_flushed; + for_each_eM([&](uint32_t eM_index) { + spv::Id element_unflushed = eM[eM_index]; + unsigned int component_count = + builder_->getNumComponents(element_unflushed); + eM_flushed[eM_index] = builder_->createTriOp( + spv::OpSelect, type_float_vectors_[component_count - 1], + builder_->createUnaryOp(spv::OpIsNan, + type_bool_vectors_[component_count - 1], + element_unflushed), + const_float_vectors_0_[component_count - 1], element_unflushed); + }); + return eM_flushed; + }; + + auto make_float_constant_vectors = + [&](float value) -> std::array { + std::array const_vectors; + const_vectors[0] = builder_->makeFloatConstant(value); + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_vectors[0]); + for (unsigned int component_count_minus_1 = 1; component_count_minus_1 < 4; + ++component_count_minus_1) { + id_vector_temp_.push_back(const_vectors[0]); + const_vectors[component_count_minus_1] = builder_->makeCompositeConstant( + type_float_vectors_[component_count_minus_1], id_vector_temp_); + } + return const_vectors; + }; + std::array const_float_vectors_minus_1 = + make_float_constant_vectors(-1.0f); + std::array const_float_vectors_minus_0_5 = + make_float_constant_vectors(-0.5f); + std::array const_float_vectors_0_5 = + make_float_constant_vectors(0.5f); + + // The widths must be without holes (R, RG, RGB, RGBA), and expecting the + // widths to add up to the size of the stored texel (8, 16 or 32 bits), as the + // unused upper bits will contain junk from the sign extension of X if the + // number is signed. + auto pack_8_16_32 = [&](std::array widths) -> EMIdArray { + unsigned int component_count; + std::array offsets{}; + for (component_count = 0; component_count < widths.size(); + ++component_count) { + if (!widths[component_count]) { + break; + } + // Only formats for which max + 0.5 can be represented exactly. 
+ assert(widths[component_count] <= 23); + if (component_count) { + offsets[component_count] = + offsets[component_count - 1] + widths[component_count - 1]; + } + } + assert_not_zero(component_count); + + // Extract the needed components. + EMIdArray eM_unflushed = eM_swapped; + if (component_count < 4) { + if (component_count == 1) { + for_each_eM([&](uint32_t eM_index) { + eM_unflushed[eM_index] = builder_->createCompositeExtract( + eM_unflushed[eM_index], type_float_, 0); + }); + } else { + uint_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + uint_vector_temp_.push_back(component_index); + } + for_each_eM([&](uint32_t eM_index) { + eM_unflushed[eM_index] = builder_->createRvalueSwizzle( + spv::NoPrecision, type_float_vectors_[component_count - 1], + eM_unflushed[eM_index], uint_vector_temp_); + }); + } + } + + // Flush NaNs. + EMIdArray eM_flushed = flush_nan(eM_unflushed); + + // Convert to integers. + SpirvBuilder::IfBuilder if_signed( + is_signed, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_signed; + { + // Signed. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_norm; + { + // Signed normalized. + id_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + id_vector_temp_.push_back(builder_->makeFloatConstant( + float((uint32_t(1) << (widths[component_index] - 1)) - 1))); + } + spv::Id const_max_value = + component_count > 1 + ? 
builder_->makeCompositeConstant( + type_float_vectors_[component_count - 1], id_vector_temp_) + : id_vector_temp_.front(); + for_each_eM([&](uint32_t eM_index) { + eM_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_vectors_[component_count - 1], + builder_->createTriBuiltinCall( + type_float_vectors_[component_count - 1], + ext_inst_glsl_std_450_, GLSLstd450FClamp, + eM_flushed[eM_index], + const_float_vectors_minus_1[component_count - 1], + const_float_vectors_1_[component_count - 1]), + const_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + eM_signed[eM_index] = + if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]); + }); + // Convert to signed integer, adding plus/minus 0.5 before truncating + // according to the Direct3D format conversion rules. + for_each_eM([&](uint32_t eM_index) { + eM_signed[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint_vectors_[component_count - 1], + builder_->createUnaryOp( + spv::OpConvertFToS, type_int_vectors_[component_count - 1], + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float_vectors_[component_count - 1], + eM_signed[eM_index], + builder_->createTriOp( + spv::OpSelect, type_float_vectors_[component_count - 1], + builder_->createBinOp( + spv::OpFOrdLessThan, + type_bool_vectors_[component_count - 1], + eM_signed[eM_index], + const_float_vectors_0_[component_count - 1]), + const_float_vectors_minus_0_5[component_count - 1], + const_float_vectors_0_5[component_count - 1])))); + }); + } + if_signed.makeBeginElse(); + EMIdArray eM_unsigned; + { + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_norm; + { + // Unsigned normalized. 
+ id_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + id_vector_temp_.push_back(builder_->makeFloatConstant( + float((uint32_t(1) << widths[component_index]) - 1))); + } + spv::Id const_max_value = + component_count > 1 + ? builder_->makeCompositeConstant( + type_float_vectors_[component_count - 1], id_vector_temp_) + : id_vector_temp_.front(); + for_each_eM([&](uint32_t eM_index) { + eM_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_vectors_[component_count - 1], + builder_->createTriBuiltinCall( + type_float_vectors_[component_count - 1], + ext_inst_glsl_std_450_, GLSLstd450FClamp, + eM_flushed[eM_index], + const_float_vectors_0_[component_count - 1], + const_float_vectors_1_[component_count - 1]), + const_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + eM_unsigned[eM_index] = + if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]); + }); + // Convert to unsigned integer, adding 0.5 before truncating according to + // the Direct3D format conversion rules. + for_each_eM([&](uint32_t eM_index) { + eM_unsigned[eM_index] = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_vectors_[component_count - 1], + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float_vectors_[component_count - 1], + eM_unsigned[eM_index], + const_float_vectors_0_5[component_count - 1])); + }); + } + if_signed.makeEndIf(); + EMIdArray eM_unpacked; + for_each_eM([&](uint32_t eM_index) { + eM_unpacked[eM_index] = + if_signed.createMergePhi(eM_signed[eM_index], eM_unsigned[eM_index]); + }); + + // Pack into a 32-bit value, and pad to a 4-component vector for the phi. + EMIdArray eM_packed; + for_each_eM([&](uint32_t eM_index) { + spv::Id element_unpacked = eM_unpacked[eM_index]; + eM_packed[eM_index] = component_count > 1 + ? 
builder_->createCompositeExtract( + element_unpacked, type_uint_, 0) + : element_unpacked; + for (unsigned int component_index = 1; component_index < component_count; + ++component_index) { + eM_packed[eM_index] = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, eM_packed[eM_index], + builder_->createCompositeExtract(element_unpacked, type_uint_, + component_index), + builder_->makeUintConstant(offsets[component_index]), + builder_->makeUintConstant(widths[component_index])); + } + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = eM_packed[eM_index]; + eM_packed[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + + return eM_packed; + }; + + SpirvBuilder::SwitchBuilder format_switch( + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info, + builder_->makeUintConstant(8), + builder_->makeUintConstant(6)), + spv::SelectionControlDontFlattenMask, *builder_); + + struct FormatCase { + EMIdArray eM_packed; + uint32_t element_bytes_log2; + spv::Id phi_parent; + }; + std::vector format_cases; + // Must be called at the end of the switch case segment for the correct phi + // parent. + auto add_format_case = [&](const EMIdArray& eM_packed, + uint32_t element_bytes_log2) { + FormatCase& format_case = format_cases.emplace_back(); + format_case.eM_packed = eM_packed; + format_case.element_bytes_log2 = element_bytes_log2; + format_case.phi_parent = builder_->getBuildPoint()->getId(); + }; + + // k_8, k_8_A, k_8_B + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8)); + // TODO(Triang3l): Investigate how input should be treated for k_8_A, k_8_B. 
+ format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_A)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_B)); + add_format_case(pack_8_16_32({8}), 0); + + // k_1_5_5_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_1_5_5_5)); + add_format_case(pack_8_16_32({5, 5, 5, 1}), 1); + + // k_5_6_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_5_6_5)); + add_format_case(pack_8_16_32({5, 6, 5}), 1); + + // k_6_5_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_6_5_5)); + add_format_case(pack_8_16_32({5, 5, 6}), 1); + + // k_8_8_8_8, k_8_8_8_8_A, k_8_8_8_8_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8_8_8_8)); + // TODO(Triang3l): Investigate how input should be treated for k_8_8_8_8_A. + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_8_8_8_A)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16)); + add_format_case(pack_8_16_32({8, 8, 8, 8}), 2); + + // k_2_10_10_10, k_2_10_10_10_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_2_10_10_10)); + format_switch.addCurrentCaseLiteral(static_cast( + xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16)); + add_format_case(pack_8_16_32({10, 10, 10, 2}), 2); + + // k_8_8 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8_8)); + add_format_case(pack_8_16_32({8, 8}), 1); + + // k_4_4_4_4 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_4_4_4_4)); + add_format_case(pack_8_16_32({4, 4, 4, 4}), 1); + + // k_10_11_11, k_10_11_11_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_10_11_11)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16)); + add_format_case(pack_8_16_32({11, 11, 10}), 2); + + // k_11_11_10, k_11_11_10_AS_16_16_16_16 + format_switch.makeBeginCase( + 
static_cast(xenos::ColorFormat::k_11_11_10)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16)); + add_format_case(pack_8_16_32({10, 11, 11}), 2); + + // k_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16)); + add_format_case(pack_8_16_32({16}), 1); + + // k_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16)); + add_format_case(pack_8_16_32({16, 16}), 2); + + // k_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_16_16)); + { + // Flush NaNs. + EMIdArray fixed16_flushed = flush_nan(eM_swapped); + + // Convert to integers. + SpirvBuilder::IfBuilder if_signed( + is_signed, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_signed; + { + // Signed. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_norm; + { + // Signed normalized. + id_vector_temp_.clear(); + id_vector_temp_.resize(4, builder_->makeFloatConstant( + float((uint32_t(1) << (16 - 1)) - 1))); + spv::Id const_snorm16_max_value = + builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + for_each_eM([&](uint32_t eM_index) { + fixed16_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float4_, + builder_->createTriBuiltinCall( + type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp, + fixed16_flushed[eM_index], const_float_vectors_minus_1[3], + const_float4_1_), + const_snorm16_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + fixed16_signed[eM_index] = if_norm.createMergePhi( + fixed16_norm[eM_index], fixed16_flushed[eM_index]); + }); + // Convert to signed integer, adding plus/minus 0.5 before truncating + // according to the Direct3D format conversion rules. 
+ for_each_eM([&](uint32_t eM_index) { + fixed16_signed[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, + builder_->createUnaryOp( + spv::OpConvertFToS, type_int4_, + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float4_, fixed16_signed[eM_index], + builder_->createTriOp( + spv::OpSelect, type_float4_, + builder_->createBinOp(spv::OpFOrdLessThan, type_bool4_, + fixed16_signed[eM_index], + const_float4_0_), + const_float_vectors_minus_0_5[3], + const_float_vectors_0_5[3])))); + }); + } + if_signed.makeBeginElse(); + EMIdArray fixed16_unsigned; + { + // Unsigned. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_norm; + { + // Unsigned normalized. + id_vector_temp_.clear(); + id_vector_temp_.resize( + 4, builder_->makeFloatConstant(float((uint32_t(1) << 16) - 1))); + spv::Id const_unorm16_max_value = + builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + for_each_eM([&](uint32_t eM_index) { + fixed16_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float4_, + builder_->createTriBuiltinCall( + type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp, + fixed16_flushed[eM_index], const_float4_0_, const_float4_1_), + const_unorm16_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + fixed16_unsigned[eM_index] = if_norm.createMergePhi( + fixed16_norm[eM_index], fixed16_flushed[eM_index]); + }); + // Convert to unsigned integer, adding 0.5 before truncating according to + // the Direct3D format conversion rules. 
+ for_each_eM([&](uint32_t eM_index) { + fixed16_unsigned[eM_index] = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint4_, + builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_, + fixed16_unsigned[eM_index], + const_float_vectors_0_5[3])); + }); + } + if_signed.makeEndIf(); + EMIdArray fixed16_unpacked; + for_each_eM([&](uint32_t eM_index) { + fixed16_unpacked[eM_index] = if_signed.createMergePhi( + fixed16_signed[eM_index], fixed16_unsigned[eM_index]); + }); + + // Pack into two 32-bit values, and pad to a 4-component vector for the phi. + EMIdArray fixed16_packed; + spv::Id const_uint_16 = builder_->makeUintConstant(16); + for_each_eM([&](uint32_t eM_index) { + spv::Id fixed16_element_unpacked = fixed16_unpacked[eM_index]; + id_vector_temp_.clear(); + for (uint32_t component_index = 0; component_index < 2; + ++component_index) { + id_vector_temp_.push_back(builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, + builder_->createCompositeExtract(fixed16_element_unpacked, + type_uint_, 2 * component_index), + builder_->createCompositeExtract( + fixed16_element_unpacked, type_uint_, 2 * component_index + 1), + const_uint_16, const_uint_16)); + } + for (uint32_t component_index = 2; component_index < 4; + ++component_index) { + id_vector_temp_.push_back(const_uint_0_); + } + fixed16_packed[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + + add_format_case(fixed16_packed, 3); + } + + // TODO(Triang3l): Use the extended range float16 conversion. 
+ + // k_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_FLOAT)); + { + EMIdArray format_packed_16_float; + for_each_eM([&](uint32_t eM_index) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->createCompositeExtract( + eM_swapped[eM_index], type_float_, 0)); + id_vector_temp_.push_back(const_float_0_); + spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createCompositeConstruct(type_float2_, id_vector_temp_)); + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = format_packed_16_float_x; + format_packed_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_float, 1); + } + + // k_16_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_FLOAT)); + { + EMIdArray format_packed_16_16_float; + for_each_eM([&](uint32_t eM_index) { + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + eM_swapped[eM_index], + uint_vector_temp_)); + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = format_packed_16_16_float_xy; + format_packed_16_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_16_float, 2); + } + + // k_16_16_16_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_16_16_FLOAT)); + { + EMIdArray format_packed_16_16_16_16_float; + for_each_eM([&](uint32_t eM_index) { + spv::Id format_packed_16_16_16_16_float_xy_zw[2]; + for (uint32_t component_index = 0; component_index < 2; + 
++component_index) { + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(2 * component_index); + uint_vector_temp_.push_back(2 * component_index + 1); + format_packed_16_16_16_16_float_xy_zw[component_index] = + builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + eM_swapped[eM_index], + uint_vector_temp_)); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[0]); + id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[1]); + id_vector_temp_.push_back(const_uint_0_); + id_vector_temp_.push_back(const_uint_0_); + format_packed_16_16_16_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_16_16_16_float, 3); + } + + // k_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_FLOAT)); + { + EMIdArray format_packed_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_float, 2); + } + + // k_32_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_32_FLOAT)); + { + EMIdArray format_packed_32_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_32_float, 3); + } + + // k_32_32_32_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_32_32_32_FLOAT)); + { + EMIdArray format_packed_32_32_32_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_32_32_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_32_32_32_float, 4); + } + + 
format_switch.makeEndSwitch(); + + // Select the result and the element size based on the format. + // Phi must be the first instructions in a block. + EMIdArray eM_packed; + for_each_eM([&](uint32_t eM_index) { + auto eM_packed_phi = std::make_unique( + builder_->getUniqueId(), type_uint4_, spv::OpPhi); + // Default case for an invalid format. + eM_packed_phi->addIdOperand(const_uint4_0_); + eM_packed_phi->addIdOperand(format_switch.getDefaultPhiParent()); + for (const FormatCase& format_case : format_cases) { + eM_packed_phi->addIdOperand(format_case.eM_packed[eM_index]); + eM_packed_phi->addIdOperand(format_case.phi_parent); + } + eM_packed[eM_index] = eM_packed_phi->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(eM_packed_phi)); + }); + spv::Id element_bytes_log2; + { + auto element_bytes_log2_phi = std::make_unique( + builder_->getUniqueId(), type_uint_, spv::OpPhi); + // Default case for an invalid format (doesn't enter any element size + // conditional, skipped). + element_bytes_log2_phi->addIdOperand(builder_->makeUintConstant(5)); + element_bytes_log2_phi->addIdOperand(format_switch.getDefaultPhiParent()); + for (const FormatCase& format_case : format_cases) { + element_bytes_log2_phi->addIdOperand( + builder_->makeUintConstant(format_case.element_bytes_log2)); + element_bytes_log2_phi->addIdOperand(format_case.phi_parent); + } + element_bytes_log2 = element_bytes_log2_phi->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(element_bytes_log2_phi)); + } + + // Endian-swap. + spv::Id endian = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info, + const_uint_0_, builder_->makeUintConstant(3)); + for_each_eM([&](uint32_t eM_index) { + eM_packed[eM_index] = EndianSwap128Uint4(eM_packed[eM_index], endian); + }); + + // Load the index of eM0 in the stream. 
+ spv::Id eM0_index = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, + builder_->createCompositeExtract(eA_vector, type_uint_, 1), const_uint_0_, + builder_->makeUintConstant(23)); + + // Check how many elements starting from eM0 are within the bounds of the + // stream, and from the eM# that were written, exclude the out-of-bounds ones. + // The index can't be negative, and the index and the count are limited to 23 + // bits, so it's safe to use 32-bit signed subtraction and clamping to get the + // remaining eM# count. + spv::Id eM_indices_to_store = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, + builder_->createLoad(var_main_memexport_data_written_, spv::NoPrecision), + const_uint_0_, + builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createTriBuiltinCall( + type_int_, ext_inst_glsl_std_450_, GLSLstd450SClamp, + builder_->createBinOp( + spv::OpISub, type_int_, + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + builder_->createCompositeExtract( + eA_vector, type_uint_, 3), + const_uint_0_, + builder_->makeUintConstant(23))), + builder_->createUnaryOp(spv::OpBitcast, type_int_, + eM0_index)), + const_int_0_, + builder_->makeIntConstant(ucode::kMaxMemExportElementCount)))); + + // Get the eM0 address in bytes. + // Left-shift the stream base address by 2 to both convert it from dwords to + // bytes and drop the upper bits. + spv::Id const_uint_2 = builder_->makeUintConstant(2); + spv::Id eM0_address_bytes = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract(eA_vector, type_uint_, 0), + const_uint_2), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, eM0_index, + element_bytes_log2)); + + // Store based on the element size. 
+ auto store_needed_eM = [&](std::function fn) { + for_each_eM([&](uint32_t eM_index) { + SpirvBuilder::IfBuilder if_eM_needed( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + eM_indices_to_store, + builder_->makeUintConstant(1u << eM_index)), + const_uint_0_), + spv::SelectionControlDontFlattenMask, *builder_, 2, 1); + fn(eM_index); + if_eM_needed.makeEndIf(); + }); + }; + SpirvBuilder::SwitchBuilder element_size_switch( + element_bytes_log2, spv::SelectionControlDontFlattenMask, *builder_); + element_size_switch.makeBeginCase(0); + { + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_address_bytes = + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_bytes, + builder_->makeUintConstant(eM_index)) + : eM0_address_bytes; + // replace_shift = 8 * (element_address_bytes & 3) + spv::Id replace_shift = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, const_uint_0_, + element_address_bytes, builder_->makeUintConstant(3), const_uint_2); + StoreUint32ToSharedMemory( + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract( + eM_packed[eM_index], type_uint_, 0), + replace_shift), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + element_address_bytes, const_uint_2)), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->makeUintConstant(0xFFu), + replace_shift)); + }); + } + element_size_switch.makeBeginCase(1); + { + spv::Id const_uint_1 = builder_->makeUintConstant(1); + spv::Id eM0_address_words = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_1); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_address_words = + eM_index != 0 ? 
builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_words, + builder_->makeUintConstant(eM_index)) + : eM0_address_words; + // replace_shift = 16 * (element_address_words & 1) + spv::Id replace_shift = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, const_uint_0_, + element_address_words, builder_->makeUintConstant(4), const_uint_1); + StoreUint32ToSharedMemory( + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract( + eM_packed[eM_index], type_uint_, 0), + replace_shift), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + element_address_words, const_uint_1)), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->makeUintConstant(0xFFFFu), + replace_shift)); + }); + } + element_size_switch.makeBeginCase(2); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(eM_packed[eM_index], type_uint_, 0), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(eM_index)) + : eM0_address_dwords)); + }); + } + element_size_switch.makeBeginCase(3); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_value = eM_packed[eM_index]; + spv::Id element_address_dwords_int = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? 
builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(2 * eM_index)) + : eM0_address_dwords); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 0), + element_address_dwords_int); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 1), + builder_->createBinOp(spv::OpIAdd, type_int_, + element_address_dwords_int, + builder_->makeIntConstant(1))); + }); + } + element_size_switch.makeBeginCase(4); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_value = eM_packed[eM_index]; + spv::Id element_address_dwords_int = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(4 * eM_index)) + : eM0_address_dwords); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 0), + element_address_dwords_int); + for (uint32_t element_dword_index = 1; element_dword_index < 4; + ++element_dword_index) { + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, + element_dword_index), + builder_->createBinOp(spv::OpIAdd, type_int_, + element_address_dwords_int, + builder_->makeIntConstant( + static_cast(element_dword_index)))); + } + }); + } + element_size_switch.makeEndSwitch(); + + // Close the conditionals for whether memory export is allowed in this + // invocation. 
+ if_address_valid.makeEndIf(); + if (if_pixel_not_killed.has_value()) { + if_pixel_not_killed->makeEndIf(); + } + if (if_memexport_allowed.has_value()) { + if_memexport_allowed->makeEndIf(); + } +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 65a01209d..e19fdd540 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -457,22 +457,14 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // Kill the pixel once the guest control flow and derivatives are not // needed anymore. assert_true(var_main_kill_pixel_ != spv::NoResult); - // Load the condition before the OpSelectionMerge, which must be the - // penultimate instruction. - spv::Id kill_pixel = - builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision); - spv::Block& block_kill = builder_->makeNewBlock(); - spv::Block& block_kill_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_kill_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(kill_pixel, &block_kill, - &block_kill_merge); - builder_->setBuildPoint(&block_kill); + SpirvBuilder::IfBuilder kill_pixel_if( + builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision), + spv::SelectionControlMaskNone, *builder_); // TODO(Triang3l): Use OpTerminateInvocation when SPIR-V 1.6 is // targeted. builder_->createNoResultOp(spv::OpKill); // OpKill terminates the block. - builder_->setBuildPoint(&block_kill_merge); + kill_pixel_if.makeEndIf(false); } } } @@ -533,17 +525,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->makeUintConstant(3)); // Check if the comparison function is not "always" - that should pass even // for NaN likely, unlike "less, equal or greater". 
- spv::Id alpha_test_function_is_non_always = builder_->createBinOp( - spv::OpINotEqual, type_bool_, alpha_test_function, - builder_->makeUintConstant(uint32_t(xenos::CompareFunction::kAlways))); - spv::Block& block_alpha_test = builder_->makeNewBlock(); - spv::Block& block_alpha_test_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_function_is_non_always, - &block_alpha_test, - &block_alpha_test_merge); - builder_->setBuildPoint(&block_alpha_test); + SpirvBuilder::IfBuilder if_alpha_test_function_is_non_always( + builder_->createBinOp(spv::OpINotEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant( + uint32_t(xenos::CompareFunction::kAlways))), + spv::SelectionControlDontFlattenMask, *builder_); { id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(3)); @@ -564,28 +550,20 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // The comparison function is not "always" - perform the alpha test. // Handle "not equal" specially (specifically as "not equal" so it's true // for NaN, not "less or greater" which is false for NaN). 
- spv::Id alpha_test_function_is_not_equal = builder_->createBinOp( - spv::OpIEqual, type_bool_, alpha_test_function, - builder_->makeUintConstant( - uint32_t(xenos::CompareFunction::kNotEqual))); - spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock(); - spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock(); - spv::Block& block_alpha_test_not_equal_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_not_equal_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_function_is_not_equal, - &block_alpha_test_not_equal, - &block_alpha_test_non_not_equal); - spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal; - builder_->setBuildPoint(&block_alpha_test_not_equal); + SpirvBuilder::IfBuilder if_alpha_test_function_is_not_equal( + builder_->createBinOp(spv::OpIEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant(uint32_t( + xenos::CompareFunction::kNotEqual))), + spv::SelectionControlDontFlattenMask, *builder_, 1, 2); + spv::Id alpha_test_result_not_equal; { // "Not equal" function. alpha_test_result_not_equal = builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_, alpha_test_alpha, alpha_test_reference); - builder_->createBranch(&block_alpha_test_not_equal_merge); } - builder_->setBuildPoint(&block_alpha_test_non_not_equal); + if_alpha_test_function_is_not_equal.makeBeginElse(); + spv::Id alpha_test_result_non_not_equal; { // Function other than "not equal". 
static const spv::Op kAlphaTestOps[] = { @@ -609,16 +587,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { alpha_test_result_non_not_equal = alpha_test_comparison_result; } } - builder_->createBranch(&block_alpha_test_not_equal_merge); } - builder_->setBuildPoint(&block_alpha_test_not_equal_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(alpha_test_result_not_equal); - id_vector_temp_.push_back(block_alpha_test_not_equal.getId()); - id_vector_temp_.push_back(alpha_test_result_non_not_equal); - id_vector_temp_.push_back(block_alpha_test_non_not_equal.getId()); + if_alpha_test_function_is_not_equal.makeEndIf(); spv::Id alpha_test_result = - builder_->createOp(spv::OpPhi, type_bool_, id_vector_temp_); + if_alpha_test_function_is_not_equal.createMergePhi( + alpha_test_result_not_equal, alpha_test_result_non_not_equal); // Discard the pixel if the alpha test has failed. if (edram_fragment_shader_interlock_ && !features_.demote_to_helper_invocation) { @@ -627,16 +600,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpSelect, type_uint_, alpha_test_result, fsi_sample_mask_in_rt_0_alpha_tests, const_uint_0_); } else { - // Creating a merge block even though it will contain just one OpBranch - // since SPIR-V requires structured control flow in shaders. 
- spv::Block& block_alpha_test_kill = builder_->makeNewBlock(); - spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_kill_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_result, - &block_alpha_test_kill_merge, - &block_alpha_test_kill); - builder_->setBuildPoint(&block_alpha_test_kill); + SpirvBuilder::IfBuilder alpha_test_kill_if( + builder_->createUnaryOp(spv::OpLogicalNot, type_bool_, + alpha_test_result), + spv::SelectionControlDontFlattenMask, *builder_); + bool branch_to_alpha_test_kill_merge = true; if (edram_fragment_shader_interlock_) { assert_true(features_.demote_to_helper_invocation); fsi_pixel_potentially_killed = true; @@ -645,18 +613,17 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->addExtension("SPV_EXT_demote_to_helper_invocation"); builder_->addCapability(spv::CapabilityDemoteToHelperInvocationEXT); builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); - builder_->createBranch(&block_alpha_test_kill_merge); } else { // TODO(Triang3l): Use OpTerminateInvocation when SPIR-V 1.6 is // targeted. builder_->createNoResultOp(spv::OpKill); // OpKill terminates the block. + branch_to_alpha_test_kill_merge = false; } - builder_->setBuildPoint(&block_alpha_test_kill_merge); - builder_->createBranch(&block_alpha_test_merge); + alpha_test_kill_if.makeEndIf(branch_to_alpha_test_kill_merge); } } - builder_->setBuildPoint(&block_alpha_test_merge); + if_alpha_test_function_is_non_always.makeEndIf(); // TODO(Triang3l): Alpha to coverage. 
@@ -725,18 +692,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpBitwiseAnd, type_uint_, main_fsi_sample_mask_, builder_->makeUintConstant(uint32_t(1) << (4 + i))), const_uint_0_); - spv::Block& block_sample_late_depth_stencil_write = - builder_->makeNewBlock(); - spv::Block& block_sample_late_depth_stencil_write_merge = - builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_late_depth_stencil_write_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( + SpirvBuilder::IfBuilder if_sample_late_depth_stencil_write_needed( sample_late_depth_stencil_write_needed, - &block_sample_late_depth_stencil_write, - &block_sample_late_depth_stencil_write_merge); - builder_->setBuildPoint(&block_sample_late_depth_stencil_write); + spv::SelectionControlDontFlattenMask, *builder_); spv::Id depth_stencil_sample_address = FSI_AddSampleOffset(main_fsi_address_depth_, i); id_vector_temp_.clear(); @@ -749,8 +707,7 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform, buffer_edram_, id_vector_temp_)); - builder_->createBranch(&block_sample_late_depth_stencil_write_merge); - builder_->setBuildPoint(&block_sample_late_depth_stencil_write_merge); + if_sample_late_depth_stencil_write_needed.makeEndIf(); } if (color_targets_written) { // Only take the remaining coverage bits, not the late depth / stencil @@ -852,28 +809,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpBitwiseAnd, type_uint_, fsi_color_targets_written, builder_->makeUintConstant(uint32_t(1) << color_target_index)), const_uint_0_); - spv::Block& fsi_color_written_if_head = *builder_->getBuildPoint(); - spv::Block& fsi_color_written_if = builder_->makeNewBlock(); - spv::Block& fsi_color_written_if_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&fsi_color_written_if_merge, - spv::SelectionControlDontFlattenMask); - { - std::unique_ptr rt_written_branch_conditional_op = - std::make_unique(spv::OpBranchConditional); - rt_written_branch_conditional_op->addIdOperand(fsi_color_written); - rt_written_branch_conditional_op->addIdOperand( - fsi_color_written_if.getId()); - rt_written_branch_conditional_op->addIdOperand( - fsi_color_written_if_merge.getId()); - // More likely to write to the render target than not. - rt_written_branch_conditional_op->addImmediateOperand(2); - rt_written_branch_conditional_op->addImmediateOperand(1); - builder_->getBuildPoint()->addInstruction( - std::move(rt_written_branch_conditional_op)); - } - fsi_color_written_if.addPredecessor(&fsi_color_written_if_head); - fsi_color_written_if_merge.addPredecessor(&fsi_color_written_if_head); - builder_->setBuildPoint(&fsi_color_written_if); + // More likely to write to the render target than not. 
+ SpirvBuilder::IfBuilder if_fsi_color_written( + fsi_color_written, spv::SelectionControlDontFlattenMask, *builder_, + 2, 1); // For accessing uint2 arrays of per-render-target data which are passed // as uint4 arrays due to std140 array element alignment. @@ -914,14 +853,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { const_uint32_max), builder_->createBinOp(spv::OpINotEqual, type_bool_, rt_keep_mask[1], const_uint32_max)); - spv::Block& rt_write_mask_not_empty_if = builder_->makeNewBlock(); - spv::Block& rt_write_mask_not_empty_if_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_write_mask_not_empty_if_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_write_mask_not_empty, - &rt_write_mask_not_empty_if, - &rt_write_mask_not_empty_if_merge); - builder_->setBuildPoint(&rt_write_mask_not_empty_if); + SpirvBuilder::IfBuilder if_rt_write_mask_not_empty( + rt_write_mask_not_empty, spv::SelectionControlDontFlattenMask, + *builder_); spv::Id const_int_rt_index = builder_->makeIntConstant(color_target_index); @@ -982,17 +916,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::Id rt_blend_enabled = builder_->createBinOp( spv::OpINotEqual, type_bool_, rt_blend_factors_equations, builder_->makeUintConstant(0x00010001)); - spv::Block& rt_blend_enabled_if = builder_->makeNewBlock(); - spv::Block& rt_blend_enabled_else = builder_->makeNewBlock(); - spv::Block& rt_blend_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_blend_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - rt_blend_enabled, &rt_blend_enabled_if, &rt_blend_enabled_else); - - // Blending path. + SpirvBuilder::IfBuilder if_rt_blend_enabled( + rt_blend_enabled, spv::SelectionControlDontFlattenMask, *builder_); { - builder_->setBuildPoint(&rt_blend_enabled_if); + // Blending path. // Get various parameters used in blending. 
spv::Id rt_color_is_fixed_point = builder_->createBinOp( @@ -1097,15 +1024,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // Blend and mask each sample. for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); @@ -1131,26 +1052,13 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { dest_packed[0] = builder_->createLoad(rt_access_chain_0, spv::NoPrecision); { - spv::Block& block_load_64bpp_head = *builder_->getBuildPoint(); - spv::Block& block_load_64bpp = builder_->makeNewBlock(); - spv::Block& block_load_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_load_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_load_64bpp, - &block_load_64bpp_merge); - builder_->setBuildPoint(&block_load_64bpp); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); spv::Id dest_packed_64bpp_high = builder_->createLoad(rt_access_chain_1, spv::NoPrecision); - builder_->createBranch(&block_load_64bpp_merge); - builder_->setBuildPoint(&block_load_64bpp_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(dest_packed_64bpp_high); - id_vector_temp_.push_back(block_load_64bpp.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_load_64bpp_head.getId()); - dest_packed[1] = - 
builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_64bpp.makeEndIf(); + dest_packed[1] = if_64bpp.createMergePhi(dest_packed_64bpp_high, + const_uint_0_); } std::array dest_unpacked = FSI_UnpackColor(dest_packed, rt_format_with_flags); @@ -1203,35 +1111,27 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { result_packed[0], rt_replace_mask[0])), rt_access_chain_0); - spv::Block& block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - builder_->createStore( - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - dest_packed[1], rt_keep_mask[1]), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - result_packed[1], - rt_replace_mask[1])), - rt_access_chain_0); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + dest_packed[1], rt_keep_mask[1]), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + result_packed[1], + rt_replace_mask[1])), + rt_access_chain_1); + } + if_64bpp.makeEndIf(); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + if_sample_covered.makeEndIf(); } - - builder_->createBranch(&rt_blend_enabled_merge); } - - // Non-blending paths. + if_rt_blend_enabled.makeBeginElse(); { - builder_->setBuildPoint(&rt_blend_enabled_else); + // Non-blending paths. // Pack the new color for all samples. 
std::array color_packed = @@ -1244,19 +1144,12 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[0], const_uint_0_), builder_->createBinOp(spv::OpINotEqual, type_bool_, rt_keep_mask[1], const_uint_0_)); - spv::Block& rt_keep_mask_not_empty_if = builder_->makeNewBlock(); - spv::Block& rt_keep_mask_not_empty_if_else = builder_->makeNewBlock(); - spv::Block& rt_keep_mask_not_empty_if_merge = - builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_keep_mask_not_empty_if_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_keep_mask_not_empty, - &rt_keep_mask_not_empty_if, - &rt_keep_mask_not_empty_if_else); - // Loading and masking path. + SpirvBuilder::IfBuilder if_rt_keep_mask_not_empty( + rt_keep_mask_not_empty, spv::SelectionControlDontFlattenMask, + *builder_); { - builder_->setBuildPoint(&rt_keep_mask_not_empty_if); + // Loading and masking path. std::array color_packed_masked; for (uint32_t i = 0; i < 2; ++i) { color_packed_masked[i] = builder_->createBinOp( @@ -1265,15 +1158,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[i])); } for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); id_vector_temp_.clear(); @@ -1295,52 +1182,38 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[0]), color_packed_masked[0]), rt_access_chain_0); - spv::Block& 
block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - id_vector_temp_.back() = builder_->createBinOp( - spv::OpIAdd, type_int_, rt_sample_address, fsi_const_int_1); - spv::Id rt_access_chain_1 = builder_->createAccessChain( - features_.spirv_version >= spv::Spv_1_3 - ? spv::StorageClassStorageBuffer - : spv::StorageClassUniform, - buffer_edram_, id_vector_temp_); - builder_->createStore( - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - builder_->createLoad(rt_access_chain_1, - spv::NoPrecision), - rt_keep_mask[1]), - color_packed_masked[1]), - rt_access_chain_1); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + id_vector_temp_.back() = builder_->createBinOp( + spv::OpIAdd, type_int_, rt_sample_address, fsi_const_int_1); + spv::Id rt_access_chain_1 = builder_->createAccessChain( + features_.spirv_version >= spv::Spv_1_3 + ? 
spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + buffer_edram_, id_vector_temp_); + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + builder_->createLoad(rt_access_chain_1, + spv::NoPrecision), + rt_keep_mask[1]), + color_packed_masked[1]), + rt_access_chain_1); + } + if_64bpp.makeEndIf(); + if_sample_covered.makeEndIf(); } - builder_->createBranch(&rt_keep_mask_not_empty_if_merge); } - - // Fully overwriting path. + if_rt_keep_mask_not_empty.makeBeginElse(); { - builder_->setBuildPoint(&rt_keep_mask_not_empty_if_else); + // Fully overwriting path. for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); id_vector_temp_.clear(); @@ -1353,40 +1226,29 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform, buffer_edram_, id_vector_temp_)); - spv::Block& block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - id_vector_temp_.back() = builder_->createBinOp( - spv::OpIAdd, type_int_, id_vector_temp_.back(), - fsi_const_int_1); - builder_->createStore(color_packed[1], - builder_->createAccessChain( - features_.spirv_version >= spv::Spv_1_3 - ? spv::StorageClassStorageBuffer - : spv::StorageClassUniform, - buffer_edram_, id_vector_temp_)); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + id_vector_temp_.back() = builder_->createBinOp( + spv::OpIAdd, type_int_, id_vector_temp_.back(), + fsi_const_int_1); + builder_->createStore( + color_packed[1], builder_->createAccessChain( + features_.spirv_version >= spv::Spv_1_3 + ? 
spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + buffer_edram_, id_vector_temp_)); + } + if_64bpp.makeEndIf(); + if_sample_covered.makeEndIf(); } - builder_->createBranch(&rt_keep_mask_not_empty_if_merge); } - - builder_->setBuildPoint(&rt_keep_mask_not_empty_if_merge); - builder_->createBranch(&rt_blend_enabled_merge); + if_rt_keep_mask_not_empty.makeEndIf(); } + if_rt_blend_enabled.makeEndIf(); - builder_->setBuildPoint(&rt_blend_enabled_merge); - builder_->createBranch(&rt_write_mask_not_empty_if_merge); - builder_->setBuildPoint(&rt_write_mask_not_empty_if_merge); - builder_->createBranch(&fsi_color_written_if_merge); - builder_->setBuildPoint(&fsi_color_written_if_merge); + if_rt_write_mask_not_empty.makeEndIf(); + if_fsi_color_written.makeEndIf(); } else { // Convert to gamma space - this is incorrect, since it must be done // after blending on the Xbox 360, but this is just one of many blending @@ -1405,24 +1267,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma << color_target_index)), const_uint_0_); - spv::Block& block_gamma_head = *builder_->getBuildPoint(); - spv::Block& block_gamma = builder_->makeNewBlock(); - spv::Block& block_gamma_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_gamma_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_gamma, &block_gamma, - &block_gamma_merge); - builder_->setBuildPoint(&block_gamma); + SpirvBuilder::IfBuilder if_gamma( + is_gamma, spv::SelectionControlDontFlattenMask, *builder_); spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false); - builder_->createBranch(&block_gamma_merge); - builder_->setBuildPoint(&block_gamma_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(color_rgb_gamma); - id_vector_temp_.push_back(block_gamma.getId()); - id_vector_temp_.push_back(color_rgb); - id_vector_temp_.push_back(block_gamma_head.getId()); - color_rgb = - 
builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); + if_gamma.makeEndIf(); + color_rgb = if_gamma.createMergePhi(color_rgb_gamma, color_rgb); { std::unique_ptr color_rgba_shuffle_op = std::make_unique( @@ -1752,15 +1601,8 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant(kSysFlag_FSIDepthStencil)), const_uint_0_); - spv::Block& block_depth_stencil_enabled_head = *builder_->getBuildPoint(); - spv::Block& block_depth_stencil_enabled = builder_->makeNewBlock(); - spv::Block& block_depth_stencil_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_stencil_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(depth_stencil_enabled, - &block_depth_stencil_enabled, - &block_depth_stencil_enabled_merge); - builder_->setBuildPoint(&block_depth_stencil_enabled); + SpirvBuilder::IfBuilder if_depth_stencil_enabled( + depth_stencil_enabled, spv::SelectionControlDontFlattenMask, *builder_); // Load the depth in the center of the pixel and calculate the derivatives of // the depth outside non-uniform control flow. 
@@ -1976,14 +1818,8 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, new_sample_mask, builder_->makeUintConstant(uint32_t(1) << i)), const_uint_0_); - spv::Block& block_sample_covered_head = *builder_->getBuildPoint(); - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(sample_covered, &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + sample_covered, spv::SelectionControlDontFlattenMask, *builder_); // Load the original depth and stencil for the sample. spv::Id sample_address = FSI_AddSampleOffset(main_fsi_address_depth_, i); @@ -2074,21 +1910,11 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( const_float_0_, const_float_1_); // Convert the new depth to 24-bit. - spv::Block& block_depth_format_float = builder_->makeNewBlock(); - spv::Block& block_depth_format_unorm = builder_->makeNewBlock(); - spv::Block& block_depth_format_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_format_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - depth_is_float24, &block_depth_format_float, &block_depth_format_unorm); - // Float24 case. - builder_->setBuildPoint(&block_depth_format_float); + SpirvBuilder::IfBuilder depth_format_if( + depth_is_float24, spv::SelectionControlDontFlattenMask, *builder_); spv::Id sample_depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4( *builder_, sample_depth32, true, false, ext_inst_glsl_std_450_); - builder_->createBranch(&block_depth_format_merge); - spv::Block& block_depth_format_float_end = *builder_->getBuildPoint(); - // Unorm24 case. 
- builder_->setBuildPoint(&block_depth_format_unorm); + depth_format_if.makeBeginElse(); // Round to the nearest even integer. This seems to be the correct // conversion, adding +0.5 and rounding towards zero results in red instead // of black in the 4D5307E6 clear shader. @@ -2099,17 +1925,10 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createNoContractionBinOp( spv::OpFMul, type_float_, sample_depth32, builder_->makeFloatConstant(float(0xFFFFFF))))); - builder_->createBranch(&block_depth_format_merge); - spv::Block& block_depth_format_unorm_end = *builder_->getBuildPoint(); + depth_format_if.makeEndIf(); // Merge between the two formats. - builder_->setBuildPoint(&block_depth_format_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(sample_depth_float24); - id_vector_temp_.push_back(block_depth_format_float_end.getId()); - id_vector_temp_.push_back(sample_depth_unorm24); - id_vector_temp_.push_back(block_depth_format_unorm_end.getId()); - spv::Id sample_depth24 = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + spv::Id sample_depth24 = depth_format_if.createMergePhi( + sample_depth_float24, sample_depth_unorm24); // Perform the depth test. spv::Id old_depth = builder_->createBinOp( @@ -2131,206 +1950,188 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createBinOp(spv::OpUGreaterThan, type_bool_, sample_depth24, old_depth))); - // Begin the stencil test. - spv::Block& block_stencil_enabled_head = *builder_->getBuildPoint(); - spv::Block& block_stencil_enabled = builder_->makeNewBlock(); - spv::Block& block_stencil_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_stencil_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(stencil_enabled, &block_stencil_enabled, - &block_stencil_enabled_merge); - builder_->setBuildPoint(&block_stencil_enabled); - - // Perform the stencil test. 
- // The read mask has zeros in the upper bits, applying it to the combined - // stencil and depth will remove the depth part. - spv::Id old_stencil_read_masked = builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, old_depth_stencil, stencil_read_mask); - spv::Id stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_less, - builder_->createBinOp(spv::OpULessThan, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked)); - stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, - builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_equal, - builder_->createBinOp(spv::OpIEqual, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked))); - stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, - builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_greater, - builder_->createBinOp(spv::OpUGreaterThan, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked))); - spv::Id stencil_op = builder_->createTriOp( - spv::OpBitFieldUExtract, type_uint_, stencil_func_ops, - builder_->createTriOp( - spv::OpSelect, type_uint_, stencil_passed_if_enabled, - builder_->createTriOp(spv::OpSelect, type_uint_, depth_passed, - builder_->makeUintConstant(6), - builder_->makeUintConstant(9)), - builder_->makeUintConstant(3)), - builder_->makeUintConstant(3)); - spv::Block& block_stencil_op_head = *builder_->getBuildPoint(); - spv::Block& block_stencil_op_keep = builder_->makeNewBlock(); - spv::Block& block_stencil_op_zero = builder_->makeNewBlock(); - spv::Block& block_stencil_op_replace = builder_->makeNewBlock(); - spv::Block& block_stencil_op_increment_clamp = builder_->makeNewBlock(); - spv::Block& block_stencil_op_decrement_clamp = builder_->makeNewBlock(); - spv::Block& block_stencil_op_invert = builder_->makeNewBlock(); - spv::Block& 
block_stencil_op_increment_wrap = builder_->makeNewBlock(); - spv::Block& block_stencil_op_decrement_wrap = builder_->makeNewBlock(); - spv::Block& block_stencil_op_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_stencil_op_merge, - spv::SelectionControlDontFlattenMask); + // Perform the stencil test if enabled. + SpirvBuilder::IfBuilder stencil_if( + stencil_enabled, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id stencil_passed_if_enabled; + spv::Id new_stencil_and_old_depth_if_stencil_enabled; { - std::unique_ptr stencil_op_switch_op = - std::make_unique(spv::OpSwitch); - stencil_op_switch_op->addIdOperand(stencil_op); - // Make keep the default. - stencil_op_switch_op->addIdOperand(block_stencil_op_keep.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kZero)); - stencil_op_switch_op->addIdOperand(block_stencil_op_zero.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kReplace)); - stencil_op_switch_op->addIdOperand(block_stencil_op_replace.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kIncrementClamp)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_increment_clamp.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kDecrementClamp)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_decrement_clamp.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kInvert)); - stencil_op_switch_op->addIdOperand(block_stencil_op_invert.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kIncrementWrap)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_increment_wrap.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kDecrementWrap)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_decrement_wrap.getId()); - builder_->getBuildPoint()->addInstruction( - 
std::move(stencil_op_switch_op)); + // The read mask has zeros in the upper bits, applying it to the combined + // stencil and depth will remove the depth part. + spv::Id old_stencil_read_masked = builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, old_depth_stencil, stencil_read_mask); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_less, + builder_->createBinOp(spv::OpULessThan, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked)); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_equal, + builder_->createBinOp(spv::OpIEqual, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked))); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_greater, + builder_->createBinOp(spv::OpUGreaterThan, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked))); + spv::Id stencil_op = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, stencil_func_ops, + builder_->createTriOp( + spv::OpSelect, type_uint_, stencil_passed_if_enabled, + builder_->createTriOp(spv::OpSelect, type_uint_, depth_passed, + builder_->makeUintConstant(6), + builder_->makeUintConstant(9)), + builder_->makeUintConstant(3)), + builder_->makeUintConstant(3)); + spv::Block& block_stencil_op_head = *builder_->getBuildPoint(); + spv::Block& block_stencil_op_keep = builder_->makeNewBlock(); + spv::Block& block_stencil_op_zero = builder_->makeNewBlock(); + spv::Block& block_stencil_op_replace = builder_->makeNewBlock(); + spv::Block& block_stencil_op_increment_clamp = builder_->makeNewBlock(); + spv::Block& block_stencil_op_decrement_clamp = builder_->makeNewBlock(); + spv::Block& block_stencil_op_invert = builder_->makeNewBlock(); 
+ spv::Block& block_stencil_op_increment_wrap = builder_->makeNewBlock(); + spv::Block& block_stencil_op_decrement_wrap = builder_->makeNewBlock(); + spv::Block& block_stencil_op_merge = builder_->makeNewBlock(); + builder_->createSelectionMerge(&block_stencil_op_merge, + spv::SelectionControlDontFlattenMask); + { + std::unique_ptr stencil_op_switch_op = + std::make_unique(spv::OpSwitch); + stencil_op_switch_op->addIdOperand(stencil_op); + // Make keep the default. + stencil_op_switch_op->addIdOperand(block_stencil_op_keep.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kZero)); + stencil_op_switch_op->addIdOperand(block_stencil_op_zero.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kReplace)); + stencil_op_switch_op->addIdOperand(block_stencil_op_replace.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kIncrementClamp)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_increment_clamp.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kDecrementClamp)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_decrement_clamp.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kInvert)); + stencil_op_switch_op->addIdOperand(block_stencil_op_invert.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kIncrementWrap)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_increment_wrap.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kDecrementWrap)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_decrement_wrap.getId()); + builder_->getBuildPoint()->addInstruction( + std::move(stencil_op_switch_op)); + } + block_stencil_op_keep.addPredecessor(&block_stencil_op_head); + block_stencil_op_zero.addPredecessor(&block_stencil_op_head); + block_stencil_op_replace.addPredecessor(&block_stencil_op_head); + 
block_stencil_op_increment_clamp.addPredecessor(&block_stencil_op_head); + block_stencil_op_decrement_clamp.addPredecessor(&block_stencil_op_head); + block_stencil_op_invert.addPredecessor(&block_stencil_op_head); + block_stencil_op_increment_wrap.addPredecessor(&block_stencil_op_head); + block_stencil_op_decrement_wrap.addPredecessor(&block_stencil_op_head); + // Keep - will use the old stencil in the phi. + builder_->setBuildPoint(&block_stencil_op_keep); + builder_->createBranch(&block_stencil_op_merge); + // Zero - will use the zero constant in the phi. + builder_->setBuildPoint(&block_stencil_op_zero); + builder_->createBranch(&block_stencil_op_merge); + // Replace - will use the stencil reference in the phi. + builder_->setBuildPoint(&block_stencil_op_replace); + builder_->createBranch(&block_stencil_op_merge); + // Increment and clamp. + builder_->setBuildPoint(&block_stencil_op_increment_clamp); + spv::Id new_stencil_in_low_bits_increment_clamp = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMin, + builder_->makeUintConstant(UINT8_MAX - 1), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, + builder_->makeUintConstant(UINT8_MAX))), + const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Decrement and clamp. + builder_->setBuildPoint(&block_stencil_op_decrement_clamp); + spv::Id new_stencil_in_low_bits_decrement_clamp = builder_->createBinOp( + spv::OpISub, type_uint_, + builder_->createBinBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMax, const_uint_1, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, + builder_->makeUintConstant(UINT8_MAX))), + const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Invert. 
+ builder_->setBuildPoint(&block_stencil_op_invert); + spv::Id new_stencil_in_low_bits_invert = + builder_->createUnaryOp(spv::OpNot, type_uint_, old_depth_stencil); + builder_->createBranch(&block_stencil_op_merge); + // Increment and wrap. + // The upper bits containing the old depth have no effect on the behavior. + builder_->setBuildPoint(&block_stencil_op_increment_wrap); + spv::Id new_stencil_in_low_bits_increment_wrap = builder_->createBinOp( + spv::OpIAdd, type_uint_, old_depth_stencil, const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Decrement and wrap. + // The upper bits containing the old depth have no effect on the behavior. + builder_->setBuildPoint(&block_stencil_op_decrement_wrap); + spv::Id new_stencil_in_low_bits_decrement_wrap = builder_->createBinOp( + spv::OpISub, type_uint_, old_depth_stencil, const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Select the new stencil (with undefined data in bits starting from 8) + // based on the stencil operation. 
+ builder_->setBuildPoint(&block_stencil_op_merge); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2 * 8); + id_vector_temp_.push_back(old_depth_stencil); + id_vector_temp_.push_back(block_stencil_op_keep.getId()); + id_vector_temp_.push_back(const_uint_0_); + id_vector_temp_.push_back(block_stencil_op_zero.getId()); + id_vector_temp_.push_back(stencil_reference); + id_vector_temp_.push_back(block_stencil_op_replace.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_increment_clamp); + id_vector_temp_.push_back(block_stencil_op_increment_clamp.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_clamp); + id_vector_temp_.push_back(block_stencil_op_decrement_clamp.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_invert); + id_vector_temp_.push_back(block_stencil_op_invert.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_increment_wrap); + id_vector_temp_.push_back(block_stencil_op_increment_wrap.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_wrap); + id_vector_temp_.push_back(block_stencil_op_decrement_wrap.getId()); + spv::Id new_stencil_in_low_bits_if_enabled = + builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + // Merge the old depth / stencil (old depth kept from the old depth / + // stencil so the separate old depth register is not needed anymore after + // the depth test) and the new stencil based on the write mask. 
+ new_stencil_and_old_depth_if_stencil_enabled = builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, stencil_write_keep_mask), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + new_stencil_in_low_bits_if_enabled, + stencil_write_mask)); } - block_stencil_op_keep.addPredecessor(&block_stencil_op_head); - block_stencil_op_zero.addPredecessor(&block_stencil_op_head); - block_stencil_op_replace.addPredecessor(&block_stencil_op_head); - block_stencil_op_increment_clamp.addPredecessor(&block_stencil_op_head); - block_stencil_op_decrement_clamp.addPredecessor(&block_stencil_op_head); - block_stencil_op_invert.addPredecessor(&block_stencil_op_head); - block_stencil_op_increment_wrap.addPredecessor(&block_stencil_op_head); - block_stencil_op_decrement_wrap.addPredecessor(&block_stencil_op_head); - // Keep - will use the old stencil in the phi. - builder_->setBuildPoint(&block_stencil_op_keep); - builder_->createBranch(&block_stencil_op_merge); - // Zero - will use the zero constant in the phi. - builder_->setBuildPoint(&block_stencil_op_zero); - builder_->createBranch(&block_stencil_op_merge); - // Replace - will use the stencil reference in the phi. - builder_->setBuildPoint(&block_stencil_op_replace); - builder_->createBranch(&block_stencil_op_merge); - // Increment and clamp. - builder_->setBuildPoint(&block_stencil_op_increment_clamp); - spv::Id new_stencil_in_low_bits_increment_clamp = builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createBinBuiltinCall( - type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMin, - builder_->makeUintConstant(UINT8_MAX - 1), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, - builder_->makeUintConstant(UINT8_MAX))), - const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Decrement and clamp. 
- builder_->setBuildPoint(&block_stencil_op_decrement_clamp); - spv::Id new_stencil_in_low_bits_decrement_clamp = builder_->createBinOp( - spv::OpISub, type_uint_, - builder_->createBinBuiltinCall( - type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMax, const_uint_1, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, - builder_->makeUintConstant(UINT8_MAX))), - const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Invert. - builder_->setBuildPoint(&block_stencil_op_invert); - spv::Id new_stencil_in_low_bits_invert = - builder_->createUnaryOp(spv::OpNot, type_uint_, old_depth_stencil); - builder_->createBranch(&block_stencil_op_merge); - // Increment and wrap. - // The upper bits containing the old depth have no effect on the behavior. - builder_->setBuildPoint(&block_stencil_op_increment_wrap); - spv::Id new_stencil_in_low_bits_increment_wrap = builder_->createBinOp( - spv::OpIAdd, type_uint_, old_depth_stencil, const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Decrement and wrap. - // The upper bits containing the old depth have no effect on the behavior. - builder_->setBuildPoint(&block_stencil_op_decrement_wrap); - spv::Id new_stencil_in_low_bits_decrement_wrap = builder_->createBinOp( - spv::OpISub, type_uint_, old_depth_stencil, const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Select the new stencil (with undefined data in bits starting from 8) - // based on the stencil operation. 
- builder_->setBuildPoint(&block_stencil_op_merge); - id_vector_temp_.clear(); - id_vector_temp_.reserve(2 * 8); - id_vector_temp_.push_back(old_depth_stencil); - id_vector_temp_.push_back(block_stencil_op_keep.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_stencil_op_zero.getId()); - id_vector_temp_.push_back(stencil_reference); - id_vector_temp_.push_back(block_stencil_op_replace.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_increment_clamp); - id_vector_temp_.push_back(block_stencil_op_increment_clamp.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_clamp); - id_vector_temp_.push_back(block_stencil_op_decrement_clamp.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_invert); - id_vector_temp_.push_back(block_stencil_op_invert.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_increment_wrap); - id_vector_temp_.push_back(block_stencil_op_increment_wrap.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_wrap); - id_vector_temp_.push_back(block_stencil_op_decrement_wrap.getId()); - spv::Id new_stencil_in_low_bits_if_enabled = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); - // Merge the old depth / stencil (old depth kept from the old depth / - // stencil so the separate old depth register is not needed anymore after - // the depth test) and the new stencil based on the write mask. - spv::Id new_stencil_and_old_depth_if_stencil_enabled = - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, stencil_write_keep_mask), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - new_stencil_in_low_bits_if_enabled, - stencil_write_mask)); - + stencil_if.makeEndIf(); // Choose the result based on whether the stencil test was done. // All phi operations must be the first in the block. 
- builder_->createBranch(&block_stencil_enabled_merge); - spv::Block& block_stencil_enabled_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_stencil_enabled_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(stencil_passed_if_enabled); - id_vector_temp_.push_back(block_stencil_enabled_end.getId()); - id_vector_temp_.push_back(builder_->makeBoolConstant(true)); - id_vector_temp_.push_back(block_stencil_enabled_head.getId()); - spv::Id stencil_passed = - builder_->createOp(spv::OpPhi, type_bool_, id_vector_temp_); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_stencil_and_old_depth_if_stencil_enabled); - id_vector_temp_.push_back(block_stencil_enabled_end.getId()); - id_vector_temp_.push_back(old_depth_stencil); - id_vector_temp_.push_back(block_stencil_enabled_head.getId()); - spv::Id new_stencil_and_old_depth = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + spv::Id stencil_passed = stencil_if.createMergePhi( + stencil_passed_if_enabled, builder_->makeBoolConstant(true)); + spv::Id new_stencil_and_old_depth = stencil_if.createMergePhi( + new_stencil_and_old_depth_if_stencil_enabled, old_depth_stencil); // Check whether the tests have passed, and exclude the bit from the // coverage if not. 
@@ -2384,37 +2185,19 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( new_depth_stencil_write_condition = new_depth_stencil_different; } if (new_depth_stencil_write_condition != spv::NoResult) { - spv::Block& block_depth_stencil_write = builder_->makeNewBlock(); - spv::Block& block_depth_stencil_write_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_stencil_write_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(new_depth_stencil_write_condition, - &block_depth_stencil_write, - &block_depth_stencil_write_merge); - builder_->setBuildPoint(&block_depth_stencil_write); + SpirvBuilder::IfBuilder new_depth_stencil_write_if( + new_depth_stencil_write_condition, + spv::SelectionControlDontFlattenMask, *builder_); builder_->createStore(new_depth_stencil, sample_access_chain); - builder_->createBranch(&block_depth_stencil_write_merge); - builder_->setBuildPoint(&block_depth_stencil_write_merge); + new_depth_stencil_write_if.makeEndIf(); } - builder_->createBranch(&block_sample_covered_merge); - spv::Block& block_sample_covered_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_sample_covered_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_sample_mask_after_sample); - id_vector_temp_.push_back(block_sample_covered_end.getId()); - id_vector_temp_.push_back(new_sample_mask); - id_vector_temp_.push_back(block_sample_covered_head.getId()); - new_sample_mask = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_sample_covered.makeEndIf(); + new_sample_mask = if_sample_covered.createMergePhi( + new_sample_mask_after_sample, new_sample_mask); if (is_early) { - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_depth_stencil); - id_vector_temp_.push_back(block_sample_covered_end.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_sample_covered_head.getId()); late_write_depth_stencil[i] = - builder_->createOp(spv::OpPhi, 
type_uint_, id_vector_temp_); + if_sample_covered.createMergePhi(new_depth_stencil, const_uint_0_); } } @@ -2442,25 +2225,14 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( } } } - builder_->createBranch(&block_depth_stencil_enabled_merge); - spv::Block& block_depth_stencil_enabled_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_depth_stencil_enabled_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_sample_mask); - id_vector_temp_.push_back(block_depth_stencil_enabled_end.getId()); - id_vector_temp_.push_back(main_fsi_sample_mask_); - id_vector_temp_.push_back(block_depth_stencil_enabled_head.getId()); - main_fsi_sample_mask_ = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_depth_stencil_enabled.makeEndIf(); + main_fsi_sample_mask_ = if_depth_stencil_enabled.createMergePhi( + new_sample_mask, main_fsi_sample_mask_); if (is_early) { for (uint32_t i = 0; i < 4; ++i) { - id_vector_temp_.clear(); - id_vector_temp_.push_back(late_write_depth_stencil[i]); - id_vector_temp_.push_back(block_depth_stencil_enabled_end.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_depth_stencil_enabled_head.getId()); main_fsi_late_write_depth_stencil_[i] = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_depth_stencil_enabled.createMergePhi(late_write_depth_stencil[i], + const_uint_0_); } } } @@ -3160,32 +2932,25 @@ spv::Id SpirvShaderTranslator::FSI_FlushNaNClampAndInBlending( assert_true(builder_->getTypeId(min_value) == color_or_alpha_type); assert_true(builder_->getTypeId(max_value) == color_or_alpha_type); - spv::Block& block_is_fixed_point_head = *builder_->getBuildPoint(); - spv::Block& block_is_fixed_point_if = builder_->makeNewBlock(); - spv::Block& block_is_fixed_point_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_is_fixed_point_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_fixed_point, 
&block_is_fixed_point_if, - &block_is_fixed_point_merge); - builder_->setBuildPoint(&block_is_fixed_point_if); - // Flush NaN to 0 even for signed (NMax would flush it to the minimum value). - spv::Id color_or_alpha_clamped = builder_->createTriBuiltinCall( - color_or_alpha_type, ext_inst_glsl_std_450_, GLSLstd450FClamp, - builder_->createTriOp( - spv::OpSelect, color_or_alpha_type, - builder_->createUnaryOp(spv::OpIsNan, - type_bool_vectors_[component_count - 1], - color_or_alpha), - const_float_vectors_0_[component_count - 1], color_or_alpha), - min_value, max_value); - builder_->createBranch(&block_is_fixed_point_merge); - builder_->setBuildPoint(&block_is_fixed_point_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(color_or_alpha_clamped); - id_vector_temp_.push_back(block_is_fixed_point_if.getId()); - id_vector_temp_.push_back(color_or_alpha); - id_vector_temp_.push_back(block_is_fixed_point_head.getId()); - return builder_->createOp(spv::OpPhi, color_or_alpha_type, id_vector_temp_); + SpirvBuilder::IfBuilder if_fixed_point( + is_fixed_point, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id color_or_alpha_clamped; + { + // Flush NaN to 0 even for signed (NMax would flush it to the minimum + // value). + color_or_alpha_clamped = builder_->createTriBuiltinCall( + color_or_alpha_type, ext_inst_glsl_std_450_, GLSLstd450FClamp, + builder_->createTriOp( + spv::OpSelect, color_or_alpha_type, + builder_->createUnaryOp(spv::OpIsNan, + type_bool_vectors_[component_count - 1], + color_or_alpha), + const_float_vectors_0_[component_count - 1], color_or_alpha), + min_value, max_value); + } + if_fixed_point.makeEndIf(); + + return if_fixed_point.createMergePhi(color_or_alpha_clamped, color_or_alpha); } spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( @@ -3197,21 +2962,14 @@ spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( // infinity and NaN are not potentially involved in the multiplication. 
// Calculate the condition before the selection merge, which must be the // penultimate instruction in the block. - spv::Id factor_not_zero = builder_->createBinOp( - spv::OpINotEqual, type_bool_, factor, - builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))); - spv::Block& block_not_zero_head = *builder_->getBuildPoint(); - spv::Block& block_not_zero_if = builder_->makeNewBlock(); - spv::Block& block_not_zero_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_not_zero_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(factor_not_zero, &block_not_zero_if, - &block_not_zero_merge); + SpirvBuilder::IfBuilder factor_not_zero_if( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, factor, + builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))), + spv::SelectionControlDontFlattenMask, *builder_); // Non-zero factor case. - builder_->setBuildPoint(&block_not_zero_if); - spv::Block& block_factor_head = *builder_->getBuildPoint(); spv::Block& block_factor_one = builder_->makeNewBlock(); std::array color_factor_blocks; @@ -3386,18 +3144,11 @@ spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); spv::Id result = FSI_FlushNaNClampAndInBlending( result_unclamped, is_fixed_point, clamp_min_value, clamp_max_value); - builder_->createBranch(&block_not_zero_merge); - // Get the latest block for a non-zero factor after all the control flow. - spv::Block& block_not_zero_if_end = *builder_->getBuildPoint(); + + factor_not_zero_if.makeEndIf(); // Make the result zero if the factor is zero. 
- builder_->setBuildPoint(&block_not_zero_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(result); - id_vector_temp_.push_back(block_not_zero_if_end.getId()); - id_vector_temp_.push_back(const_float3_0_); - id_vector_temp_.push_back(block_not_zero_head.getId()); - return builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); + return factor_not_zero_if.createMergePhi(result, const_float3_0_); } spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( @@ -3408,21 +3159,14 @@ spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( // infinity and NaN are not potentially involved in the multiplication. // Calculate the condition before the selection merge, which must be the // penultimate instruction in the block. - spv::Id factor_not_zero = builder_->createBinOp( - spv::OpINotEqual, type_bool_, factor, - builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))); - spv::Block& block_not_zero_head = *builder_->getBuildPoint(); - spv::Block& block_not_zero_if = builder_->makeNewBlock(); - spv::Block& block_not_zero_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_not_zero_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(factor_not_zero, &block_not_zero_if, - &block_not_zero_merge); + SpirvBuilder::IfBuilder factor_not_zero_if( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, factor, + builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))), + spv::SelectionControlDontFlattenMask, *builder_); // Non-zero factor case. 
- builder_->setBuildPoint(&block_not_zero_if); - spv::Block& block_factor_head = *builder_->getBuildPoint(); spv::Block& block_factor_one = builder_->makeNewBlock(); std::array alpha_factor_blocks; @@ -3557,18 +3301,11 @@ spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( builder_->createOp(spv::OpPhi, type_float_, id_vector_temp_); spv::Id result = FSI_FlushNaNClampAndInBlending( result_unclamped, is_fixed_point, clamp_min_value, clamp_max_value); - builder_->createBranch(&block_not_zero_merge); - // Get the latest block for a non-zero factor after all the control flow. - spv::Block& block_not_zero_if_end = *builder_->getBuildPoint(); + + factor_not_zero_if.makeEndIf(); // Make the result zero if the factor is zero. - builder_->setBuildPoint(&block_not_zero_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(result); - id_vector_temp_.push_back(block_not_zero_if_end.getId()); - id_vector_temp_.push_back(const_float_0_); - id_vector_temp_.push_back(block_not_zero_head.getId()); - return builder_->createOp(spv::OpPhi, type_float_, id_vector_temp_); + return factor_not_zero_if.createMergePhi(result, const_float_0_); } spv::Id SpirvShaderTranslator::FSI_BlendColorOrAlphaWithUnclampedResult( diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 14af42d0d..c36484df1 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -330,8 +330,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { uint32_t index_bit = UINT32_C(1) << index; textures_remaining = xe::clear_lowest_bit(textures_remaining); TextureBinding& binding = texture_bindings_[index]; - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index); TextureKey old_key = binding.key; uint8_t old_swizzled_signs = binding.swizzled_signs; BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs); diff --git 
a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index a527edf35..c4b354e8a 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -19,6 +19,7 @@ #include "xenia/base/filesystem.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" #include "xenia/base/platform.h" #include "xenia/base/string.h" #include "xenia/base/system.h" @@ -354,9 +355,10 @@ void TraceViewer::DrawPacketDisassemblerUI() { ImGui::NextColumn(); if (!register_info || register_info->type == RegisterInfo::Type::kDword) { - ImGui::Text("%.8X", action.register_write.value.u32); + ImGui::Text("%.8X", action.register_write.value); } else { - ImGui::Text("%8f", action.register_write.value.f32); + ImGui::Text("%8f", xe::memory::Reinterpret( + action.register_write.value)); } ImGui::Columns(1); break; @@ -706,10 +708,8 @@ void TraceViewer::DrawTextureInfo( const Shader::TextureBinding& texture_binding) { auto& regs = *graphics_system_->register_file(); - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - texture_binding.fetch_constant * 6; - auto group = reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(texture_binding.fetch_constant); if (fetch.type != xenos::FetchConstantType::kTexture && (!cvars::gpu_allow_invalid_fetch_constants || fetch.type != xenos::FetchConstantType::kInvalidTexture)) { @@ -777,9 +777,9 @@ void TraceViewer::DrawFailedTextureInfo( void TraceViewer::DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xe_gpu_vertex_fetch_t* fetch) { - const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); - uint32_t vertex_count = fetch->size / vertex_binding.stride_words; + const xe_gpu_vertex_fetch_t& fetch) { + const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2); + uint32_t vertex_count = fetch.size / vertex_binding.stride_words; int column_count = 0; for (const auto& 
attrib : vertex_binding.attributes) { switch (attrib.fetch_instr.attributes.data_format) { @@ -880,7 +880,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, #define LOADEL(type, wo) \ GpuSwap(xe::load(vstart + \ (attrib.fetch_instr.attributes.offset + wo) * 4), \ - fetch->endian) + fetch.endian) switch (attrib.fetch_instr.attributes.data_format) { case xenos::VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); @@ -1062,7 +1062,7 @@ void ProgressBar(float frac, float width, float height = 0, if (height == 0) { height = ImGui::GetTextLineHeightWithSpacing(); } - frac = xe::saturate_unsigned(frac); + frac = xe::saturate(frac); auto pos = ImGui::GetCursorScreenPos(); auto col = ImGui::ColorConvertFloat4ToU32(color); @@ -1180,7 +1180,7 @@ void TraceViewer::DrawStateUI() { } auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + static_cast(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7); switch (enable_mode) { case ModeControl::kIgnore: @@ -1202,7 +1202,7 @@ void TraceViewer::DrawStateUI() { break; } case ModeControl::kCopy: { - uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), copy_dest_base); break; @@ -1213,9 +1213,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Viewport State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if ((pa_su_sc_mode_cntl >> 16) & 1) { - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET]; int16_t window_offset_x = window_offset & 0x7FFF; int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; if (window_offset_x & 0x4000) { @@ -1229,8 +1229,8 @@ void TraceViewer::DrawStateUI() { } else { ImGui::BulletText("Window Offset: 
disabled"); } - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL]; + uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR]; ImGui::BulletText( "Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF, @@ -1238,7 +1238,7 @@ void TraceViewer::DrawStateUI() { (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF), ((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF)); - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = (surface_info >> 16) & 0x3; @@ -1250,7 +1250,7 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Surface Pitch: %d", surface_pitch); ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); - uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL]; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; @@ -1265,14 +1265,20 @@ void TraceViewer::DrawStateUI() { } ImGui::BulletText( "Viewport Offset: %f, %f, %f", - vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, - vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, - vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); + vport_xoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET) + : 0.0f, + vport_yoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f, + vport_zoffset_enable ? 
regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET) + : 0.0f); ImGui::BulletText( "Viewport Scale: %f, %f, %f", - vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, - vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, - vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); + vport_xscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE) + : 1.0f, + vport_yscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) + : 1.0f, + vport_zscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE) + : 1.0f); if (!vport_xscale_enable) { ImGui::PopStyleColor(); } @@ -1282,7 +1288,7 @@ void TraceViewer::DrawStateUI() { ((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 9) & 0x1) ? "z/w0" : "z", ((vte_control >> 10) & 0x1) ? "w0" : "1/w0"); - uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL]; bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; ImGui::BulletText("Clip Enabled: %s, DX Clip: %s", @@ -1294,11 +1300,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Rasterizer State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; - uint32_t pa_sc_screen_scissor_tl = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; - uint32_t pa_sc_screen_scissor_br = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; + uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL]; + uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR]; if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) { int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF; @@ -1353,7 +1357,7 @@ void TraceViewer::DrawStateUI() { } ImGui::Columns(1); - auto rb_surface_info = 
regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_pitch = rb_surface_info & 0x3FFF; auto surface_msaa = static_cast((rb_surface_info >> 16) & 0x3); @@ -1362,39 +1366,39 @@ void TraceViewer::DrawStateUI() { if (enable_mode != ModeControl::kDepth) { // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; - uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; + uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL]; if ((color_control & 0x8) != 0) { ImGui::BulletText("Alpha Test: %s %.2f", kCompareFuncNames[color_control & 0x7], - regs[XE_GPU_REG_RB_ALPHA_REF].f32); + regs.Get(XE_GPU_REG_RB_ALPHA_REF)); } else { ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); ImGui::BulletText("Alpha Test: disabled"); ImGui::PopStyleColor(); } - auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); + auto blend_color = ImVec4(regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA)); ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x, blend_color.y, blend_color.z, blend_color.w); ImGui::SameLine(); // TODO small_height (was true) parameter was removed ImGui::ColorButton(nullptr, blend_color); - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, - regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + regs[XE_GPU_REG_RB_COLOR_INFO], + regs[XE_GPU_REG_RB_COLOR1_INFO], + regs[XE_GPU_REG_RB_COLOR2_INFO], + regs[XE_GPU_REG_RB_COLOR3_INFO], }; uint32_t rb_blendcontrol[4] = { - regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, - 
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL0], + regs[XE_GPU_REG_RB_BLENDCONTROL1], + regs[XE_GPU_REG_RB_BLENDCONTROL2], + regs[XE_GPU_REG_RB_BLENDCONTROL3], }; ImGui::Columns(2); for (int i = 0; i < xe::countof(color_info); ++i) { @@ -1503,9 +1507,9 @@ void TraceViewer::DrawStateUI() { } if (ImGui::CollapsingHeader("Depth/Stencil Target")) { - auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL]; + auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK]; + auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO]; bool uses_depth = (rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004); uint32_t stencil_ref = (rb_stencilrefmask & 0xFF); @@ -1689,10 +1693,9 @@ void TraceViewer::DrawStateUI() { draw_info.index_buffer_size, kIndexFormatNames[int(draw_info.index_format)], kEndiannessNames[int(draw_info.index_endianness)]); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if (pa_su_sc_mode_cntl & (1 << 21)) { - uint32_t reset_index = - regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32; + uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX]; if (draw_info.index_format == xenos::IndexFormat::kInt16) { ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF); } else { @@ -1752,30 +1755,16 @@ void TraceViewer::DrawStateUI() { auto shader = command_processor->active_vertex_shader(); if (shader) { for (const auto& vertex_binding : shader->vertex_bindings()) { - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = - reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch 
(vertex_binding.fetch_constant % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - assert_true(fetch->endian == xenos::Endian::k8in32); + xe_gpu_vertex_fetch_t fetch = + regs.GetVertexFetch(vertex_binding.fetch_constant); + assert_true(fetch.endian == xenos::Endian::k8in32); char tree_root_id[32]; sprintf(tree_root_id, "#vertices_root_%d", vertex_binding.fetch_constant); if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", - vertex_binding.fetch_constant, fetch->address << 2, - fetch->size * 4, - kEndiannessNames[int(fetch->endian)])) { + vertex_binding.fetch_constant, fetch.address << 2, + fetch.size * 4, + kEndiannessNames[int(fetch.endian)])) { ImGui::BeginChild("#vertices", ImVec2(0, 300)); DrawVertexFetcher(shader, vertex_binding, fetch); ImGui::EndChild(); @@ -1823,7 +1812,7 @@ void TraceViewer::DrawStateUI() { ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6, (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1834,8 +1823,9 @@ void TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) { ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4); ImGui::NextColumn(); - ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32, - regs[i + 2].f32, regs[i + 3].f32); + ImGui::Text("%f, %f, %f, %f", regs.Get(i + 0), + regs.Get(i + 1), regs.Get(i + 2), + regs.Get(i + 3)); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1848,7 +1838,7 @@ void TraceViewer::DrawStateUI() { (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32, (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1859,7 +1849,7 @@ void 
TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) { ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 188a6eb53..e5b7307fb 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -123,7 +123,7 @@ class TraceViewer : public xe::ui::WindowedApp { void DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xenos::xe_gpu_vertex_fetch_t* fetch); + const xenos::xe_gpu_vertex_fetch_t& fetch); TraceViewerWindowListener window_listener_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 118797a5d..317dd1cb7 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2177,6 +2177,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return IssueCopy(); } + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + GetVulkanProvider().device_info(); + + memexport_ranges_.clear(); + // Vertex shader analysis. auto vertex_shader = static_cast(active_vertex_shader()); if (!vertex_shader) { @@ -2184,7 +2189,14 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); - bool memexport_used_vertex = vertex_shader->memexport_eM_written() != 0; + // TODO(Triang3l): If the shader uses memory export, but + // vertexPipelineStoresAndAtomics is not supported, convert the vertex shader + // to a compute shader and dispatch it after the draw if the draw doesn't use + // tessellation. 
+ if (vertex_shader->memexport_eM_written() != 0 && + device_info.vertexPipelineStoresAndAtomics) { + draw_util::AddMemExportRanges(regs, *vertex_shader, memexport_ranges_); + } // Pixel shader analysis. bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); @@ -2207,12 +2219,15 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } else { // Disabling pixel shader for this case is also required by the pipeline // cache. - if (!memexport_used_vertex) { + if (memexport_ranges_.empty()) { // This draw has no effect. return true; } } - // TODO(Triang3l): Memory export. + if (pixel_shader && pixel_shader->memexport_eM_written() != 0 && + device_info.fragmentStoresAndAtomics) { + draw_util::AddMemExportRanges(regs, *pixel_shader, memexport_ranges_); + } uint32_t ps_param_gen_pos = UINT32_MAX; uint32_t interpolator_mask = @@ -2428,9 +2443,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_guest_graphics_pipeline_layout_ = pipeline_layout; } - const ui::vulkan::VulkanProvider::DeviceInfo& device_info = - GetVulkanProvider().device_info(); - bool host_render_targets_used = render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets; @@ -2503,8 +2515,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, (uint64_t(1) << (vfetch_index & 63))) { continue; } - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -2537,9 +2549,39 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, << (vfetch_index & 63); } + // Synchronize the memory pages backing memory scatter export streams, and + // calculate the range that includes the streams for the buffer barrier. 
+ uint32_t memexport_extent_start = UINT32_MAX, memexport_extent_end = 0; + for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) { + uint32_t memexport_range_base_bytes = memexport_range.base_address_dwords + << 2; + if (!shared_memory_->RequestRange(memexport_range_base_bytes, + memexport_range.size_bytes)) { + XELOGE( + "Failed to request memexport stream at 0x{:08X} (size {}) in the " + "shared memory", + memexport_range_base_bytes, memexport_range.size_bytes); + return false; + } + memexport_extent_start = + std::min(memexport_extent_start, memexport_range_base_bytes); + memexport_extent_end = + std::max(memexport_extent_end, + memexport_range_base_bytes + memexport_range.size_bytes); + } + // Insert the shared memory barrier if needed. - // TODO(Triang3l): Memory export. - shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + // TODO(Triang3l): Find some PM4 command that can be used for indication of + // when memexports should be awaited instead of inserting the barrier in Use + // every time if memory export was done in the previous draw? + if (memexport_extent_start < memexport_extent_end) { + shared_memory_->Use( + VulkanSharedMemory::Usage::kGuestDrawReadWrite, + std::make_pair(memexport_extent_start, + memexport_extent_end - memexport_extent_start)); + } else { + shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + } // After all commands that may dispatch, copy or insert barriers, submit the // barriers (may end the render pass), and (re)enter the render pass before @@ -2584,6 +2626,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); } + // Invalidate textures in memexported memory and watch for changes. 
+ for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) { + shared_memory_->RangeWrittenByGpu(memexport_range.base_address_dwords << 2, + memexport_range.size_bytes, false); + } + return true; } @@ -3306,10 +3354,10 @@ void VulkanCommandProcessor::UpdateDynamicState( // Blend constants. float blend_constants[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; dynamic_blend_constants_update_needed_ |= std::memcmp(dynamic_blend_constants_, blend_constants, @@ -3455,7 +3503,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3463,7 +3511,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( regs.Get(XE_GPU_REG_RB_STENCILREFMASK_BF); auto rb_surface_info = regs.Get(); auto vgt_draw_initiator = regs.Get(); - int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + auto vgt_indx_offset = regs.Get(XE_GPU_REG_VGT_INDX_OFFSET); bool edram_fragment_shader_interlock = render_target_cache_->GetPath() == @@ -3776,7 +3824,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != 
blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -3805,22 +3853,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ -3883,21 +3931,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } dirty |= system_constants_.edram_blend_constant[0] != - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); system_constants_.edram_blend_constant[0] = - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); dirty |= system_constants_.edram_blend_constant[1] != - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); dirty |= system_constants_.edram_blend_constant[2] != - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); 
system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); dirty |= system_constants_.edram_blend_constant[3] != - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } if (dirty) { @@ -3924,10 +3972,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). - assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. 
const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -4022,8 +4070,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4054,8 +4101,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4076,7 +4122,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); - std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], kBoolLoopConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; @@ -4094,7 +4140,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kFetchConstantsSize); - std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0], kFetchConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 5ebddf604..bd5cfa84f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -744,6 +744,9 @@ class VulkanCommandProcessor final : 
public CommandProcessor { // System shader constants. SpirvShaderTranslator::SystemConstants system_constants_; + + // Temporary storage for memexport stream constants used in the draw. + std::vector memexport_ranges_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index f91cc4e6b..eb2ee9b21 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( [common_blend_rt_index]), (((normalized_color_mask & ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) - ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + ? regs[XE_GPU_REG_RB_COLOR_MASK] : normalized_color_mask) >> (4 * common_blend_rt_index)) & 0b1111, diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index bf1cda68d..8f7887b4e 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -4156,21 +4156,16 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( builder.createAccessChain(spv::StorageClassPushConstant, push_constants, id_vector_temp), spv::NoPrecision); - spv::Id stencil_sample_passed = builder.createBinOp( - spv::OpINotEqual, type_bool, - builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, - stencil_mask_constant), - builder.makeUintConstant(0)); - spv::Block& stencil_bit_kill_block = builder.makeNewBlock(); - spv::Block& stencil_bit_merge_block = builder.makeNewBlock(); - builder.createSelectionMerge(&stencil_bit_merge_block, - spv::SelectionControlMaskNone); - builder.createConditionalBranch(stencil_sample_passed, - &stencil_bit_merge_block, - &stencil_bit_kill_block); - builder.setBuildPoint(&stencil_bit_kill_block); + SpirvBuilder::IfBuilder stencil_kill_if( + builder.createBinOp( + spv::OpIEqual, type_bool, + builder.createBinOp(spv::OpBitwiseAnd, type_uint, 
packed, + stencil_mask_constant), + builder.makeUintConstant(0)), + spv::SelectionControlMaskNone, builder); builder.createNoResultOp(spv::OpKill); - builder.setBuildPoint(&stencil_bit_merge_block); + // OpKill terminates the block. + stencil_kill_if.makeEndIf(false); } } break; } diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index a10c1d44e..2ce46119b 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( const VulkanShader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture( uint32_t& width_scaled_out, uint32_t& height_scaled_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/xenos.cc b/src/xenia/gpu/xenos.cc index f1a3dff34..3f86ac256 100644 --- a/src/xenia/gpu/xenos.cc +++ b/src/xenia/gpu/xenos.cc @@ -8,6 +8,7 @@ */ #include "xenia/gpu/xenos.h" +#include "xenia/base/memory.h" namespace xe { namespace gpu { @@ -22,7 +23,7 @@ namespace xenos { float PWLGammaToLinear(float gamma) { // Not found in game executables, so just using the logic similar to that in // the Source Engine. 
- gamma = xe::saturate_unsigned(gamma); + gamma = xe::saturate(gamma); float scale, offset; // While the compiled code for linear to gamma conversion uses `vcmpgtfp // constant, value` comparison (constant > value, or value < constant), it's @@ -63,7 +64,7 @@ float PWLGammaToLinear(float gamma) { } float LinearToPWLGamma(float linear) { - linear = xe::saturate_unsigned(linear); + linear = xe::saturate(linear); float scale, offset; // While the compiled code uses `vcmpgtfp constant, value` comparison // (constant > value, or value < constant), it's preferable to use `value >= @@ -114,8 +115,8 @@ float Float7e3To32(uint32_t f10) { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0x7F; } - uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return xe::memory::Reinterpret( + uint32_t(((exponent + 124) << 23) | (mantissa << 3))); } // Based on CFloat24 from d3dref9.dll and the 6e4 code from: @@ -127,7 +128,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept { // Positive only, and not -0 or NaN. return 0; } - uint32_t f32u32 = *reinterpret_cast(&f32); + auto f32u32 = xe::memory::Reinterpret(f32); if (f32u32 >= 0x3FFFFFF8) { // Saturate. 
return 0xFFFFFF; @@ -161,8 +162,8 @@ float Float20e4To32(uint32_t f24) noexcept { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF; } - uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return xe::memory::Reinterpret( + uint32_t(((exponent + 112) << 23) | (mantissa << 3))); } const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) { @@ -241,4 +242,4 @@ const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type) { } } // namespace xenos } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/immediate_drawer.cc b/src/xenia/ui/immediate_drawer.cc index fb00be77f..4d3c6bb4e 100644 --- a/src/xenia/ui/immediate_drawer.cc +++ b/src/xenia/ui/immediate_drawer.cc @@ -12,6 +12,7 @@ #include #include "xenia/base/assert.h" +#include "xenia/base/math.h" #include "xenia/ui/graphics_util.h" #include "xenia/ui/presenter.h" @@ -67,24 +68,19 @@ bool ImmediateDrawer::ScissorToRenderTarget(const ImmediateDraw& immediate_draw, } float render_target_width_float = float(render_target_width); float render_target_height_float = float(render_target_height); - // Scale to render target coordinates, drop NaNs (by doing - // std::max(0.0f, variable) in this argument order), and clamp to the render + // Scale to render target coordinates, drop NaNs, and clamp to the render // target size, below which the values are representable as 16p8 fixed-point. 
float scale_x = render_target_width / coordinate_space_width(); float scale_y = render_target_height / coordinate_space_height(); - float x0_float = - std::min(render_target_width_float, - std::max(0.0f, immediate_draw.scissor_left * scale_x)); - float y0_float = - std::min(render_target_height_float, - std::max(0.0f, immediate_draw.scissor_top * scale_y)); + float x0_float = xe::clamp_float(immediate_draw.scissor_left * scale_x, 0.0f, + render_target_width_float); + float y0_float = xe::clamp_float(immediate_draw.scissor_top * scale_y, 0.0f, + render_target_height_float); // Also make sure the size is non-negative. - float x1_float = - std::min(render_target_width_float, - std::max(x0_float, immediate_draw.scissor_right * scale_x)); - float y1_float = - std::min(render_target_height_float, - std::max(y0_float, immediate_draw.scissor_bottom * scale_y)); + float x1_float = xe::clamp_float(immediate_draw.scissor_right * scale_x, + x0_float, render_target_width_float); + float y1_float = xe::clamp_float(immediate_draw.scissor_bottom * scale_y, + y0_float, render_target_height_float); // Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered). int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8; int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8; diff --git a/src/xenia/ui/window_android.cc b/src/xenia/ui/window_android.cc index d67d478d1..8de82f400 100644 --- a/src/xenia/ui/window_android.cc +++ b/src/xenia/ui/window_android.cc @@ -153,16 +153,16 @@ bool AndroidWindow::OnActivitySurfaceMotionEvent(jobject event) { // with out-of-bounds coordinates), when moving the mouse outside the // View, or when starting moving the mouse when the pointer was previously // outside the View in some cases. 
- int32_t mouse_x = int32_t( - std::min(float(GetActualPhysicalWidth()), - std::max(0.0f, jni_env->CallFloatMethod( - event, jni_ids.motion_event_get_x, 0))) + - 0.5f); - int32_t mouse_y = int32_t( - std::min(float(GetActualPhysicalHeight()), - std::max(0.0f, jni_env->CallFloatMethod( - event, jni_ids.motion_event_get_y, 0))) + - 0.5f); + int32_t mouse_x = + int32_t(xe::clamp_float(jni_env->CallFloatMethod( + event, jni_ids.motion_event_get_x, 0), + 0.0f, float(GetActualPhysicalWidth())) + + 0.5f); + int32_t mouse_y = + int32_t(xe::clamp_float(jni_env->CallFloatMethod( + event, jni_ids.motion_event_get_y, 0), + 0.0f, float(GetActualPhysicalHeight())) + + 0.5f); static const MouseEvent::Button kMouseEventButtons[] = { MouseEvent::Button::kLeft, MouseEvent::Button::kRight, MouseEvent::Button::kMiddle, MouseEvent::Button::kX1,