diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index bc9b662c5..80ed35551 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -78,7 +78,7 @@ std::unique_ptr CreateGraphicsSystem() { std::unique_ptr best; best = std::unique_ptr( - new xe::gpu::gl4::GL4GraphicsSystem()); + new xe::gpu::vulkan::VulkanGraphicsSystem()); if (best) { return best; } diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index e5412d8e7..3edd9703e 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -87,13 +87,12 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) { return false; } -uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, - size_t length, - WriteWatchCallback callback, - void* callback_context, - void* callback_data) { - uint32_t base_address = guest_address; - assert_true(base_address < 0x1FFFFFFF); +uintptr_t MMIOHandler::AddPhysicalAccessWatch(uint32_t guest_address, + size_t length, WatchType type, + AccessWatchCallback callback, + void* callback_context, + void* callback_data) { + uint32_t base_address = guest_address & 0x1FFFFFFF; // Can only protect sizes matching system page size. // This means we need to round up, which will cause spurious access @@ -103,32 +102,45 @@ uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, xe::memory::page_size()); base_address = base_address - (base_address % xe::memory::page_size()); + auto lock = global_critical_region_.Acquire(); + // Add to table. The slot reservation may evict a previous watch, which // could include our target, so we do it first. - auto entry = new WriteWatchEntry(); + auto entry = new AccessWatchEntry(); entry->address = base_address; entry->length = uint32_t(length); entry->callback = callback; entry->callback_context = callback_context; entry->callback_data = callback_data; - global_critical_region_.mutex().lock(); - write_watches_.push_back(entry); - global_critical_region_.mutex().unlock(); + access_watches_.push_back(entry); - // Make the desired range read only under all address spaces. 
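The watch granularity here is the host page: the base address is rounded down and the length up, which is what makes the "spurious access violations" the comment warns about possible. A minimal sketch of that rounding, assuming a power-of-two page size (names are illustrative, not Xenia's):

```cpp
#include <cstddef>
#include <cstdint>

// Page-align a physical watch range: base rounds down, end rounds up.
// Assumes page_size is a power of two (e.g. 4096).
struct AlignedRange {
  uint32_t base;
  uint32_t length;
};

AlignedRange AlignWatchRange(uint32_t guest_address, size_t length,
                             size_t page_size) {
  uint32_t base = guest_address & 0x1FFFFFFF;  // mask into physical space
  uint32_t aligned_base = base - (base % uint32_t(page_size));
  // Rounding the end up widens the protected range, so writes to unrelated
  // data in the same pages can fire the watch spuriously.
  uint32_t end = base + uint32_t(length);
  uint32_t aligned_end = uint32_t((end + page_size - 1) & ~(page_size - 1));
  return {aligned_base, aligned_end - aligned_base};
}
```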
+ auto page_access = memory::PageAccess::kNoAccess; + switch (type) { + case kWatchWrite: + page_access = memory::PageAccess::kReadOnly; + break; + case kWatchReadWrite: + page_access = memory::PageAccess::kNoAccess; + break; + default: + assert_unhandled_case(type); + break; + } + + // Protect the range under all address spaces memory::Protect(physical_membase_ + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xC0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xE0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); return reinterpret_cast(entry); } -void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { +void MMIOHandler::ClearAccessWatch(AccessWatchEntry* entry) { memory::Protect(physical_membase_ + entry->address, entry->length, xe::memory::PageAccess::kReadWrite, nullptr); memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, @@ -139,19 +151,20 @@ void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { xe::memory::PageAccess::kReadWrite, nullptr); } -void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { - auto entry = reinterpret_cast(watch_handle); +void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) { + auto entry = reinterpret_cast(watch_handle); + auto lock = global_critical_region_.Acquire(); // Allow access to the range again. - ClearWriteWatch(entry); + ClearAccessWatch(entry); // Remove from table. - global_critical_region_.mutex().lock(); - auto it = std::find(write_watches_.begin(), write_watches_.end(), entry); - if (it != write_watches_.end()) { - write_watches_.erase(it); + auto it = std::find(access_watches_.begin(), access_watches_.end(), entry); + assert_false(it == access_watches_.end()); + + if (it != access_watches_.end()) { + access_watches_.erase(it); } - global_critical_region_.mutex().unlock(); delete entry; } @@ -159,18 +172,19 @@ void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { auto lock = global_critical_region_.Acquire(); - for (auto it = write_watches_.begin(); it != write_watches_.end();) { + for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; if ((entry->address <= physical_address && entry->address + entry->length > physical_address) || (entry->address >= physical_address && entry->address < physical_address + length)) { // This watch lies within the range. End it. - ClearWriteWatch(entry); + ClearAccessWatch(entry); entry->callback(entry->callback_context, entry->callback_data, entry->address); - it = write_watches_.erase(it); + it = access_watches_.erase(it); + delete entry; continue; } @@ -178,50 +192,49 @@ void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { } } -bool MMIOHandler::CheckWriteWatch(uint64_t fault_address) { - uint32_t physical_address = uint32_t(fault_address); - if (physical_address > 0x1FFFFFFF) { - physical_address &= 0x1FFFFFFF; - } - std::list pending_invalidates; - global_critical_region_.mutex().lock(); - // Now that we hold the lock, recheck and see if the pages are still - // protected. 
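The hunk above maps each watch type to a page protection and applies it to every view of the same guest RAM. A condensed sketch of that policy, using the `xe::memory::Protect` call seen here (the header path is an assumption):

```cpp
#include <cstdint>

#include "xenia/base/memory.h"  // assumed location of xe::memory::Protect

// Write watches leave pages readable (kReadOnly traps writes only);
// read/write watches revoke all access. The same range must be protected
// in the physical view and all three mirrored virtual views.
void ProtectWatchedRange(uint8_t* physical_membase, uint8_t* virtual_membase,
                         uint32_t address, uint32_t length, bool watch_reads) {
  auto access = watch_reads ? xe::memory::PageAccess::kNoAccess
                            : xe::memory::PageAccess::kReadOnly;
  xe::memory::Protect(physical_membase + address, length, access, nullptr);
  for (uint32_t mirror : {0xA0000000u, 0xC0000000u, 0xE0000000u}) {
    xe::memory::Protect(virtual_membase + mirror + address, length, access,
                        nullptr);
  }
}
```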
- memory::PageAccess cur_access; - size_t page_length = memory::page_size(); - memory::QueryProtect((void*)fault_address, page_length, cur_access); - if (cur_access != memory::PageAccess::kReadOnly && - cur_access != memory::PageAccess::kNoAccess) { - // Another thread has cleared this write watch. Abort. - global_critical_region_.mutex().unlock(); - return true; +bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) { + auto lock = global_critical_region_.Acquire(); + + for (auto it = access_watches_.begin(); it != access_watches_.end(); ++it) { + auto entry = *it; + if ((entry->address <= physical_address && + entry->address + entry->length > physical_address) || + (entry->address >= physical_address && + entry->address < physical_address + length)) { + // This watch lies within the range. + return true; + } } - for (auto it = write_watches_.begin(); it != write_watches_.end();) { + return false; +} + +bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) { + auto lock = global_critical_region_.Acquire(); + + bool hit = false; + for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; if (entry->address <= physical_address && entry->address + entry->length > physical_address) { - // Hit! Remove the writewatch. - pending_invalidates.push_back(entry); + // Hit! Remove the watch. + hit = true; + ClearAccessWatch(entry); + entry->callback(entry->callback_context, entry->callback_data, + physical_address); - ClearWriteWatch(entry); - it = write_watches_.erase(it); + it = access_watches_.erase(it); + delete entry; continue; } ++it; } - global_critical_region_.mutex().unlock(); - if (pending_invalidates.empty()) { + + if (!hit) { // Rethrow access violation - range was not being watched. return false; } - while (!pending_invalidates.empty()) { - auto entry = pending_invalidates.back(); - pending_invalidates.pop_back(); - entry->callback(entry->callback_context, entry->callback_data, - physical_address); - delete entry; - } + // Range was watched, so lets eat this access violation. return true; } @@ -414,9 +427,33 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { } } if (!range) { + auto fault_address = reinterpret_cast(ex->fault_address()); + uint32_t guest_address = 0; + if (fault_address >= virtual_membase_ && + fault_address < physical_membase_) { + // Faulting on a virtual address. + guest_address = static_cast(ex->fault_address()) & 0x1FFFFFFF; + } else { + // Faulting on a physical address. + guest_address = static_cast(ex->fault_address()); + } + + // HACK: Recheck if the pages are still protected (race condition - another + // thread clears the writewatch we just hit) + // Do this under the lock so we don't introduce another race condition. + auto lock = global_critical_region_.Acquire(); + memory::PageAccess cur_access; + size_t page_length = memory::page_size(); + memory::QueryProtect((void*)fault_address, page_length, cur_access); + if (cur_access != memory::PageAccess::kReadOnly && + cur_access != memory::PageAccess::kNoAccess) { + // Another thread has cleared this write watch. Abort. + return true; + } + // Access is not found within any range, so fail and let the caller handle // it (likely by aborting). 
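`IsRangeWatched` and `InvalidateRange` above share the same half-open interval overlap test; pulled out as a standalone predicate:

```cpp
#include <cstddef>
#include <cstdint>

// True if the watch [watch_start, watch_start + watch_length) intersects
// the query range [query_start, query_start + query_length).
bool WatchOverlaps(uint32_t watch_start, uint32_t watch_length,
                   uint32_t query_start, size_t query_length) {
  return (watch_start <= query_start &&
          watch_start + watch_length > query_start) ||
         (watch_start >= query_start &&
          watch_start < query_start + query_length);
}
```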
- return CheckWriteWatch(ex->fault_address()); + return CheckAccessWatch(guest_address); } auto rip = ex->pc(); diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index 70d89ac02..bb8cd665f 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -28,9 +28,8 @@ typedef uint32_t (*MMIOReadCallback)(void* ppc_context, void* callback_context, uint32_t addr); typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context, uint32_t addr, uint32_t value); - -typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr, - uint32_t address); +typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr, + uint32_t address); struct MMIORange { uint32_t address; @@ -46,6 +45,12 @@ class MMIOHandler { public: virtual ~MMIOHandler(); + enum WatchType { + kWatchInvalid = 0, + kWatchWrite = 1, + kWatchReadWrite = 2, + }; + static std::unique_ptr Install(uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end); @@ -59,17 +64,24 @@ class MMIOHandler { bool CheckLoad(uint32_t virtual_address, uint32_t* out_value); bool CheckStore(uint32_t virtual_address, uint32_t value); - uintptr_t AddPhysicalWriteWatch(uint32_t guest_address, size_t length, - WriteWatchCallback callback, - void* callback_context, void* callback_data); - void CancelWriteWatch(uintptr_t watch_handle); + // Memory watches: These are one-shot alarms that fire a callback (in the + // context of the thread that caused the callback) when a memory range is + // either written to or read from, depending on the watch type. These fire as + // soon as a read/write happens, and only fire once. + // These watches may be spuriously fired if memory is accessed nearby. + uintptr_t AddPhysicalAccessWatch(uint32_t guest_address, size_t length, + WatchType type, AccessWatchCallback callback, + void* callback_context, void* callback_data); + void CancelAccessWatch(uintptr_t watch_handle); void InvalidateRange(uint32_t physical_address, size_t length); + bool IsRangeWatched(uint32_t physical_address, size_t length); protected: - struct WriteWatchEntry { + struct AccessWatchEntry { uint32_t address; uint32_t length; - WriteWatchCallback callback; + WatchType type; + AccessWatchCallback callback; void* callback_context; void* callback_data; }; @@ -83,8 +95,8 @@ class MMIOHandler { static bool ExceptionCallbackThunk(Exception* ex, void* data); bool ExceptionCallback(Exception* ex); - void ClearWriteWatch(WriteWatchEntry* entry); - bool CheckWriteWatch(uint64_t fault_address); + void ClearAccessWatch(AccessWatchEntry* entry); + bool CheckAccessWatch(uint32_t guest_address); uint8_t* virtual_membase_; uint8_t* physical_membase_; @@ -94,7 +106,7 @@ class MMIOHandler { xe::global_critical_region global_critical_region_; // TODO(benvanik): data structure magic. 
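A hypothetical caller of the renamed API, based only on the declarations above: arm a one-shot write watch over an object's guest memory. Per the doc comment, the callback fires once, on the faulting thread, and the handler code shows it runs with the global lock held, so the callback should only flag work for later.

```cpp
#include <cstdint>

#include "xenia/cpu/mmio_handler.h"

// Illustrative watched object; not part of the diff.
struct WatchedTexture {
  bool pending_invalidation = false;
};

uintptr_t WatchGuestRange(xe::cpu::MMIOHandler* mmio_handler,
                          uint32_t guest_address, size_t length,
                          WatchedTexture* texture) {
  return mmio_handler->AddPhysicalAccessWatch(
      guest_address, length, xe::cpu::MMIOHandler::kWatchWrite,
      [](void* context, void* data, uint32_t address) {
        // One-shot: the pages are already unprotected by the time this runs.
        reinterpret_cast<WatchedTexture*>(data)->pending_invalidation = true;
      },
      /*callback_context=*/nullptr, /*callback_data=*/texture);
}
```

The returned handle goes to `CancelAccessWatch()` if the object is destroyed before the watch fires.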
- std::list write_watches_; + std::list access_watches_; static MMIOHandler* global_handler_; }; diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index f2fbb6c54..c2784480b 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -84,9 +84,9 @@ class CommandProcessor { swap_request_handler_ = fn; } - void RequestFrameTrace(const std::wstring& root_path); - void BeginTracing(const std::wstring& root_path); - void EndTracing(); + virtual void RequestFrameTrace(const std::wstring& root_path); + virtual void BeginTracing(const std::wstring& root_path); + virtual void EndTracing(); void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index 4a8917e71..72e1c9639 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -427,7 +427,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Not found, create. auto entry = std::make_unique(); entry->texture_info = texture_info; - entry->write_watch_handle = 0; + entry->access_watch_handle = 0; entry->pending_invalidation = false; entry->handle = 0; @@ -442,6 +442,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Found! Acquire the handle and remove the readbuffer entry. read_buffer_textures_.erase(it); entry->handle = read_buffer_entry->handle; + entry->access_watch_handle = read_buffer_entry->access_watch_handle; delete read_buffer_entry; // TODO(benvanik): set more texture properties? swizzle/etc? auto entry_ptr = entry.get(); @@ -495,14 +496,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Add a write watch. If any data in the given range is touched we'll get a // callback and evict the texture. We could reuse the storage, though the // driver is likely in a better position to pool that kind of stuff. - entry->write_watch_handle = memory_->AddPhysicalWriteWatch( + entry->access_watch_handle = memory_->AddPhysicalAccessWatch( texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, [](void* context_ptr, void* data_ptr, uint32_t address) { auto self = reinterpret_cast(context_ptr); auto touched_entry = reinterpret_cast(data_ptr); // Clear watch handle first so we don't redundantly // remove. - touched_entry->write_watch_handle = 0; + touched_entry->access_watch_handle = 0; touched_entry->pending_invalidation = true; // Add to pending list so Scavenge will clean it up. self->invalidated_textures_mutex_.lock(); @@ -574,14 +576,27 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, dest_rect, GL_LINEAR, swap_channels); } - // HACK: remove texture from write watch list so readback won't kill us. - // Not needed now, as readback is disabled. - /* - if (texture_entry->write_watch_handle) { - memory_->CancelWriteWatch(texture_entry->write_watch_handle); - texture_entry->write_watch_handle = 0; + // Setup a read/write access watch. If the game tries to touch the memory + // we were supposed to populate with this texture, then we'll actually + // populate it. 
+ if (texture_entry->access_watch_handle) { + memory_->CancelAccessWatch(texture_entry->access_watch_handle); + texture_entry->access_watch_handle = 0; } - //*/ + + texture_entry->access_watch_handle = memory_->AddPhysicalAccessWatch( + guest_address, texture_entry->texture_info.input_length, + cpu::MMIOHandler::kWatchReadWrite, + [](void* context, void* data, uint32_t address) { + auto touched_entry = reinterpret_cast(data); + touched_entry->access_watch_handle = 0; + + // This happens. RDR resolves to a texture then upsizes it, BF1943 + // writes to a resolved texture. + // TODO (for Vulkan): Copy this texture back into system memory. + // assert_always(); + }, + nullptr, texture_entry); return texture_entry->handle; } @@ -618,6 +633,20 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, entry->block_height = block_height; entry->format = format; + entry->access_watch_handle = memory_->AddPhysicalAccessWatch( + guest_address, block_height * block_width * 4, + cpu::MMIOHandler::kWatchReadWrite, + [](void* context, void* data, uint32_t address) { + auto entry = reinterpret_cast(data); + entry->access_watch_handle = 0; + + // This happens. RDR resolves to a texture then upsizes it, BF1943 + // writes to a resolved texture. + // TODO (for Vulkan): Copy this texture back into system memory. + // assert_always(); + }, + nullptr, entry.get()); + glCreateTextures(GL_TEXTURE_2D, 1, &entry->handle); glTextureParameteri(entry->handle, GL_TEXTURE_BASE_LEVEL, 0); glTextureParameteri(entry->handle, GL_TEXTURE_MAX_LEVEL, 1); @@ -636,9 +665,9 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, } void TextureCache::EvictTexture(TextureEntry* entry) { - if (entry->write_watch_handle) { - memory_->CancelWriteWatch(entry->write_watch_handle); - entry->write_watch_handle = 0; + if (entry->access_watch_handle) { + memory_->CancelAccessWatch(entry->access_watch_handle); + entry->access_watch_handle = 0; } for (auto& view : entry->views) { diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h index d214dac53..d55aa37a1 100644 --- a/src/xenia/gpu/gl4/texture_cache.h +++ b/src/xenia/gpu/gl4/texture_cache.h @@ -44,7 +44,7 @@ class TextureCache { }; struct TextureEntry { TextureInfo texture_info; - uintptr_t write_watch_handle; + uintptr_t access_watch_handle; GLuint handle; bool pending_invalidation; std::vector> views; @@ -74,8 +74,12 @@ class TextureCache { TextureFormat format, bool swap_channels, GLuint src_texture, Rect2D src_rect, Rect2D dest_rect); + TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width, + uint32_t height, TextureFormat format); + private: struct ReadBufferTexture { + uintptr_t access_watch_handle; uint32_t guest_address; uint32_t logical_width; uint32_t logical_height; @@ -90,8 +94,6 @@ class TextureCache { void EvictSampler(SamplerEntry* entry); TextureEntry* LookupOrInsertTexture(const TextureInfo& texture_info, uint64_t opt_hash = 0); - TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width, - uint32_t height, TextureFormat format); void EvictTexture(TextureEntry* entry); bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info); diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index 1f6a1eea6..1c7870edc 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -22,6 +22,8 @@ project("xenia-gpu") project_root.."/third_party/gflags/src", }) local_platform_files() + local_platform_files("spirv") + 
local_platform_files("spirv/passes") group("src") project("xenia-gpu-shader-compiler") diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 476369e53..7e0cd3ab2 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -99,6 +99,17 @@ struct InstructionResult { bool has_all_writes() const { return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3]; } + // Returns number of components written + uint32_t num_writes() const { + uint32_t total = 0; + for (int i = 0; i < 4; i++) { + if (write_mask[i]) { + total++; + } + } + + return total; + } // Returns true if any non-constant components are written. bool stores_non_constants() const { for (int i = 0; i < 4; ++i) { @@ -547,6 +558,9 @@ class Shader { // True if the shader was translated and prepared without error. bool is_valid() const { return is_valid_; } + // True if the shader has already been translated. + bool is_translated() const { return is_translated_; } + // Errors that occurred during translation. const std::vector& errors() const { return errors_; } @@ -591,6 +605,7 @@ class Shader { bool writes_color_targets_[4] = {false, false, false, false}; bool is_valid_ = false; + bool is_translated_ = false; std::vector errors_; std::string ucode_disassembly_; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index a89be80f5..5bb9ba016 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -51,6 +51,7 @@ void ShaderTranslator::Reset() { ucode_disasm_buffer_.Reset(); ucode_disasm_line_number_ = 0; previous_ucode_disasm_scan_offset_ = 0; + register_count_ = 64; total_attrib_count_ = 0; vertex_bindings_.clear(); texture_bindings_.clear(); @@ -95,9 +96,21 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { return true; } +bool ShaderTranslator::Translate(Shader* shader, + xenos::xe_gpu_program_cntl_t cntl) { + Reset(); + register_count_ = shader->type() == ShaderType::kVertex ? 
cntl.vs_regs + 1 + : cntl.ps_regs + 1; + + return TranslateInternal(shader); +} + bool ShaderTranslator::Translate(Shader* shader) { Reset(); + return TranslateInternal(shader); +} +bool ShaderTranslator::TranslateInternal(Shader* shader) { shader_type_ = shader->type(); ucode_dwords_ = shader->ucode_dwords(); ucode_dword_count_ = shader->ucode_dword_count(); @@ -155,6 +168,7 @@ bool ShaderTranslator::Translate(Shader* shader) { } shader->is_valid_ = true; + shader->is_translated_ = true; for (const auto& error : shader->errors_) { if (error.is_fatal) { shader->is_valid_ = false; @@ -369,9 +383,9 @@ bool ShaderTranslator::TranslateBlocks() { AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses); - PreProcessControlFlowInstruction(cf_index); + PreProcessControlFlowInstruction(cf_index, cf_a); ++cf_index; - PreProcessControlFlowInstruction(cf_index); + PreProcessControlFlowInstruction(cf_index, cf_b); ++cf_index; } @@ -672,11 +686,11 @@ void ShaderTranslator::TranslateExecInstructions( static_cast(ucode_dwords_[instr_offset * 3] & 0x1F); if (fetch_opcode == FetchOpcode::kVertexFetch) { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateVertexFetchInstruction(op); } else { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateTextureFetchInstruction(op); } } else { @@ -986,16 +1000,19 @@ void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) { return; } + ParsedAluInstruction instr; if (op.has_vector_op()) { const auto& opcode_info = alu_vector_opcode_infos_[static_cast(op.vector_opcode())]; - ParseAluVectorInstruction(op, opcode_info); + ParseAluVectorInstruction(op, opcode_info, instr); + ProcessAluInstruction(instr); } if (op.has_scalar_op()) { const auto& opcode_info = alu_scalar_opcode_infos_[static_cast(op.scalar_opcode())]; - ParseAluScalarInstruction(op, opcode_info); + ParseAluScalarInstruction(op, opcode_info, instr); + ProcessAluInstruction(instr); } } @@ -1044,9 +1061,8 @@ void ParseAluInstructionOperand(const AluInstruction& op, int i, uint32_t a = swizzle & 0x3; out_op->components[0] = GetSwizzleFromComponentIndex(a); } else if (swizzle_component_count == 2) { - swizzle >>= 4; - uint32_t a = ((swizzle >> 2) + 3) & 0x3; - uint32_t b = (swizzle + 2) & 0x3; + uint32_t a = ((swizzle >> 6) + 3) & 0x3; + uint32_t b = ((swizzle >> 0) + 0) & 0x3; out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[1] = GetSwizzleFromComponentIndex(b); } else { @@ -1088,8 +1104,8 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op, } void ShaderTranslator::ParseAluVectorInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info) { - ParsedAluInstruction i; + const AluInstruction& op, const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& i) { i.dword_index = 0; i.type = ParsedAluInstruction::Type::kVector; i.vector_opcode = op.vector_opcode(); @@ -1126,6 +1142,10 @@ void ShaderTranslator::ParseAluVectorInstruction( } else { // Unimplemented. 
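The new `Translate(Shader*, cntl)` overload sizes the register file from the program control register instead of assuming all 64 GPRs. Condensed below; the `+ 1` suggests the hardware fields store the top register index rather than a count (header path assumed):

```cpp
#include <cstdint>

#include "xenia/gpu/xenos.h"  // assumed home of ShaderType / program cntl

uint32_t GetRegisterCount(xe::gpu::ShaderType type,
                          xe::gpu::xenos::xe_gpu_program_cntl_t cntl) {
  // vs_regs/ps_regs appear to hold the highest used GPR index, hence +1.
  return (type == xe::gpu::ShaderType::kVertex ? cntl.vs_regs
                                               : cntl.ps_regs) +
         1;
}
```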
// assert_always(); + XELOGE( + "ShaderTranslator::ParseAluVectorInstruction: Unsupported write " + "to export %d", + dest_num); i.result.storage_target = InstructionStorageTarget::kNone; i.result.storage_index = 0; } @@ -1203,13 +1223,11 @@ void ShaderTranslator::ParseAluVectorInstruction( } i.Disassemble(&ucode_disasm_buffer_); - - ProcessAluInstruction(i); } void ShaderTranslator::ParseAluScalarInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info) { - ParsedAluInstruction i; + const AluInstruction& op, const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& i) { i.dword_index = 0; i.type = ParsedAluInstruction::Type::kScalar; i.scalar_opcode = op.scalar_opcode(); @@ -1319,8 +1337,6 @@ void ShaderTranslator::ParseAluScalarInstruction( } i.Disassemble(&ucode_disasm_buffer_); - - ProcessAluInstruction(i); } } // namespace gpu diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index d1b27a997..9801cb2d6 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -30,6 +30,7 @@ class ShaderTranslator { // DEPRECATED(benvanik): remove this when shader cache is removed. bool GatherAllBindingInformation(Shader* shader); + bool Translate(Shader* shader, xenos::xe_gpu_program_cntl_t cntl); bool Translate(Shader* shader); protected: @@ -38,6 +39,8 @@ class ShaderTranslator { // Resets translator state before beginning translation. virtual void Reset(); + // Register count. + uint32_t register_count() const { return register_count_; } // True if the current shader is a vertex shader. bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } // True if the current shader is a pixel shader. @@ -79,7 +82,8 @@ class ShaderTranslator { } // Pre-process a control-flow instruction before anything else. - virtual void PreProcessControlFlowInstruction(uint32_t cf_index) {} + virtual void PreProcessControlFlowInstruction( + uint32_t cf_index, const ucode::ControlFlowInstruction& instr) {} // Handles translation for control flow label addresses. // This is triggered once for each label required (due to control flow @@ -131,6 +135,8 @@ class ShaderTranslator { int src_swizzle_component_count; }; + bool TranslateInternal(Shader* shader); + void MarkUcodeInstruction(uint32_t dword_offset); void AppendUcodeDisasm(char c); void AppendUcodeDisasm(const char* value); @@ -173,14 +179,18 @@ class ShaderTranslator { void TranslateAluInstruction(const ucode::AluInstruction& op); void ParseAluVectorInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info); + const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& instr); void ParseAluScalarInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info); + const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& instr); // Input shader metadata and microcode. ShaderType shader_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; + xenos::xe_gpu_program_cntl_t program_cntl_; + uint32_t register_count_; // Accumulated translation errors. std::vector errors_; diff --git a/src/xenia/gpu/spirv/compiler.cc b/src/xenia/gpu/spirv/compiler.cc new file mode 100644 index 000000000..d31b36996 --- /dev/null +++ b/src/xenia/gpu/spirv/compiler.cc @@ -0,0 +1,36 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. 
* + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/compiler.h" + +namespace xe { +namespace gpu { +namespace spirv { + +Compiler::Compiler() {} + +void Compiler::AddPass(std::unique_ptr pass) { + compiler_passes_.push_back(std::move(pass)); +} + +bool Compiler::Compile(spv::Module* module) { + for (auto& pass : compiler_passes_) { + if (!pass->Run(module)) { + return false; + } + } + + return true; +} + +void Compiler::Reset() { compiler_passes_.clear(); } + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler.h b/src/xenia/gpu/spirv/compiler.h new file mode 100644 index 000000000..fd27969ee --- /dev/null +++ b/src/xenia/gpu/spirv/compiler.h @@ -0,0 +1,41 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_COMPILER_H_ +#define XENIA_GPU_SPIRV_COMPILER_H_ + +#include "xenia/base/arena.h" +#include "xenia/gpu/spirv/compiler_pass.h" + +#include "third_party/glslang-spirv/SpvBuilder.h" +#include "third_party/spirv/GLSL.std.450.hpp11" + +namespace xe { +namespace gpu { +namespace spirv { + +// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the +// drivers. +class Compiler { + public: + Compiler(); + + void AddPass(std::unique_ptr pass); + void Reset(); + bool Compile(spv::Module* module); + + private: + std::vector> compiler_passes_; +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_COMPILER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler_pass.h b/src/xenia/gpu/spirv/compiler_pass.h new file mode 100644 index 000000000..0d81aeeee --- /dev/null +++ b/src/xenia/gpu/spirv/compiler_pass.h @@ -0,0 +1,37 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_ +#define XENIA_GPU_SPIRV_COMPILER_PASS_H_ + +#include "xenia/base/arena.h" + +#include "third_party/glslang-spirv/SpvBuilder.h" +#include "third_party/spirv/GLSL.std.450.hpp11" + +namespace xe { +namespace gpu { +namespace spirv { + +class CompilerPass { + public: + CompilerPass() = default; + virtual ~CompilerPass() {} + + virtual bool Run(spv::Module* module) = 0; + + private: + xe::Arena ir_arena_; +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp new file mode 100644 index 000000000..4d719f769 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +ControlFlowAnalysisPass::ControlFlowAnalysisPass() {} + +bool ControlFlowAnalysisPass::Run(spv::Module* module) { + for (auto function : module->getFunctions()) { + // For each OpBranchConditional, see if we can find a point where control + // flow converges and then append an OpSelectionMerge. + // Potential problems: while loops constructed from branch instructions + } + + return true; +} + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h new file mode 100644 index 000000000..6b279e251 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h @@ -0,0 +1,34 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ +#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ + +#include "xenia/gpu/spirv/compiler_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +// Control-flow analysis pass. Runs through control-flow and adds merge opcodes +// where necessary. 
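`Compiler` above is a thin pass manager: `Compile()` runs passes in registration order and stops at the first `false`. A hypothetical pass showing the `CompilerPass` contract:

```cpp
#include <memory>

#include "xenia/gpu/spirv/compiler.h"

namespace xe {
namespace gpu {
namespace spirv {

// Illustrative no-op pass; not part of the diff.
class NopPass : public CompilerPass {
 public:
  bool Run(spv::Module* module) override {
    // A real pass would walk and rewrite the module here. Returning false
    // aborts Compiler::Compile() early.
    return true;
  }
};

}  // namespace spirv
}  // namespace gpu
}  // namespace xe

// Registration mirrors SpirvShaderTranslator's constructor:
//   compiler_.AddPass(std::make_unique<xe::gpu::spirv::NopPass>());
```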
+class ControlFlowAnalysisPass : public CompilerPass { + public: + ControlFlowAnalysisPass(); + + bool Run(spv::Module* module) override; + + private: +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc new file mode 100644 index 000000000..7b01aa5aa --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc @@ -0,0 +1,48 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +ControlFlowSimplificationPass::ControlFlowSimplificationPass() {} + +bool ControlFlowSimplificationPass::Run(spv::Module* module) { + for (auto function : module->getFunctions()) { + // Walk through the blocks in the function and merge any blocks which are + // unconditionally dominated. + for (auto it = function->getBlocks().end() - 1; + it != function->getBlocks().begin() - 1;) { + auto block = *it; + if (!block->isUnreachable() && block->getPredecessors().size() == 1) { + auto prev_block = block->getPredecessors()[0]; + auto last_instr = + prev_block->getInstruction(prev_block->getInstructionCount() - 1); + if (last_instr->getOpCode() == spv::Op::OpBranch) { + if (prev_block->getSuccessors().size() == 1 && + prev_block->getSuccessors()[0] == block) { + // We're dominated by this block. Merge into it. + prev_block->merge(block); + block->setUnreachable(); + } + } + } + + --it; + } + } + + return true; +} + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h new file mode 100644 index 000000000..f851d24f1 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h @@ -0,0 +1,34 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ +#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ + +#include "xenia/gpu/spirv/compiler_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +// Control-flow simplification pass. Combines adjacent blocks and marks +// any unreachable blocks. 
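The simplification pass above folds a block into its predecessor under three conditions; isolated as a predicate using the same forked-builder block accessors the pass calls:

```cpp
#include "third_party/glslang-spirv/SpvBuilder.h"

// A reachable block can merge into its predecessor when it has exactly one
// predecessor, that predecessor ends in an unconditional OpBranch, and
// this block is the predecessor's only successor.
bool CanMergeIntoPredecessor(spv::Block* block) {
  if (block->isUnreachable() || block->getPredecessors().size() != 1) {
    return false;
  }
  auto prev_block = block->getPredecessors()[0];
  auto last_instr =
      prev_block->getInstruction(prev_block->getInstructionCount() - 1);
  return last_instr->getOpCode() == spv::Op::OpBranch &&
         prev_block->getSuccessors().size() == 1 &&
         prev_block->getSuccessors()[0] == block;
}
```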
+class ControlFlowSimplificationPass : public CompilerPass { + public: + ControlFlowSimplificationPass(); + + bool Run(spv::Module* module) override; + + private: +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index bdd4c7e97..229951c8e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2016 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -12,16 +12,24 @@ #include #include "xenia/base/logging.h" +#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" +#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" namespace xe { namespace gpu { using namespace ucode; +constexpr uint32_t kMaxInterpolators = 16; +constexpr uint32_t kMaxTemporaryRegisters = 64; + using spv::GLSLstd450; using spv::Id; using spv::Op; -SpirvShaderTranslator::SpirvShaderTranslator() = default; +SpirvShaderTranslator::SpirvShaderTranslator() { + compiler_.AddPass(std::make_unique()); + compiler_.AddPass(std::make_unique()); +} SpirvShaderTranslator::~SpirvShaderTranslator() = default; @@ -39,6 +47,7 @@ void SpirvShaderTranslator::StartTranslation() { spv::MemoryModel::MemoryModelGLSL450); b.addCapability(spv::Capability::CapabilityShader); b.addCapability(spv::Capability::CapabilityGenericPointer); + if (is_vertex_shader()) { b.addCapability(spv::Capability::CapabilityClipDistance); b.addCapability(spv::Capability::CapabilityCullDistance); @@ -48,18 +57,18 @@ void SpirvShaderTranslator::StartTranslation() { } spv::Block* function_block = nullptr; - translated_main_ = b.makeFunctionEntry(spv::Decoration::DecorationInvariant, - b.makeVoidType(), "translated_main", - {}, {}, &function_block); + translated_main_ = + b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", + {}, {}, &function_block); bool_type_ = b.makeBoolType(); float_type_ = b.makeFloatType(32); int_type_ = b.makeIntType(32); - Id uint_type = b.makeUintType(32); + uint_type_ = b.makeUintType(32); vec2_float_type_ = b.makeVectorType(float_type_, 2); vec3_float_type_ = b.makeVectorType(float_type_, 3); vec4_float_type_ = b.makeVectorType(float_type_, 4); - vec4_uint_type_ = b.makeVectorType(uint_type, 4); + vec4_uint_type_ = b.makeVectorType(uint_type_, 4); vec4_bool_type_ = b.makeVectorType(bool_type_, 4); vec4_float_one_ = b.makeCompositeConstant( @@ -71,8 +80,8 @@ void SpirvShaderTranslator::StartTranslation() { std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); - registers_type_ = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(64), 0); + registers_type_ = b.makeArrayType(vec4_float_type_, + b.makeUintConstant(register_count()), 0); registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, registers_type_, "r"); @@ -85,16 +94,14 @@ void SpirvShaderTranslator::StartTranslation() { "ps"); pv_ = 
b.createVariable(spv::StorageClass::StorageClassFunction, vec4_float_type_, "pv"); - a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, - b.makeUintType(32), "a0"); + a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, + "a0"); // Uniform constants. Id float_consts_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1); - Id loop_consts_type = - b.makeArrayType(b.makeUintType(32), b.makeUintConstant(32), 1); - Id bool_consts_type = - b.makeArrayType(b.makeUintType(32), b.makeUintConstant(8), 1); + Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1); + Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1); Id consts_struct_type = b.makeStructType( {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); @@ -136,7 +143,7 @@ void SpirvShaderTranslator::StartTranslation() { // Push constants, represented by SpirvPushConstants. Id push_constants_type = b.makeStructType( - {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type}, + {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type_}, "push_consts_type"); b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock); @@ -164,48 +171,35 @@ void SpirvShaderTranslator::StartTranslation() { push_constants_type, "push_consts"); // Texture bindings - Id img_t[] = { - b.makeImageType(float_type_, spv::Dim::Dim1D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::Dim3D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown)}; - Id samplers_t = b.makeSamplerType(); + Id tex_t[] = {b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim1D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim2D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim3D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::DimCube, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown))}; - Id img_a_t[] = {b.makeArrayType(img_t[0], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[1], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[2], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[3], b.makeUintConstant(32), 0)}; - Id samplers_a = b.makeArrayType(samplers_t, b.makeUintConstant(32), 0); - - Id img_s[] = { - b.makeStructType({img_a_t[0]}, "img1D_type"), - b.makeStructType({img_a_t[1]}, "img2D_type"), - b.makeStructType({img_a_t[2]}, "img3D_type"), - b.makeStructType({img_a_t[3]}, "imgCube_type"), - }; - Id samplers_s = b.makeStructType({samplers_a}, "samplers_type"); + Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)}; for (int i = 0; i < 4; i++) { - img_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - img_s[i], - xe::format_string("images%dD", i + 1).c_str()); - b.addDecoration(img_[i], spv::Decoration::DecorationBlock); - b.addDecoration(img_[i], 
spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); + tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + tex_a_t[i], + xe::format_string("textures%dD", i + 1).c_str()); + b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1); + b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i); } - samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - samplers_s, "samplers"); - b.addDecoration(samplers_, spv::Decoration::DecorationBlock); - b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); // Interpolators. - Id interpolators_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); + Id interpolators_type = b.makeArrayType( + vec4_float_type_, b.makeUintConstant(kMaxInterpolators), 0); if (is_vertex_shader()) { // Vertex inputs/outputs. for (const auto& binding : vertex_bindings()) { @@ -247,47 +241,132 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(attrib_var, spv::Decoration::DecorationLocation, attrib.attrib_index); - vertex_binding_map_[binding.fetch_constant][attrib.fetch_instr - .attributes.offset] = - attrib_var; + vertex_binding_map_[binding.fetch_constant] + [attrib.fetch_instr.attributes.offset] = attrib_var; } } interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { + // Zero interpolators. + auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput, + interpolators_, + std::vector({b.makeUintConstant(i)})); + b.createStore(vec4_float_zero_, ptr); + } pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, vec4_float_type_, "gl_Position"); b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, spv::BuiltIn::BuiltInPosition); + + vertex_id_ = b.createVariable(spv::StorageClass::StorageClassInput, + int_type_, "gl_VertexId"); + b.addDecoration(vertex_id_, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInVertexId); + + auto vertex_id = b.createLoad(vertex_id_); + vertex_id = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_id); + auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, + registers_ptr_, + std::vector({b.makeUintConstant(0)})); + auto r0 = b.createLoad(r0_ptr); + r0 = b.createCompositeInsert(vertex_id, r0, vec4_float_type_, + std::vector({0})); + b.createStore(r0, r0_ptr); } else { // Pixel inputs from vertex shader. interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); // Pixel fragment outputs (one per render target). 
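On the vertex path above, `r0.x` is pre-seeded with `gl_VertexId` converted to float before the translated code runs. The same sequence in isolation, mirroring the builder calls used in `StartTranslation()` (the `Id` parameters stand in for the translator's members):

```cpp
#include <vector>

#include "third_party/glslang-spirv/SpvBuilder.h"

// Load gl_VertexId, convert signed int -> float, and insert it into
// component 0 of register r0.
void SeedVertexIndex(spv::Builder& b, spv::Id vertex_id_var,
                     spv::Id registers_ptr, spv::Id float_type,
                     spv::Id vec4_float_type) {
  spv::Id vid = b.createLoad(vertex_id_var);
  vid = b.createUnaryOp(spv::Op::OpConvertSToF, float_type, vid);
  spv::Id r0_ptr = b.createAccessChain(
      spv::StorageClass::StorageClassFunction, registers_ptr,
      std::vector<spv::Id>({b.makeUintConstant(0)}));
  spv::Id r0 = b.createLoad(r0_ptr);
  r0 = b.createCompositeInsert(vid, r0, vec4_float_type,
                               std::vector<unsigned int>({0}));
  b.createStore(r0, r0_ptr);
}
```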
Id frag_outputs_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0); frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput, - frag_outputs_type, "o"); + frag_outputs_type, "oC"); b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); + frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput, + float_type_, "gl_FragDepth"); + b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInFragDepth); + // TODO(benvanik): frag depth, etc. // Copy interpolators to r[0..16]. - b.createNoResultOp(spv::Op::OpCopyMemorySized, - {registers_ptr_, interpolators_, - b.makeUintConstant(16 * 4 * sizeof(float))}); + // TODO: Need physical addressing in order to do this. + // b.createNoResultOp(spv::Op::OpCopyMemorySized, + // {registers_ptr_, interpolators_, + // b.makeUintConstant(16 * 4 * sizeof(float))}); + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { + // For now, copy interpolators register-by-register :/ + auto idx = b.makeUintConstant(i); + auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput, + interpolators_, std::vector({idx})); + auto r_a = b.createAccessChain(spv::StorageClass::StorageClassFunction, + registers_ptr_, std::vector({idx})); + b.createNoResultOp(spv::Op::OpCopyMemory, std::vector({r_a, i_a})); + } + + // Setup ps_param_gen + auto ps_param_gen_idx_ptr = b.createAccessChain( + spv::StorageClass::StorageClassPushConstant, push_consts_, + std::vector({b.makeUintConstant(3)})); + auto ps_param_gen_idx = b.createLoad(ps_param_gen_idx_ptr); + + auto frag_coord = b.createVariable(spv::StorageClass::StorageClassInput, + vec4_float_type_, "gl_FragCoord"); + b.addDecoration(frag_coord, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInFragCoord); + + auto point_coord = b.createVariable(spv::StorageClass::StorageClassInput, + vec2_float_type_, "gl_PointCoord"); + b.addDecoration(point_coord, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInPointCoord); + auto param = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, + {frag_coord, point_coord, 0, 1, 4, 5}); + /* + // TODO: gl_FrontFacing + auto param_x = b.createCompositeExtract(param, float_type_, 0); + auto param_x_inv = b.createBinOp(spv::Op::OpFMul, float_type_, param_x, + b.makeFloatConstant(-1.f)); + param_x = b.createCompositeInsert(param_x_inv, param, vec4_float_type_, 0); + */ + + auto cond = b.createBinOp(spv::Op::OpINotEqual, bool_type_, + ps_param_gen_idx, b.makeUintConstant(-1)); + spv::Builder::If ifb(cond, b); + + // FYI: We do this instead of r[ps_param_gen_idx] because that causes + // nvidia to move all registers into local memory (slow!) + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { + auto reg_ptr = b.createAccessChain( + spv::StorageClass::StorageClassFunction, registers_ptr_, + std::vector({b.makeUintConstant(i)})); + + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx, + b.makeUintConstant(i)); + auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param, + b.createLoad(reg_ptr)); + b.createStore(reg, reg_ptr); + } + + ifb.makeEndIf(); } } std::vector SpirvShaderTranslator::CompleteTranslation() { auto& b = *builder_; + assert_false(open_predicated_block_); + auto block = &b.makeNewBlock(); + b.createBranch(block); b.makeReturn(false); // main() entry point. 
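The ps_param_gen store above deliberately avoids `r[ps_param_gen_idx]`: dynamic indexing pushed the whole register array into local memory on NVIDIA, so each register is conditionally overwritten with `OpSelect` instead. The loop body, mirroring the calls above:

```cpp
#include <vector>

#include "third_party/glslang-spirv/SpvBuilder.h"

// For each register i: r[i] = (ps_param_gen_idx == i) ? param : r[i].
void StoreParamGen(spv::Builder& b, spv::Id bool_type, spv::Id vec4_float_type,
                   spv::Id registers_ptr, spv::Id ps_param_gen_idx,
                   spv::Id param, uint32_t register_count) {
  for (uint32_t i = 0; i < register_count; ++i) {
    spv::Id reg_ptr = b.createAccessChain(
        spv::StorageClass::StorageClassFunction, registers_ptr,
        std::vector<spv::Id>({b.makeUintConstant(i)}));
    spv::Id cond = b.createBinOp(spv::Op::OpIEqual, bool_type,
                                 ps_param_gen_idx, b.makeUintConstant(i));
    spv::Id reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type, cond,
                                param, b.createLoad(reg_ptr));
    b.createStore(reg, reg_ptr);
  }
}
```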
@@ -330,8 +409,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv); // pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz - auto p_all_w = b.smearScalar(spv::Decoration::DecorationInvariant, p_w, - vec4_float_type_); + auto p_all_w = b.smearScalar(spv::NoPrecision, p_w, vec4_float_type_); auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w); p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p); @@ -346,10 +424,66 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { {p, p_scaled, 4, 5, 2, 3}); b.createStore(p, pos_); + } else { + // Alpha test + auto alpha_test_enabled = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 0}); + auto alpha_test_func = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 1}); + auto alpha_test_ref = b.createCompositeExtract(push_consts_, float_type_, + std::vector{2, 2}); + alpha_test_func = + b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func); + auto oC0_alpha = b.createCompositeExtract(frag_outputs_, float_type_, + std::vector({0, 3})); + + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, + alpha_test_enabled, b.makeFloatConstant(1.f)); + spv::Builder::If alpha_if(cond, b); + + std::vector switch_segments; + b.makeSwitch(alpha_test_func, 8, std::vector({0, 1, 2, 3, 4, 5, 6, 7}), + std::vector({0, 1, 2, 3, 4, 5, 6, 7}), 7, + switch_segments); + + const static spv::Op alpha_op_map[] = { + spv::Op::OpNop, + spv::Op::OpFOrdGreaterThanEqual, + spv::Op::OpFOrdNotEqual, + spv::Op::OpFOrdGreaterThan, + spv::Op::OpFOrdLessThanEqual, + spv::Op::OpFOrdEqual, + spv::Op::OpFOrdLessThan, + spv::Op::OpNop, + }; + + // if (alpha_func == 0) passes = false; + b.nextSwitchSegment(switch_segments, 0); + b.makeDiscard(); + b.addSwitchBreak(); + + for (int i = 1; i < 7; i++) { + b.nextSwitchSegment(switch_segments, i); + auto cond = + b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, alpha_test_ref); + spv::Builder::If discard_if(cond, b); + b.makeDiscard(); + discard_if.makeEndIf(); + b.addSwitchBreak(); + } + + // if (alpha_func == 7) passes = true; + b.nextSwitchSegment(switch_segments, 7); + b.endSwitch(switch_segments); + + alpha_if.makeEndIf(); } b.makeReturn(false); + // Compile the spv IR + compiler_.Compile(b.getModule()); + std::vector spirv_words; b.dump(spirv_words); @@ -365,28 +499,55 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { } void SpirvShaderTranslator::PostTranslation(Shader* shader) { + // Validation. + // TODO(DrChat): Only do this if a flag is set (this is pretty slow). + auto validation = validator_.Validate( + reinterpret_cast(shader->translated_binary().data()), + shader->translated_binary().size() / 4); + if (validation->has_error()) { + XELOGE("SPIR-V Shader Validation failed! Error: %s", + validation->error_string()); + } + // TODO(benvanik): only if needed? could be slowish. 
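The alpha-test switch above follows the usual 0..7 compare-function ordering (0 = never, 7 = always), and `alpha_op_map` stores the *negation* of each pass comparison, since a successful compare triggers the discard. A scalar restatement:

```cpp
#include <cstdint>

// Returns true if the fragment should be discarded for the given Xenos
// alpha compare function. Func 1 ("pass if alpha < ref") discards on
// alpha >= ref, and so on; 0 always discards, 7 never does.
bool AlphaTestDiscards(uint32_t func, float alpha, float ref) {
  switch (func) {
    case 0: return true;
    case 1: return alpha >= ref;
    case 2: return alpha != ref;
    case 3: return alpha > ref;
    case 4: return alpha <= ref;
    case 5: return alpha == ref;
    case 6: return alpha < ref;
    default: return false;  // 7: always pass
  }
}
```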
auto disasm = disassembler_.Disassemble( reinterpret_cast(shader->translated_binary().data()), shader->translated_binary().size() / 4); if (disasm->has_error()) { XELOGE("Failed to disassemble SPIRV - invalid?"); - return; + } else { + set_host_disassembly(shader, disasm->to_string()); } - set_host_disassembly(shader, disasm->to_string()); } void SpirvShaderTranslator::PreProcessControlFlowInstruction( - uint32_t cf_index) { + uint32_t cf_index, const ControlFlowInstruction& instr) { auto& b = *builder_; - cf_blocks_[cf_index] = &b.makeNewBlock(); + if (cf_blocks_.find(cf_index) == cf_blocks_.end()) { + CFBlock block; + block.block = &b.makeNewBlock(); + cf_blocks_[cf_index] = block; + } else { + cf_blocks_[cf_index].block = &b.makeNewBlock(); + } + + if (instr.opcode() == ControlFlowOpcode::kCondJmp) { + auto cf_block = cf_blocks_.find(instr.cond_jmp.address()); + if (cf_block == cf_blocks_.end()) { + CFBlock block; + block.prev_dominates = false; + cf_blocks_[instr.cond_jmp.address()] = block; + } else { + cf_block->second.prev_dominates = false; + } + } else if (instr.opcode() == ControlFlowOpcode::kLoopStart) { + // TODO + } } void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { auto& b = *builder_; - - EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( @@ -395,7 +556,7 @@ void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( if (cf_index == 0) { // Kind of cheaty, but emit a branch to the first block. - b.createBranch(cf_blocks_[cf_index]); + b.createBranch(cf_blocks_[cf_index].block); } } @@ -407,15 +568,20 @@ void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { auto& b = *builder_; - b.createNoResultOp(spv::Op::OpNop); + // b.createNoResultOp(spv::Op::OpNop); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { auto& b = *builder_; + assert_false(open_predicated_block_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + // Head has the logic to check if the body should execute. - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); auto body = head; switch (instr.type) { @@ -432,24 +598,46 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( v = b.createLoad(v); // Bitfield extract the bool constant. - v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + // FIXME: NVidia's compiler seems to be broken on this instruction? + /* + v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, b.makeUintConstant(instr.bool_constant_index % 32), b.makeUintConstant(1)); + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, + b.makeUintConstant(instr.condition ? 1 : 0)); + */ + v = b.createBinOp( + spv::Op::OpBitwiseAnd, uint_type_, v, + b.makeUintConstant(1 << (instr.bool_constant_index % 32))); + auto cond = b.createBinOp( + instr.condition ? 
spv::Op::OpINotEqual : spv::Op::OpIEqual, + bool_type_, v, b.makeUintConstant(0)); + // Conditional branch assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, - b.makeBoolConstant(instr.condition)); - b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + + auto next_block = cf_blocks_[instr.dword_index + 1]; + if (next_block.prev_dominates) { + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); + } + b.createConditionalBranch(cond, body, next_block.block); } break; case ParsedExecInstruction::Type::kPredicated: { // Branch based on p0. assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, - b.makeBoolConstant(instr.condition)); - b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + auto cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.condition)); + + auto next_block = cf_blocks_[instr.dword_index + 1]; + if (next_block.prev_dominates) { + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); + } + b.createConditionalBranch(cond, body, next_block.block); + } break; } b.setBuildPoint(body); @@ -459,11 +647,19 @@ void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { auto& b = *builder_; + if (open_predicated_block_) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + } + if (instr.is_end) { b.makeReturn(false); } else { assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } } @@ -471,7 +667,7 @@ void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); // TODO: Emit a spv LoopMerge @@ -480,46 +676,50 @@ void SpirvShaderTranslator::ProcessLoopStartInstruction( EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessCallInstruction( const ParsedCallInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); + // Unused instruction(?) 
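The exec and jump paths above both replace `OpBitFieldUExtract` (noted as miscompiling on NVIDIA's driver at the time) with a mask-and-compare on the packed bool constants. The scalar equivalent:

```cpp
#include <cstdint>

// Test one bit of a packed 32-bool word: AND with a single-bit mask and
// compare against zero, honoring the instruction's expected condition.
bool TestBoolConstant(uint32_t packed_bools, uint32_t constant_index,
                      bool condition) {
  uint32_t bit = packed_bools & (1u << (constant_index % 32));
  return condition ? (bit != 0) : (bit == 0);
}
```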
+  assert_always();
   EmitUnimplementedTranslationError();
 
   assert_true(cf_blocks_.size() > instr.dword_index + 1);
-  b.createBranch(cf_blocks_[instr.dword_index + 1]);
+  b.createBranch(cf_blocks_[instr.dword_index + 1].block);
 }
 
 void SpirvShaderTranslator::ProcessReturnInstruction(
     const ParsedReturnInstruction& instr) {
   auto& b = *builder_;
 
-  auto head = cf_blocks_[instr.dword_index];
+  auto head = cf_blocks_[instr.dword_index].block;
   b.setBuildPoint(head);
 
+  // Unused instruction(?)
+  assert_always();
   EmitUnimplementedTranslationError();
 
   assert_true(cf_blocks_.size() > instr.dword_index + 1);
-  b.createBranch(cf_blocks_[instr.dword_index + 1]);
+  b.createBranch(cf_blocks_[instr.dword_index + 1].block);
 }
 
 // CF jump
@@ -527,13 +727,15 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
     const ParsedJumpInstruction& instr) {
   auto& b = *builder_;
 
-  auto head = cf_blocks_[instr.dword_index];
+  auto head = cf_blocks_[instr.dword_index].block;
   b.setBuildPoint(head);
   switch (instr.type) {
     case ParsedJumpInstruction::Type::kUnconditional: {
-      b.createBranch(cf_blocks_[instr.target_address]);
+      b.createBranch(cf_blocks_[instr.target_address].block);
     } break;
     case ParsedJumpInstruction::Type::kConditional: {
+      assert_true(cf_blocks_.size() > instr.dword_index + 1);
+
       // Based off of bool_consts
       std::vector<Id> offsets;
       offsets.push_back(b.makeUintConstant(2));  // bool_consts
@@ -542,23 +744,35 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
                                    consts_, offsets);
       v = b.createLoad(v);
 
+      // FIXME: NVidia's compiler seems to be broken on this instruction?
+      /*
       // Bitfield extract the bool constant.
-      v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v,
+      v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v,
                         b.makeUintConstant(instr.bool_constant_index % 32),
                         b.makeUintConstant(1));
 
       // Conditional branch
-      auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v,
-                                b.makeBoolConstant(instr.condition));
-      b.createConditionalBranch(cond, cf_blocks_[instr.target_address],
-                                cf_blocks_[instr.dword_index]);
+      auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v,
+                                b.makeUintConstant(instr.condition ? 1 : 0));
+      */
+      v = b.createBinOp(
+          spv::Op::OpBitwiseAnd, uint_type_, v,
+          b.makeUintConstant(1 << (instr.bool_constant_index % 32)));
+      auto cond = b.createBinOp(
+          instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual,
+          bool_type_, v, b.makeUintConstant(0));
+
+      b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block,
+                                cf_blocks_[instr.dword_index + 1].block);
     } break;
     case ParsedJumpInstruction::Type::kPredicated: {
       assert_true(cf_blocks_.size() > instr.dword_index + 1);
-      auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_,
-                                b.makeBoolConstant(instr.condition));
-      b.createConditionalBranch(cond, cf_blocks_[instr.target_address],
-                                cf_blocks_[instr.dword_index]);
+
+      auto cond =
+          b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
+                        b.makeBoolConstant(instr.condition));
+      b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block,
+                                cf_blocks_[instr.dword_index + 1].block);
     } break;
   }
 }
 
@@ -567,7 +781,7 @@ void SpirvShaderTranslator::ProcessAllocInstruction(
     const ParsedAllocInstruction& instr) {
   auto& b = *builder_;
 
-  auto head = cf_blocks_[instr.dword_index];
+  auto head = cf_blocks_[instr.dword_index].block;
   b.setBuildPoint(head);
 
   switch (instr.type) {
@@ -585,24 +799,113 @@ void SpirvShaderTranslator::ProcessAllocInstruction(
   }
 
   assert_true(cf_blocks_.size() > instr.dword_index + 1);
-  b.createBranch(cf_blocks_[instr.dword_index + 1]);
+  b.createBranch(cf_blocks_[instr.dword_index + 1].block);
 }
 
 void SpirvShaderTranslator::ProcessVertexFetchInstruction(
     const ParsedVertexFetchInstruction& instr) {
   auto& b = *builder_;
 
+  assert_true(is_vertex_shader());
+  assert_not_zero(vertex_id_);
 
-  // TODO: instr.is_predicated
+  // Close the open predicated block if this instr isn't predicated or the
+  // conditions do not match.
+  if (open_predicated_block_ &&
+      (!instr.is_predicated ||
+       instr.predicate_condition != predicated_block_cond_)) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
+  }
+
+  if (!open_predicated_block_ && instr.is_predicated) {
+    Id pred_cond =
+        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
+                      b.makeBoolConstant(instr.predicate_condition));
+    auto block = &b.makeNewBlock();
+    open_predicated_block_ = true;
+    predicated_block_cond_ = instr.predicate_condition;
+    predicated_block_end_ = &b.makeNewBlock();
+
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
+    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
+    b.setBuildPoint(block);
+  }
 
   // Operand 0 is the index
   // Operand 1 is the binding
   // TODO: Indexed fetch
-  auto vertex_ptr =
-      vertex_binding_map_[instr.operands[1].storage_index][instr.attributes
-                                                               .offset];
-  assert_not_zero(vertex_ptr);
+  auto vertex_id = LoadFromOperand(instr.operands[0]);
+  vertex_id = b.createCompositeExtract(vertex_id, float_type_, 0);
+  vertex_id = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_id);
+  auto shader_vertex_id = b.createLoad(vertex_id_);
+  auto cond =
+      b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id);
+  cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_);
+  // Skip loading if it's an indexed fetch.
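+  // The vertex_id == gl_VertexIndex compare above feeds the OpSelect at the
+  // end of this function: result = cond ? fetched_vertex : (0, ..., 0, 1).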
+  auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
+                                       [instr.attributes.offset];
+  assert_not_zero(vertex_ptr);
   auto vertex = b.createLoad(vertex_ptr);
+
+  switch (instr.attributes.data_format) {
+    case VertexFormat::k_8_8_8_8:
+    case VertexFormat::k_16_16:
+    case VertexFormat::k_16_16_16_16:
+    case VertexFormat::k_16_16_16_16_FLOAT:
+    case VertexFormat::k_32:
+    case VertexFormat::k_32_32:
+    case VertexFormat::k_32_32_32_32:
+    case VertexFormat::k_32_FLOAT:
+    case VertexFormat::k_32_32_FLOAT:
+    case VertexFormat::k_32_32_32_FLOAT:
+    case VertexFormat::k_32_32_32_32_FLOAT:
+      // These are handled, for now.
+      break;
+
+    case VertexFormat::k_10_11_11: {
+      // No conversion needed. Natively supported.
+    } break;
+
+    case VertexFormat::k_11_11_10: {
+      // This needs to be converted.
+    } break;
+  }
+
+  auto vertex_components = b.getNumComponents(vertex);
+  Id alt_vertex = 0;
+  switch (vertex_components) {
+    case 1:
+      alt_vertex = b.makeFloatConstant(0.f);
+      break;
+    case 2:
+      alt_vertex = b.makeCompositeConstant(
+          vec2_float_type_, std::vector<Id>({b.makeFloatConstant(0.f),
+                                             b.makeFloatConstant(1.f)}));
+      break;
+    case 3:
+      alt_vertex = b.makeCompositeConstant(
+          vec3_float_type_,
+          std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
+                           b.makeFloatConstant(1.f)}));
+      break;
+    case 4:
+      alt_vertex = b.makeCompositeConstant(
+          vec4_float_type_,
+          std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
+                           b.makeFloatConstant(0.f),
+                           b.makeFloatConstant(1.f)}));
      break;
+    default:
+      assert_unhandled_case(vertex_components);
+  }
+
+  vertex = b.createTriOp(spv::Op::OpSelect, b.getTypeId(vertex), cond, vertex,
+                         alt_vertex);
   StoreToResult(vertex, instr.result);
 }
 
@@ -610,7 +913,33 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
     const ParsedTextureFetchInstruction& instr) {
   auto& b = *builder_;
 
-  // TODO: instr.is_predicated
+  // Close the open predicated block if this instr isn't predicated or the
+  // conditions do not match.
+  if (open_predicated_block_ &&
+      (!instr.is_predicated ||
+       instr.predicate_condition != predicated_block_cond_)) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
+  }
+
+  if (!open_predicated_block_ && instr.is_predicated) {
+    Id pred_cond =
+        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
+                      b.makeBoolConstant(instr.predicate_condition));
+    auto block = &b.makeNewBlock();
+    open_predicated_block_ = true;
+    predicated_block_cond_ = instr.predicate_condition;
+    predicated_block_end_ = &b.makeNewBlock();
+
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
+    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
+    b.setBuildPoint(block);
+  }
+
   // Operand 0 is the offset
   // Operand 1 is the sampler index
   Id dest = 0;
@@ -619,23 +948,13 @@
   uint32_t dim_idx = 0;
   switch (instr.dimension) {
-    case TextureDimension::k1D:
-      src = b.createCompositeExtract(src, float_type_, 0);
+    case TextureDimension::k1D: {
       dim_idx = 0;
-      break;
+    } break;
     case TextureDimension::k2D: {
-      auto s0 = b.createCompositeExtract(src, float_type_, 0);
-      auto s1 = b.createCompositeExtract(src, float_type_, 1);
-      src = b.createCompositeConstruct(vec2_float_type_,
-                                       std::vector<Id>({s0, s1}));
       dim_idx = 1;
     } break;
     case TextureDimension::k3D: {
-      auto s0 = b.createCompositeExtract(src, float_type_, 0);
-      auto s1 = b.createCompositeExtract(src, float_type_, 1);
-      auto s2 = b.createCompositeExtract(src, float_type_, 2);
-      src = b.createCompositeConstruct(vec3_float_type_,
-                                       std::vector<Id>({s0, s1, s2}));
      dim_idx = 2;
    } break;
    case TextureDimension::kCube: {
@@ -647,28 +966,21 @@
   switch (instr.opcode) {
     case FetchOpcode::kTextureFetch: {
-      auto image_index = b.makeUintConstant(instr.operands[1].storage_index);
-      auto image_ptr = b.createAccessChain(
-          spv::StorageClass::StorageClassUniformConstant, img_[dim_idx],
-          std::vector<Id>({b.makeUintConstant(0), image_index}));
-      auto sampler_ptr = b.createAccessChain(
-          spv::StorageClass::StorageClassUniformConstant, samplers_,
-          std::vector<Id>({b.makeUintConstant(0), image_index}));
-      auto image = b.createLoad(image_ptr);
-      auto sampler = b.createLoad(sampler_ptr);
-
-      auto tex = b.createBinOp(spv::Op::OpSampledImage, b.getImageType(image),
-                               image, sampler);
+      auto texture_index = b.makeUintConstant(instr.operands[1].storage_index);
+      auto texture_ptr =
+          b.createAccessChain(spv::StorageClass::StorageClassUniformConstant,
+                              tex_[dim_idx], std::vector<Id>({texture_index}));
+      auto texture = b.createLoad(texture_ptr);
 
       spv::Builder::TextureParameters params = {0};
       params.coords = src;
-      params.sampler = sampler;
-      dest = b.createTextureCall(spv::Decoration::DecorationInvariant,
-                                 vec4_float_type_, false, false, false, false,
-                                 false, params);
+      params.sampler = texture;
+      dest = b.createTextureCall(spv::NoPrecision, vec4_float_type_, false,
+                                 false, false, false, false, params);
     } break;
     default:
       // TODO: the rest of these
+      assert_always();
       break;
   }
 
@@ -698,19 +1010,41 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
     const ParsedAluInstruction& instr) {
   auto& b = *builder_;
 
+  // TODO: If we have identical operands, reuse previous one.
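+  // e.g. "mul r0, r1, r1" currently emits two identical load chains; the
+  // second LoadFromOperand call could simply return the first result.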
   Id sources[3] = {0};
   Id dest = 0;
   for (size_t i = 0; i < instr.operand_count; i++) {
     sources[i] = LoadFromOperand(instr.operands[i]);
   }
 
-  Id pred_cond = 0;
-  if (instr.is_predicated) {
-    pred_cond =
-        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
-                      b.makeBoolConstant(instr.predicate_condition));
+  // Close the open predicated block if this instr isn't predicated or the
+  // conditions do not match.
+  if (open_predicated_block_ &&
+      (!instr.is_predicated ||
+       instr.predicate_condition != predicated_block_cond_)) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
   }
 
+  if (!open_predicated_block_ && instr.is_predicated) {
+    Id pred_cond =
+        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
+                      b.makeBoolConstant(instr.predicate_condition));
+    auto block = &b.makeNewBlock();
+    open_predicated_block_ = true;
+    predicated_block_cond_ = instr.predicate_condition;
+    predicated_block_end_ = &b.makeNewBlock();
+
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
+    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
+    b.setBuildPoint(block);
+  }
+
+  bool close_predicated_block = false;
   switch (instr.vector_opcode) {
     case AluVectorOpcode::kAdd: {
       dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0],
@@ -746,23 +1080,52 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
     } break;
 
     case AluVectorOpcode::kDst: {
-      // TODO
+      auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1);
+      auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1);
+      auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y);
+
+      auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2);
+      auto src1_w = b.createCompositeExtract(sources[1], float_type_, 3);
+      dest = b.createCompositeConstruct(
+          vec4_float_type_,
+          std::vector<Id>({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w}));
     } break;
 
+    case AluVectorOpcode::kDp2Add: {
+      auto src0_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_,
+                                {sources[0], sources[0], 0, 1});
+      auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_,
+                                {sources[1], sources[1], 0, 1});
+      auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0);
+      dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy);
+      dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dest, src2_x);
+      dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
+    } break;
+
+    case AluVectorOpcode::kDp3: {
+      auto src0_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_,
+                                 {sources[0], sources[0], 0, 1, 2});
+      auto src1_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_,
+                                 {sources[1], sources[1], 0, 1, 2});
+      dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz);
+      dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
     } break;
 
     case AluVectorOpcode::kDp4: {
       dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0],
                            sources[1]);
+      dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
     } break;
 
     case AluVectorOpcode::kFloor: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kFloor, {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             spv::GLSLstd450::kFloor,
+                                             {sources[0]});
     } break;
 
     case AluVectorOpcode::kFrc: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kFract, {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             spv::GLSLstd450::kFract,
+                                             {sources[0]});
     } break;
 
     case AluVectorOpcode::kKillEq: {
@@ -771,10 +1134,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_,
                                 sources[0], sources[1]);
       cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
       b.createConditionalBranch(cond, kill_block, continue_block);
 
       b.setBuildPoint(kill_block);
@@ -790,10 +1149,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual,
                                 vec4_bool_type_, sources[0], sources[1]);
       cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
       b.createConditionalBranch(cond, kill_block, continue_block);
 
       b.setBuildPoint(kill_block);
@@ -809,10 +1164,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_,
                                 sources[0], sources[1]);
       cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
       b.createConditionalBranch(cond, kill_block, continue_block);
 
       b.setBuildPoint(kill_block);
@@ -828,10 +1179,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_,
                                 sources[0], sources[1]);
       cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
      b.createConditionalBranch(cond, kill_block, continue_block);
 
      b.setBuildPoint(kill_block);
@@ -848,6 +1195,23 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
    } break;
 
    case AluVectorOpcode::kMax4: {
+      auto src0_x = b.createCompositeExtract(sources[0], float_type_, 0);
+      auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1);
+      auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2);
+      auto src0_w = b.createCompositeExtract(sources[0], float_type_, 3);
+
+      auto max_xy = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
+          {src0_x, src0_y});
+      auto max_zw = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
+          {src0_z, src0_w});
+      auto max_xyzw = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
+          {max_xy, max_zw});
+
+      // FIXME: Docs say this only updates pv.x?
+      dest = b.smearScalar(spv::NoPrecision, max_xyzw, vec4_float_type_);
    } break;
 
    case AluVectorOpcode::kMaxA: {
@@ -857,27 +1221,38 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
                            b.makeFloatConstant(0.5f));
       addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr);
       addr = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, int_type_,
-          spv::GLSLstd450::kSClamp,
+          spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp,
          {addr, b.makeIntConstant(-256), b.makeIntConstant(255)});
       b.createStore(addr, a0_);
 
       // dest = src0 >= src1 ? src0 : src1
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kFMax, {sources[0], sources[1]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             spv::GLSLstd450::kFMax,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluVectorOpcode::kMax: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kFMax, {sources[0], sources[1]});
+      if (sources[0] == sources[1]) {
+        // mov dst, src
+        dest = sources[0];
+        break;
+      }
+
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             spv::GLSLstd450::kFMax,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluVectorOpcode::kMin: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kFMin, {sources[0], sources[1]});
+      if (sources[0] == sources[1]) {
+        // mov dst, src
+        dest = sources[0];
+        break;
+      }
+
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             spv::GLSLstd450::kFMin,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluVectorOpcode::kMul: {
@@ -893,17 +1268,18 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto c_and =
           b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
       auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
       auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
 
       // p0
       b.createStore(c_and_w, p0_);
+      close_predicated_block = true;
 
       // dest
       auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
       s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
                            b.makeFloatConstant(1.f));
-      auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x,
-                              vec4_float_type_);
+      auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
 
       dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
                            vec4_float_zero_, s0);
@@ -917,17 +1293,18 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto c_and =
           b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
       auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
       auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
 
       // p0
       b.createStore(c_and_w, p0_);
+      close_predicated_block = true;
 
       // dest
       auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
       s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
                            b.makeFloatConstant(1.f));
-      auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x,
-                              vec4_float_type_);
+      auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
 
       dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
                            vec4_float_zero_, s0);
@@ -941,17 +1318,18 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto c_and =
           b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
       auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
       auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
 
       // p0
       b.createStore(c_and_w, p0_);
+      close_predicated_block = true;
 
       // dest
       auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
       s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
                            b.makeFloatConstant(1.f));
-      auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x,
-                              vec4_float_type_);
+      auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
 
       dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
                            vec4_float_zero_, s0);
@@ -965,17 +1343,18 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
       auto c_and =
           b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
       auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
       auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
 
       // p0
       b.createStore(c_and_w, p0_);
+      close_predicated_block = true;
 
       // dest
       auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
       s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
                            b.makeFloatConstant(1.f));
-      auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x,
-                              vec4_float_type_);
+      auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
 
       dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
                            vec4_float_zero_, s0);
@@ -1014,25 +1393,27 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
     } break;
 
     case AluVectorOpcode::kTrunc: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          GLSLstd450::kTrunc, {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
+                                             GLSLstd450::kTrunc, {sources[0]});
     } break;
 
     default:
+      assert_unhandled_case(instr.vector_opcode);
      break;
   }
 
+  assert_not_zero(dest);
   if (dest) {
-    // If predicated, discard the result from the instruction.
-    Id pv_dest = dest;
-    if (instr.is_predicated) {
-      pv_dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, pred_cond,
-                              dest, b.createLoad(pv_));
-    }
+    b.createStore(dest, pv_);
+    StoreToResult(dest, instr.result);
+  }
 
-    b.createStore(pv_dest, pv_);
-    StoreToResult(dest, instr.result, pred_cond);
+  if (close_predicated_block && open_predicated_block_) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
   }
 }
 
@@ -1040,6 +1421,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
     const ParsedAluInstruction& instr) {
   auto& b = *builder_;
 
+  // TODO: If we have identical operands, reuse previous one.
   Id sources[3] = {0};
   Id dest = 0;
   for (size_t i = 0, x = 0; i < instr.operand_count; i++) {
@@ -1075,13 +1457,34 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
     }
   }
 
-  Id pred_cond = 0;
-  if (instr.is_predicated) {
-    pred_cond =
-        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
-                      b.makeBoolConstant(instr.predicate_condition));
+  // Close the open predicated block if this instr isn't predicated or the
+  // conditions do not match.
+  if (open_predicated_block_ &&
+      (!instr.is_predicated ||
+       instr.predicate_condition != predicated_block_cond_)) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
   }
 
+  if (!open_predicated_block_ && instr.is_predicated) {
+    Id pred_cond =
+        b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
+                      b.makeBoolConstant(instr.predicate_condition));
+    auto block = &b.makeNewBlock();
+    open_predicated_block_ = true;
+    predicated_block_cond_ = instr.predicate_condition;
+    predicated_block_end_ = &b.makeNewBlock();
+
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
+    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
+    b.setBuildPoint(block);
+  }
+
+  bool close_predicated_block = false;
   switch (instr.scalar_opcode) {
     case AluScalarOpcode::kAdds:
     case AluScalarOpcode::kAddsc0:
@@ -1093,32 +1496,29 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
 
     case AluScalarOpcode::kAddsPrev: {
       // dest = src0 + ps
-      dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], ps_);
+      dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0],
+                           b.createLoad(ps_));
     } break;
 
     case AluScalarOpcode::kCos: {
       // dest = cos(src0)
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kCos,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kCos, {sources[0]});
     } break;
 
     case AluScalarOpcode::kExp: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kExp2,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kExp2, {sources[0]});
    } break;
 
    case AluScalarOpcode::kFloors: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFloor,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kFloor, {sources[0]});
    } break;
 
    case AluScalarOpcode::kFrcs: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFract,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kFract, {sources[0]});
    } break;
 
    case AluScalarOpcode::kKillsEq: {
@@ -1126,7 +1526,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto kill_block = &b.makeNewBlock();
       auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
                                 b.makeFloatConstant(0.f));
-      cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
       b.createConditionalBranch(cond, kill_block, continue_block);
 
       b.setBuildPoint(kill_block);
@@ -1141,10 +1540,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto kill_block = &b.makeNewBlock();
       auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_,
                                 sources[0], b.makeFloatConstant(0.f));
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
       b.createConditionalBranch(cond, kill_block, continue_block);
 
       b.setBuildPoint(kill_block);
@@ -1159,10 +1554,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto kill_block = &b.makeNewBlock();
       auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_,
                                 sources[0], b.makeFloatConstant(0.f));
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
      b.createConditionalBranch(cond, kill_block, continue_block);
 
      b.setBuildPoint(kill_block);
@@ -1177,10 +1568,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto kill_block = &b.makeNewBlock();
       auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_,
                                 sources[0], b.makeFloatConstant(0.f));
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
      b.createConditionalBranch(cond, kill_block, continue_block);
 
      b.setBuildPoint(kill_block);
@@ -1195,10 +1582,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto kill_block = &b.makeNewBlock();
       auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
                                 b.makeFloatConstant(1.f));
-      if (pred_cond) {
-        cond =
-            b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond);
-      }
      b.createConditionalBranch(cond, kill_block, continue_block);
 
      b.setBuildPoint(kill_block);
@@ -1209,27 +1592,32 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
    } break;
 
    case AluScalarOpcode::kLogc: {
+      auto t = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]});
+
+      // FIXME: We don't check to see if t == -INF, we just check for INF
+      auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, t);
+      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
+                           b.makeFloatConstant(-FLT_MAX), t);
    } break;
 
    case AluScalarOpcode::kLog: {
-      auto log = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_,
-          spv::GLSLstd450::kLog2, {sources[0]});
+      dest = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]});
    } break;
 
    case AluScalarOpcode::kMaxAsf: {
       auto addr =
           b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]);
       addr = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, int_type_,
-          spv::GLSLstd450::kSClamp,
+          spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp,
          {addr, b.makeIntConstant(-256), b.makeIntConstant(255)});
       b.createStore(addr, a0_);
 
       // dest = src0 >= src1 ? src0 : src1
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_,
-          spv::GLSLstd450::kFMax, {sources[0], sources[1]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             spv::GLSLstd450::kFMax,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluScalarOpcode::kMaxs: {
       // dest = max(src0, src1)
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax,
-          {sources[0], sources[1]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kFMax,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluScalarOpcode::kMins: {
       // dest = min(src0, src1)
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMin,
-          {sources[0], sources[1]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kFMin,
+                                             {sources[0], sources[1]});
     } break;
 
     case AluScalarOpcode::kMuls:
@@ -1273,7 +1660,8 @@
 
     case AluScalarOpcode::kMulsPrev: {
       // dest = src0 * ps
-      dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], ps_);
+      dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0],
+                           b.createLoad(ps_));
     } break;
 
     case AluScalarOpcode::kMulsPrev2: {
@@ -1281,28 +1669,57 @@
     } break;
 
     case AluScalarOpcode::kRcpc: {
-      // TODO: dest = src0 != 0.0 ? 1.0 / src0 : FLT_MAX;
+      dest = b.createBinOp(spv::Op::OpFDiv, float_type_,
+                           b.makeFloatConstant(1.f), sources[0]);
+      dest = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp,
+          {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)});
     } break;
 
-    case AluScalarOpcode::kRcp:
     case AluScalarOpcode::kRcpf: {
+      dest = b.createBinOp(spv::Op::OpFDiv, float_type_,
+                           b.makeFloatConstant(1.f), sources[0]);
+      auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
+      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
+                           b.makeFloatConstant(0.f), dest);
+    } break;
+
+    case AluScalarOpcode::kRcp: {
       // dest = src0 != 0.0 ? 1.0 / src0 : 0.0;
       auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0],
                              b.makeFloatConstant(0.f));
       auto d = b.createBinOp(spv::Op::OpFDiv, float_type_,
                              b.makeFloatConstant(1.f), sources[0]);
-      dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
+      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
                            b.makeFloatConstant(0.f), d);
     } break;
 
+    case AluScalarOpcode::kRsqc: {
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             spv::GLSLstd450::kInverseSqrt,
+                                             {sources[0]});
+      dest = CreateGlslStd450InstructionCall(
+          spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp,
+          {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)});
+    } break;
+
+    case AluScalarOpcode::kRsqf: {
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             spv::GLSLstd450::kInverseSqrt,
+                                             {sources[0]});
+      auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
+      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
+                           b.makeFloatConstant(0.f), dest);
+    } break;
+
    case AluScalarOpcode::kRsq: {
      // dest = src0 != 0.0 ? inversesqrt(src0) : 0.0;
       auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
                              b.makeFloatConstant(0.f));
-      auto d = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, vec4_float_type_,
-          spv::GLSLstd450::kInverseSqrt, {sources[0]});
-      dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
+      auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                               spv::GLSLstd450::kInverseSqrt,
+                                               {sources[0]});
+      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
                            b.makeFloatConstant(0.f), d);
     } break;
 
@@ -1340,6 +1757,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
 
     case AluScalarOpcode::kSetpClr: {
       b.createStore(b.makeBoolConstant(false), p0_);
+      close_predicated_block = true;
       dest = b.makeFloatConstant(FLT_MAX);
     } break;
 
@@ -1348,6 +1766,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
                                 b.makeFloatConstant(0.f));
       // p0 = cond
       b.createStore(cond, p0_);
+      close_predicated_block = true;
 
       // dest = cond ? 0.f : 1.f;
       dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
@@ -1359,6 +1778,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
                                 sources[0], b.makeFloatConstant(0.f));
       // p0 = cond
       b.createStore(cond, p0_);
+      close_predicated_block = true;
 
       // dest = cond ? 0.f : 1.f;
       dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
@@ -1370,6 +1790,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
                                 sources[0], b.makeFloatConstant(0.f));
       // p0 = cond
       b.createStore(cond, p0_);
+      close_predicated_block = true;
 
       // dest = cond ? 0.f : 1.f;
       dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
@@ -1377,12 +1798,11 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
     } break;
 
     case AluScalarOpcode::kSetpInv: {
+      // p0 = src0 == 1.0
       auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
                                 b.makeFloatConstant(1.f));
-      auto pred =
-          b.createTriOp(spv::Op::OpSelect, bool_type_, cond,
-                        b.makeBoolConstant(true), b.makeBoolConstant(false));
-      b.createStore(pred, p0_);
+      b.createStore(cond, p0_);
+      close_predicated_block = true;
 
       // if (!cond) dest = src0 == 0.0 ? 1.0 : src0;
       auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_,
@@ -1399,6 +1819,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
 
       // p0 = cond
       b.createStore(cond, p0_);
+      close_predicated_block = true;
 
       // dest = cond ? 0.f : 1.f;
       dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
@@ -1411,9 +1832,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src,
                              b.makeFloatConstant(0.f));
       b.createStore(c, p0_);
+      close_predicated_block = true;
 
       dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax,
+          spv::NoPrecision, float_type_, GLSLstd450::kFMax,
          {sources[0], b.makeFloatConstant(0.f)});
     } break;
 
@@ -1421,13 +1843,18 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
       auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
                              b.makeFloatConstant(0.f));
       b.createStore(c, p0_);
+      close_predicated_block = true;
 
       dest = sources[0];
     } break;
 
     case AluScalarOpcode::kSin: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kSin,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kSin, {sources[0]});
+    } break;
+
+    case AluScalarOpcode::kSqrt: {
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kSqrt, {sources[0]});
     } break;
 
     case AluScalarOpcode::kSubs:
@@ -1438,29 +1865,32 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
     } break;
 
     case AluScalarOpcode::kSubsPrev: {
-      dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], ps_);
+      dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0],
+                           b.createLoad(ps_));
     } break;
 
     case AluScalarOpcode::kTruncs: {
-      dest = CreateGlslStd450InstructionCall(
-          spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kTrunc,
-          {sources[0]});
+      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
+                                             GLSLstd450::kTrunc, {sources[0]});
     } break;
 
     default:
+      assert_unhandled_case(instr.scalar_opcode);
      break;
   }
 
+  assert_not_zero(dest);
   if (dest) {
-    // If predicated, discard the result from the instruction.
-    Id ps_dest = dest;
-    if (instr.is_predicated) {
-      ps_dest = b.createTriOp(spv::Op::OpSelect, float_type_, pred_cond, dest,
-                              b.createLoad(ps_));
-    }
+    b.createStore(dest, ps_);
+    StoreToResult(dest, instr.result);
+  }
 
-    b.createStore(ps_dest, ps_);
-    StoreToResult(dest, instr.result, pred_cond);
+  if (close_predicated_block && open_predicated_block_) {
+    b.createBranch(predicated_block_end_);
+    b.setBuildPoint(predicated_block_end_);
+    open_predicated_block_ = false;
+    predicated_block_cond_ = false;
+    predicated_block_end_ = nullptr;
   }
 }
 
@@ -1494,15 +1924,15 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
     case InstructionStorageAddressingMode::kAddressAbsolute: {
       // storage_index + a0
       storage_index =
-          b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_),
+          b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
                        b.makeUintConstant(storage_base + op.storage_index));
     } break;
     case InstructionStorageAddressingMode::kAddressRelative: {
       // TODO: Based on loop index
       // storage_index + aL.x
-      storage_index = b.createBinOp(
-          spv::Op::OpIAdd, b.makeUintType(32), b.makeUintConstant(0),
-          b.makeUintConstant(storage_base + op.storage_index));
+      storage_index =
+          b.createBinOp(spv::Op::OpIAdd, uint_type_, b.makeUintConstant(0),
+                        b.makeUintConstant(storage_base + op.storage_index));
     } break;
     default:
       assert_always();
@@ -1544,8 +1974,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
 
   if (op.is_absolute_value) {
     storage_value = CreateGlslStd450InstructionCall(
-        spv::Decoration::DecorationInvariant, storage_type, GLSLstd450::kFAbs,
-        {storage_value});
+        spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value});
   }
   if (op.is_negated) {
     storage_value =
@@ -1598,8 +2027,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
 }
 
 void SpirvShaderTranslator::StoreToResult(Id source_value_id,
-                                          const InstructionResult& result,
-                                          Id predicate_cond) {
+                                          const InstructionResult& result) {
   auto& b = *builder_;
 
   if (result.storage_target == InstructionStorageTarget::kNone) {
@@ -1624,7 +2052,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
     case InstructionStorageAddressingMode::kAddressAbsolute: {
       // storage_index + a0
       storage_index =
-          b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_),
+          b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
                        b.makeUintConstant(result.storage_index));
     } break;
    case InstructionStorageAddressingMode::kAddressRelative: {
@@ -1677,7 +2105,11 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
      break;
    case InstructionStorageTarget::kDepth:
      assert_true(is_pixel_shader());
-      // TODO(benvanik): result.storage_index
+      storage_pointer = frag_depth_;
+      storage_class = spv::StorageClass::StorageClassOutput;
+      storage_type = float_type_;
+      storage_offsets.push_back(0);
+      storage_array = false;
      break;
    case InstructionStorageTarget::kNone:
      assert_unhandled_case(result.storage_target);
@@ -1696,10 +2128,18 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
 
   // Only load from storage if we need it later.
   Id storage_value = 0;
-  if (!result.has_all_writes() || predicate_cond) {
+  if (!result.has_all_writes()) {
    storage_value = b.createLoad(storage_pointer);
  }
 
+  // Clamp the input value.
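+  // (saturate semantics: x = min(max(x, 0.0), 1.0), which is exactly the
+  // FClamp(x, 0.0, 1.0) emitted below)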
+  if (result.is_clamped) {
+    source_value_id = CreateGlslStd450InstructionCall(
+        spv::NoPrecision, b.getTypeId(source_value_id),
+        spv::GLSLstd450::kFClamp,
+        {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)});
+  }
+
   // Convert to the appropriate type, if needed.
   if (b.getTypeId(source_value_id) != storage_type) {
     std::vector<Id> constituents;
@@ -1707,22 +2147,22 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
     auto n_dst = b.getNumTypeComponents(storage_type);
     assert_true(n_el < n_dst);
 
-    constituents.push_back(source_value_id);
-    for (int i = n_el; i < n_dst; i++) {
-      // Pad with zeroes.
-      constituents.push_back(b.makeFloatConstant(0.f));
+    if (n_el == 1) {
+      // Smear scalar.
+      for (int i = 0; i < n_dst; i++) {
+        constituents.push_back(source_value_id);
+      }
+    } else {
+      // FIXME: This may not work as intended.
+      constituents.push_back(source_value_id);
+      for (int i = n_el; i < n_dst; i++) {
+        // Pad with zeroes.
+        constituents.push_back(b.makeFloatConstant(0.f));
+      }
     }
 
-    source_value_id = b.createConstructor(spv::Decoration::DecorationInvariant,
-                                          constituents, storage_type);
-  }
-
-  // Clamp the input value.
-  if (result.is_clamped) {
-    source_value_id = CreateGlslStd450InstructionCall(
-        spv::Decoration::DecorationInvariant, b.getTypeId(source_value_id),
-        spv::GLSLstd450::kFClamp,
-        {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)});
+    source_value_id =
+        b.createConstructor(spv::NoPrecision, constituents, storage_type);
   }
 
   // swizzle
@@ -1788,13 +2228,8 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
   assert_true(b.getNumComponents(source_value_id) ==
               b.getNumTypeComponents(storage_type));
 
-  // Discard if predicate condition is false.
-  if (predicate_cond) {
-    source_value_id =
-        b.createTriOp(spv::Op::OpSelect, storage_type, predicate_cond,
-                      source_value_id, storage_value);
-  }
-
+  assert_true(b.getTypeId(source_value_id) ==
+              b.getDerefTypeId(storage_pointer));
   b.createStore(source_value_id, storage_pointer);
 }
 
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index 0d8b1e14c..b6a761a24 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -2,7 +2,7 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
-* Copyright 2015 Ben Vanik. All rights reserved.                             *
+* Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
 
@@ -17,7 +17,9 @@
 #include "third_party/glslang-spirv/SpvBuilder.h"
 #include "third_party/spirv/GLSL.std.450.hpp11"
 #include "xenia/gpu/shader_translator.h"
+#include "xenia/gpu/spirv/compiler.h"
 #include "xenia/ui/spirv/spirv_disassembler.h"
+#include "xenia/ui/spirv/spirv_validator.h"
 
 namespace xe {
 namespace gpu {
 
@@ -54,7 +56,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
   std::vector<uint8_t> CompleteTranslation() override;
   void PostTranslation(Shader* shader) override;
 
-  void PreProcessControlFlowInstruction(uint32_t cf_index) override;
+  void PreProcessControlFlowInstruction(
+      uint32_t cf_index, const ucode::ControlFlowInstruction& instr) override;
   void ProcessLabel(uint32_t cf_index) override;
   void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
   void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
@@ -91,10 +94,16 @@ class SpirvShaderTranslator : public ShaderTranslator {
   // Stores a value based on the specified result information.
   // The value will be transformed into the appropriate form for the result and
   // the proper components will be selected.
-  void StoreToResult(spv::Id source_value_id, const InstructionResult& result,
-                     spv::Id predicate_cond = 0);
+  void StoreToResult(spv::Id source_value_id, const InstructionResult& result);
 
   xe::ui::spirv::SpirvDisassembler disassembler_;
+  xe::ui::spirv::SpirvValidator validator_;
+  xe::gpu::spirv::Compiler compiler_;
+
+  // True if there's an open predicated block
+  bool open_predicated_block_ = false;
+  bool predicated_block_cond_ = false;
+  spv::Block* predicated_block_end_ = nullptr;
 
   // TODO(benvanik): replace with something better, make reusable, etc.
   std::unique_ptr<spv::Builder> builder_;
@@ -104,11 +113,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Function* translated_main_ = 0;
 
   // Types.
-  spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0;
+  spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
   spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
   spv::Id vec4_uint_type_ = 0;
   spv::Id vec4_bool_type_ = 0;
-  spv::Id sampled_image_type_ = 0;
 
   // Constants.
   spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0;
 
@@ -121,13 +129,19 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id pos_ = 0;
   spv::Id push_consts_ = 0;
   spv::Id interpolators_ = 0;
-  spv::Id frag_outputs_ = 0;
+  spv::Id vertex_id_ = 0;
+  spv::Id frag_outputs_ = 0, frag_depth_ = 0;
   spv::Id samplers_ = 0;
-  spv::Id img_[4] = {0};  // Images {1D, 2D, 3D, Cube}
+  spv::Id tex_[4] = {0};  // Images {1D, 2D, 3D, Cube}
 
   // Map of {binding -> {offset -> spv input}}
   std::map<uint32_t, std::map<uint32_t, spv::Id>> vertex_binding_map_;
-  std::map<uint32_t, spv::Block*> cf_blocks_;
+
+  struct CFBlock {
+    spv::Block* block = nullptr;
+    bool prev_dominates = true;
+  };
+  std::map<uint32_t, CFBlock> cf_blocks_;
 };
 
 }  // namespace gpu
 
diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h
index 500f22bb3..0cb2ed2ba 100644
--- a/src/xenia/gpu/texture_info.h
+++ b/src/xenia/gpu/texture_info.h
@@ -88,6 +88,66 @@ enum class TextureFormat : uint32_t {
   kUnknown = 0xFFFFFFFFu,
 };
 
+inline size_t GetTexelSize(TextureFormat format) {
+  switch (format) {
+    case TextureFormat::k_1_5_5_5:
+      return 2;
+      break;
+    case TextureFormat::k_2_10_10_10:
+      return 4;
+      break;
+    case TextureFormat::k_4_4_4_4:
+      return 2;
+      break;
+    case TextureFormat::k_5_6_5:
+      return 2;
+      break;
+    case TextureFormat::k_8:
+      return 1;
+      break;
+    case TextureFormat::k_8_8:
+      return 2;
+      break;
+    case TextureFormat::k_8_8_8_8:
+      return 4;
+      break;
+    case TextureFormat::k_16:
+      return 2;
+      break;
+    case TextureFormat::k_16_FLOAT:
+      return 2;
+      break;
+    case TextureFormat::k_16_16:
+      return 4;
+      break;
+    case TextureFormat::k_16_16_FLOAT:
+      return 4;
+      break;
+    case TextureFormat::k_16_16_16_16:
+      return 8;
+      break;
+    case TextureFormat::k_16_16_16_16_FLOAT:
+      return 8;
+      break;
+    case TextureFormat::k_32_FLOAT:
+      return 4;
+      break;
+    case TextureFormat::k_32_32_FLOAT:
+      return 8;
+      break;
+    case TextureFormat::k_32_32_32_32_FLOAT:
+      return 16;
+      break;
+    case TextureFormat::k_10_11_11:
+    case TextureFormat::k_11_11_10:
+      return 4;
+      break;
+    default:
+      assert_unhandled_case(format);
+      return 0;
+  }
+}
+
 inline TextureFormat ColorFormatToTextureFormat(ColorFormat color_format) {
   return static_cast<TextureFormat>(color_format);
 }
 
diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc
index 54c199736..b79b49df2 100644
--- a/src/xenia/gpu/trace_player.cc
+++ b/src/xenia/gpu/trace_player.cc
@@ -51,7 +51,7 @@ void TracePlayer::SeekFrame(int target_frame) {
   assert_true(frame->start_ptr <= frame->end_ptr);
   PlayTrace(frame->start_ptr, frame->end_ptr - frame->start_ptr,
-            TracePlaybackMode::kBreakOnSwap);
+            TracePlaybackMode::kBreakOnSwap, false);
 }
 
 void TracePlayer::SeekCommand(int target_command) {
@@ -71,11 +71,11 @@ void TracePlayer::SeekCommand(int target_command) {
     const auto& previous_command = frame->commands[previous_command_index];
     PlayTrace(previous_command.end_ptr,
               command.end_ptr - previous_command.end_ptr,
-              TracePlaybackMode::kBreakOnSwap);
+              TracePlaybackMode::kBreakOnSwap, false);
   } else {
     // Full playback from frame start.
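+    // A from-scratch replay must not reuse GPU resources cached by an
+    // earlier seek, so this path passes clear_caches = true below; the
+    // incremental path above replays on top of the existing caches.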
     PlayTrace(frame->start_ptr, command.end_ptr - frame->start_ptr,
-              TracePlaybackMode::kBreakOnSwap);
+              TracePlaybackMode::kBreakOnSwap, true);
   }
 }
 
@@ -84,19 +84,25 @@ void TracePlayer::WaitOnPlayback() {
 }
 
 void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size,
-                            TracePlaybackMode playback_mode) {
-  graphics_system_->command_processor()->CallInThread(
-      [this, trace_data, trace_size, playback_mode]() {
-        PlayTraceOnThread(trace_data, trace_size, playback_mode);
-      });
+                            TracePlaybackMode playback_mode,
+                            bool clear_caches) {
+  playing_trace_ = true;
+  graphics_system_->command_processor()->CallInThread([=]() {
+    PlayTraceOnThread(trace_data, trace_size, playback_mode, clear_caches);
+  });
 }
 
 void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
                                     size_t trace_size,
-                                    TracePlaybackMode playback_mode) {
+                                    TracePlaybackMode playback_mode,
+                                    bool clear_caches) {
   auto memory = graphics_system_->memory();
   auto command_processor = graphics_system_->command_processor();
 
+  if (clear_caches) {
+    command_processor->ClearCaches();
+  }
+
   command_processor->set_swap_mode(SwapMode::kIgnored);
   playback_percent_ = 0;
   auto trace_end = trace_data + trace_size;
 
diff --git a/src/xenia/gpu/trace_player.h b/src/xenia/gpu/trace_player.h
index d3926d460..0c3c6571a 100644
--- a/src/xenia/gpu/trace_player.h
+++ b/src/xenia/gpu/trace_player.h
@@ -50,9 +50,9 @@ class TracePlayer : public TraceReader {
 private:
  void PlayTrace(const uint8_t* trace_data, size_t trace_size,
-                 TracePlaybackMode playback_mode);
+                 TracePlaybackMode playback_mode, bool clear_caches);
  void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size,
-                         TracePlaybackMode playback_mode);
+                         TracePlaybackMode playback_mode, bool clear_caches);
 
  xe::ui::Loop* loop_;
  GraphicsSystem* graphics_system_;
 
diff --git a/src/xenia/gpu/trace_reader.cc b/src/xenia/gpu/trace_reader.cc
index fb58c436b..6bedfb9b4 100644
--- a/src/xenia/gpu/trace_reader.cc
+++ b/src/xenia/gpu/trace_reader.cc
@@ -75,6 +75,10 @@ void TraceReader::ParseTrace() {
   const uint8_t* packet_start_ptr = nullptr;
   const uint8_t* last_ptr = trace_ptr;
   bool pending_break = false;
+  auto current_command_buffer = new CommandBuffer();
+  current_frame.command_tree =
+      std::unique_ptr<CommandBuffer>(current_command_buffer);
+
   while (trace_ptr < trace_data_ + trace_size_) {
     ++current_frame.command_count;
     auto type = static_cast<TraceCommandType>(xe::load<uint32_t>(trace_ptr));
@@ -94,11 +98,29 @@ void TraceReader::ParseTrace() {
         auto cmd = reinterpret_cast<const IndirectBufferStartCommand*>(trace_ptr);
         trace_ptr += sizeof(*cmd) + cmd->count * 4;
+
+        // Traverse down a level.
+        auto sub_command_buffer = new CommandBuffer();
+        sub_command_buffer->parent = current_command_buffer;
+        current_command_buffer->commands.push_back(
+            CommandBuffer::Command(sub_command_buffer));
+        current_command_buffer = sub_command_buffer;
         break;
       }
       case TraceCommandType::kIndirectBufferEnd: {
         auto cmd = reinterpret_cast<const IndirectBufferEndCommand*>(trace_ptr);
         trace_ptr += sizeof(*cmd);
+
+        // IB packet is wrapped in a kPacketStart/kPacketEnd. Skip the end.
+        auto end_cmd = reinterpret_cast<const PacketEndCommand*>(trace_ptr);
+        assert_true(end_cmd->type == TraceCommandType::kPacketEnd);
+        trace_ptr += sizeof(*end_cmd);
+
+        // Go back up a level. If parent is null, this frame started in an
+        // indirect buffer.
+        if (current_command_buffer->parent) {
+          current_command_buffer = current_command_buffer->parent;
+        }
         break;
       }
       case TraceCommandType::kPacketStart: {
@@ -125,6 +147,8 @@ void TraceReader::ParseTrace() {
             command.end_ptr = trace_ptr;
             current_frame.commands.push_back(std::move(command));
             last_ptr = trace_ptr;
+            current_command_buffer->commands.push_back(CommandBuffer::Command(
+                uint32_t(current_frame.commands.size() - 1)));
             break;
           }
           case PacketCategory::kSwap:
@@ -136,6 +160,9 @@ void TraceReader::ParseTrace() {
         if (pending_break) {
           current_frame.end_ptr = trace_ptr;
           frames_.push_back(std::move(current_frame));
+          current_command_buffer = new CommandBuffer();
+          current_frame.command_tree =
+              std::unique_ptr<CommandBuffer>(current_command_buffer);
+
           current_frame.start_ptr = trace_ptr;
           current_frame.end_ptr = nullptr;
           current_frame.command_count = 0;
 
diff --git a/src/xenia/gpu/trace_reader.h b/src/xenia/gpu/trace_reader.h
index 5445bd1f9..b3245da46 100644
--- a/src/xenia/gpu/trace_reader.h
+++ b/src/xenia/gpu/trace_reader.h
@@ -11,6 +11,7 @@
 #define XENIA_GPU_TRACE_READER_H_
 
 #include
+#include
 
 #include "xenia/base/mapped_memory.h"
 #include "xenia/gpu/trace_protocol.h"
@@ -51,6 +52,42 @@ namespace gpu {
 
 class TraceReader {
 public:
+  struct CommandBuffer {
+    struct Command {
+      enum class Type {
+        kCommand,
+        kBuffer,
+      };
+
+      Command() {}
+      Command(Command&& other) {
+        type = other.type;
+        command_id = other.command_id;
+        command_subtree = std::move(other.command_subtree);
+      }
+      Command(CommandBuffer* buf) {
+        type = Type::kBuffer;
+        command_subtree = std::unique_ptr<CommandBuffer>(buf);
+      }
+      Command(uint32_t id) {
+        type = Type::kCommand;
+        command_id = id;
+      }
+      ~Command() = default;
+
+      Type type;
+      uint32_t command_id = -1;
+      std::unique_ptr<CommandBuffer> command_subtree = nullptr;
+    };
+
+    CommandBuffer() {}
+    ~CommandBuffer() {}
+
+    // Parent command buffer, if one exists.
+    CommandBuffer* parent = nullptr;
+    std::vector<Command> commands;
+  };
+
   struct Frame {
     struct Command {
       enum class Type {
@@ -74,7 +111,12 @@ class TraceReader {
     const uint8_t* start_ptr = nullptr;
     const uint8_t* end_ptr = nullptr;
     int command_count = 0;
+
+    // Flat list of all commands in this frame.
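+    // (command_tree below mirrors this list with indirect-buffer nesting;
+    // its kCommand nodes store indexes into this vector)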
     std::vector<Command> commands;
+
+    // Tree of all command buffers
+    std::unique_ptr<CommandBuffer> command_tree;
   };
 
   TraceReader() = default;
 
diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc
index 7ce20c7ca..8079631f5 100644
--- a/src/xenia/gpu/trace_viewer.cc
+++ b/src/xenia/gpu/trace_viewer.cc
@@ -390,6 +390,66 @@ void TraceViewer::DrawPacketDisassemblerUI() {
   ImGui::End();
 }
 
+int TraceViewer::RecursiveDrawCommandBufferUI(
+    const TraceReader::Frame* frame, TraceReader::CommandBuffer* buffer) {
+  int selected_id = -1;
+  int column_width = int(ImGui::GetContentRegionMax().x);
+
+  for (size_t i = 0; i < buffer->commands.size(); i++) {
+    switch (buffer->commands[i].type) {
+      case TraceReader::CommandBuffer::Command::Type::kBuffer: {
+        auto subtree = buffer->commands[i].command_subtree.get();
+        if (!subtree->commands.size()) {
+          continue;
+        }
+
+        ImGui::PushID(int(i));
+        if (ImGui::TreeNode((void*)0, "Indirect Buffer %d", i)) {
+          ImGui::Indent();
+          auto id = RecursiveDrawCommandBufferUI(
+              frame, buffer->commands[i].command_subtree.get());
+          ImGui::Unindent();
+          ImGui::TreePop();
+
+          if (id != -1) {
+            selected_id = id;
+          }
+        }
+        ImGui::PopID();
+      } break;
+
+      case TraceReader::CommandBuffer::Command::Type::kCommand: {
+        uint32_t command_id = buffer->commands[i].command_id;
+
+        const auto& command = frame->commands[command_id];
+        bool is_selected = command_id == player_->current_command_index();
+        const char* label;
+        switch (command.type) {
+          case TraceReader::Frame::Command::Type::kDraw:
+            label = "Draw";
+            break;
+          case TraceReader::Frame::Command::Type::kSwap:
+            label = "Swap";
+            break;
+        }
+
+        ImGui::PushID(command_id);
+        if (ImGui::Selectable(label, &is_selected)) {
+          selected_id = command_id;
+        }
+        ImGui::SameLine(column_width - 60.0f);
+        ImGui::Text("%d", command_id);
+        ImGui::PopID();
+        // if (did_seek && target_command == i) {
+        //   ImGui::SetScrollPosHere();
+        // }
+      } break;
+    }
+  }
+
+  return selected_id;
+}
+
 void TraceViewer::DrawCommandListUI() {
   ImGui::SetNextWindowPos(ImVec2(5, 70), ImGuiSetCond_FirstUseEver);
   if (!ImGui::Begin("Command List", nullptr, ImVec2(200, 640))) {
@@ -473,31 +533,12 @@ void TraceViewer::DrawCommandListUI() {
     ImGui::SetScrollPosHere();
   }
 
-  for (int i = 0; i < int(frame->commands.size()); ++i) {
-    ImGui::PushID(i);
-    is_selected = i == player_->current_command_index();
-    const auto& command = frame->commands[i];
-    const char* label;
-    switch (command.type) {
-      case TraceReader::Frame::Command::Type::kDraw:
-        label = "Draw";
-        break;
-      case TraceReader::Frame::Command::Type::kSwap:
-        label = "Swap";
-        break;
-    }
-    if (ImGui::Selectable(label, &is_selected)) {
-      if (!player_->is_playing_trace()) {
-        player_->SeekCommand(i);
-      }
-    }
-    ImGui::SameLine(column_width - 60.0f);
-    ImGui::Text("%d", i);
-    ImGui::PopID();
-    if (did_seek && target_command == i) {
-      ImGui::SetScrollPosHere();
-    }
+  auto id = RecursiveDrawCommandBufferUI(frame, frame->command_tree.get());
+  if (id != -1 && id != player_->current_command_index() &&
+      !player_->is_playing_trace()) {
+    player_->SeekCommand(id);
   }
+
   ImGui::EndChild();
   ImGui::End();
 }
@@ -639,8 +680,8 @@ void TraceViewer::DrawTextureInfo(
   ImGui::Columns(2);
   ImVec2 button_size(256, 256);
-  if (ImGui::ImageButton(ImTextureID(texture | ui::ImGuiDrawer::kIgnoreAlpha),
-                         button_size, ImVec2(0, 0), ImVec2(1, 1))) {
+  if (ImGui::ImageButton(ImTextureID(texture), button_size, ImVec2(0, 0),
+                         ImVec2(1, 1))) {
    // show viewer
  }
  ImGui::NextColumn();
@@ -1108,11 +1149,14 @@ void TraceViewer::DrawStateUI() {
                    ((window_scissor_br >> 16) & 0x7FFF) -
16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF)); uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_actual = (surface_info >> 18) & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = (surface_info >> 16) & 0x3; static const char* kMsaaNames[] = { "1X", "2X", "4X", }; + ImGui::BulletText("Surface Pitch - Actual: %d - %d", surface_pitch, + surface_actual); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; @@ -1124,6 +1168,9 @@ void TraceViewer::DrawStateUI() { assert_true(vport_xscale_enable == vport_yscale_enable == vport_zscale_enable == vport_xoffset_enable == vport_yoffset_enable == vport_zoffset_enable); + if (!vport_xscale_enable) { + ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); + } ImGui::BulletText( "Viewport Offset: %f, %f, %f", vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, @@ -1134,6 +1181,10 @@ void TraceViewer::DrawStateUI() { vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); + if (!vport_xscale_enable) { + ImGui::PopStyleColor(); + } + ImGui::BulletText("Vertex Format: %s, %s, %s, %s", ((vte_control >> 8) & 0x1) ? "x/w0" : "x", ((vte_control >> 8) & 0x1) ? "y/w0" : "y", @@ -1318,7 +1369,7 @@ void TraceViewer::DrawStateUI() { if (write_mask) { auto color_target = GetColorRenderTarget(surface_pitch, surface_msaa, color_base, color_format); - tex = ImTextureID(color_target | ui::ImGuiDrawer::kIgnoreAlpha); + tex = ImTextureID(color_target); if (ImGui::ImageButton(tex, button_size, ImVec2(0, 0), ImVec2(1, 1))) { // show viewer @@ -1330,10 +1381,9 @@ void TraceViewer::DrawStateUI() { } if (ImGui::IsItemHovered()) { ImGui::BeginTooltip(); - ImGui::Text( - "Color Target %d (%s), base %.4X, pitch %d, msaa %d, format %d", - i, write_mask ? "enabled" : "disabled", color_base, surface_pitch, - surface_msaa, color_format); + ImGui::Text("Color Target %d (%s), base %.4X, pitch %d, format %d", i, + write_mask ? 
"enabled" : "disabled", color_base, + surface_pitch, color_format); if (tex) { ImVec2 rel_pos; @@ -1407,17 +1457,19 @@ void TraceViewer::DrawStateUI() { auto button_pos = ImGui::GetCursorScreenPos(); ImVec2 button_size(256, 256); - ImGui::ImageButton( - ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), - button_size, ImVec2(0, 0), ImVec2(1, 1)); + ImGui::ImageButton(ImTextureID(depth_target), button_size, ImVec2(0, 0), + ImVec2(1, 1)); if (ImGui::IsItemHovered()) { ImGui::BeginTooltip(); + ImGui::Text("Depth Target: base %.4X, pitch %d, format %d", depth_base, + surface_pitch, depth_format); + ImVec2 rel_pos; rel_pos.x = ImGui::GetMousePos().x - button_pos.x; rel_pos.y = ImGui::GetMousePos().y - button_pos.y; - ZoomedImage(ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), - rel_pos, button_size, 32.f, ImVec2(256, 256)); + ZoomedImage(ImTextureID(depth_target), rel_pos, button_size, 32.f, + ImVec2(256, 256)); ImGui::EndTooltip(); } diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 6f7c900fc..7e82ad831 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -80,6 +80,8 @@ class TraceViewer { void DrawUI(); void DrawControllerUI(); void DrawPacketDisassemblerUI(); + int RecursiveDrawCommandBufferUI(const TraceReader::Frame* frame, + TraceReader::CommandBuffer* buffer); void DrawCommandListUI(); void DrawStateUI(); diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 7fd3c4768..02bd88a83 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -22,98 +22,19 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; -// Space kept between tail and head when wrapping. -constexpr VkDeviceSize kDeadZone = 4 * 1024; - constexpr VkDeviceSize kConstantRegisterUniformRange = 512 * 4 * 4 + 8 * 4 + 32 * 4; BufferCache::BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, size_t capacity) - : register_file_(register_file), - device_(*device), - transient_capacity_(capacity) { - // Uniform buffer. - VkBufferCreateInfo uniform_buffer_info; - uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - uniform_buffer_info.pNext = nullptr; - uniform_buffer_info.flags = 0; - uniform_buffer_info.size = transient_capacity_; - uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - uniform_buffer_info.queueFamilyIndexCount = 0; - uniform_buffer_info.pQueueFamilyIndices = nullptr; - auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr, - &transient_uniform_buffer_); - CheckResult(err, "vkCreateBuffer"); - - // Index buffer. - VkBufferCreateInfo index_buffer_info; - index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - index_buffer_info.pNext = nullptr; - index_buffer_info.flags = 0; - index_buffer_info.size = transient_capacity_; - index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - index_buffer_info.queueFamilyIndexCount = 0; - index_buffer_info.pQueueFamilyIndices = nullptr; - err = vkCreateBuffer(device_, &index_buffer_info, nullptr, - &transient_index_buffer_); - CheckResult(err, "vkCreateBuffer"); - - // Vertex buffer. 
-  VkBufferCreateInfo vertex_buffer_info;
-  vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-  vertex_buffer_info.pNext = nullptr;
-  vertex_buffer_info.flags = 0;
-  vertex_buffer_info.size = transient_capacity_;
-  vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
-  vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-  vertex_buffer_info.queueFamilyIndexCount = 0;
-  vertex_buffer_info.pQueueFamilyIndices = nullptr;
-  err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr,
-                       &transient_vertex_buffer_);
-  CheckResult(err, "vkCreateBuffer");
-
-  // Allocate the underlying buffer we use for all storage.
-  // We query all types and take the max alignment.
-  VkMemoryRequirements uniform_buffer_requirements;
-  VkMemoryRequirements index_buffer_requirements;
-  VkMemoryRequirements vertex_buffer_requirements;
-  vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
-                                &uniform_buffer_requirements);
-  vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
-                                &index_buffer_requirements);
-  vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
-                                &vertex_buffer_requirements);
-  uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
-  index_buffer_alignment_ = index_buffer_requirements.alignment;
-  vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
-  VkMemoryRequirements buffer_requirements;
-  buffer_requirements.size = transient_capacity_;
-  buffer_requirements.alignment =
-      std::max(uniform_buffer_requirements.alignment,
-               std::max(index_buffer_requirements.alignment,
-                        vertex_buffer_requirements.alignment));
-  buffer_requirements.memoryTypeBits =
-      uniform_buffer_requirements.memoryTypeBits |
-      index_buffer_requirements.memoryTypeBits |
-      vertex_buffer_requirements.memoryTypeBits;
-  transient_buffer_memory_ = device->AllocateMemory(
-      buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
-
-  // Alias all buffers to our memory.
-  vkBindBufferMemory(device_, transient_uniform_buffer_,
-                     transient_buffer_memory_, 0);
-  vkBindBufferMemory(device_, transient_index_buffer_, transient_buffer_memory_,
-                     0);
-  vkBindBufferMemory(device_, transient_vertex_buffer_,
-                     transient_buffer_memory_, 0);
-
-  // Map memory and keep it mapped while we use it.
-  err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
-                    &transient_buffer_data_);
-  CheckResult(err, "vkMapMemory");
+    : register_file_(register_file), device_(*device) {
+  transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(device);
+  if (!transient_buffer_->Initialize(capacity,
+                                     VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+                                         VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+                                         VK_BUFFER_USAGE_VERTEX_BUFFER_BIT)) {
+    assert_always();
+  }
 
   // Descriptor pool used for all of our cached descriptors.
   // In the steady state we don't allocate anything, so these are all manually
@@ -129,8 +50,8 @@ BufferCache::BufferCache(RegisterFile* register_file,
   pool_sizes[0].descriptorCount = 2;
   descriptor_pool_info.poolSizeCount = 1;
   descriptor_pool_info.pPoolSizes = pool_sizes;
-  err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
-                               &descriptor_pool_);
+  auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
+                                    &descriptor_pool_);
   CheckResult(err, "vkCreateDescriptorPool");
 
   // Create the descriptor set layout used for our uniform buffer.
@@ -180,7 +101,7 @@ BufferCache::BufferCache(RegisterFile* register_file,
 
   // Initialize descriptor set with our buffers.
   VkDescriptorBufferInfo buffer_info;
-  buffer_info.buffer = transient_uniform_buffer_;
+  buffer_info.buffer = transient_buffer_->gpu_buffer();
   buffer_info.offset = 0;
   buffer_info.range = kConstantRegisterUniformRange;
   VkWriteDescriptorSet descriptor_writes[2];
@@ -212,25 +133,20 @@ BufferCache::~BufferCache() {
                              &transient_descriptor_set_);
   vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
   vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
-  vkUnmapMemory(device_, transient_buffer_memory_);
-  vkFreeMemory(device_, transient_buffer_memory_, nullptr);
-  vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
-  vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
-  vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
+  transient_buffer_->Shutdown();
 }
 
 std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
     const Shader::ConstantRegisterMap& vertex_constant_register_map,
-    const Shader::ConstantRegisterMap& pixel_constant_register_map) {
+    const Shader::ConstantRegisterMap& pixel_constant_register_map,
+    std::shared_ptr<ui::vulkan::Fence> fence) {
   // Fat struct, including all registers:
   // struct {
   //   vec4 float[512];
   //   uint bool[8];
   //   uint loop[32];
   // };
-  size_t total_size =
-      xe::round_up(kConstantRegisterUniformRange, uniform_buffer_alignment_);
-  auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size);
+  auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return {VK_WHOLE_SIZE, VK_WHOLE_SIZE};
@@ -238,8 +154,7 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
   // Copy over all the registers.
   const auto& values = register_file_->values;
-  uint8_t* dest_ptr =
-      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
+  uint8_t* dest_ptr = transient_buffer_->host_base() + offset;
   std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
               (512 * 4 * 4));
   dest_ptr += 512 * 4 * 4;
@@ -258,8 +173,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
   // constant indexing.
 #if 0
   // Allocate space in the buffer for our data.
-  auto offset = AllocateTransientData(uniform_buffer_alignment_,
-                                      constant_register_map.packed_byte_length);
+  auto offset =
+      AllocateTransientData(constant_register_map.packed_byte_length, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return VK_WHOLE_SIZE;
@@ -304,11 +219,12 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
 }
 
 std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
-    const void* source_ptr, size_t source_length, IndexFormat format) {
+    const void* source_ptr, size_t source_length, IndexFormat format,
+    std::shared_ptr<ui::vulkan::Fence> fence) {
   // TODO(benvanik): check cache.
 
   // Allocate space in the buffer for our data.
-  auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
+  auto offset = AllocateTransientData(source_length, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return {nullptr, VK_WHOLE_SIZE};
@@ -319,25 +235,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
   // TODO(benvanik): memcpy then use compute shaders to swap?
   if (format == IndexFormat::kInt16) {
     // Endian::k8in16, swap half-words.
-    xe::copy_and_swap_16_aligned(
-        reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr,
-        source_length / 2);
+    xe::copy_and_swap_16_aligned(transient_buffer_->host_base() + offset,
+                                 source_ptr, source_length / 2);
   } else if (format == IndexFormat::kInt32) {
     // Endian::k8in32, swap words.
-    xe::copy_and_swap_32_aligned(
-        reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr,
-        source_length / 4);
+    xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
+                                 source_ptr, source_length / 4);
   }
 
-  return {transient_index_buffer_, offset};
+  return {transient_buffer_->gpu_buffer(), offset};
 }
 
 std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
-    const void* source_ptr, size_t source_length) {
+    const void* source_ptr, size_t source_length, Endian endian,
+    std::shared_ptr<ui::vulkan::Fence> fence) {
   // TODO(benvanik): check cache.
 
   // Allocate space in the buffer for our data.
-  auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
+  auto offset = AllocateTransientData(source_length, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return {nullptr, VK_WHOLE_SIZE};
@@ -345,60 +260,38 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
   // Copy data into the buffer.
   // TODO(benvanik): memcpy then use compute shaders to swap?
-  // Endian::k8in32, swap words.
-  xe::copy_and_swap_32_aligned(
-      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr,
-      source_length / 4);
+  assert_true(endian == Endian::k8in32);
+  if (endian == Endian::k8in32) {
+    // Endian::k8in32, swap words.
+    xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
+                                 source_ptr, source_length / 4);
+  }
 
-  return {transient_vertex_buffer_, offset};
+  return {transient_buffer_->gpu_buffer(), offset};
 }
 
-VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize alignment,
-                                                VkDeviceSize length) {
+VkDeviceSize BufferCache::AllocateTransientData(
    VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence) {
   // Try fast path (if we have space).
-  VkDeviceSize offset = TryAllocateTransientData(alignment, length);
+  VkDeviceSize offset = TryAllocateTransientData(length, fence);
   if (offset != VK_WHOLE_SIZE) {
     return offset;
   }
 
   // Ran out of easy allocations.
   // Try consuming fences before we panic.
-  assert_always("Reclamation not yet implemented");
+  transient_buffer_->Scavenge();
 
   // Try again. It may still fail if we didn't get enough space back.
-  return TryAllocateTransientData(alignment, length);
+  offset = TryAllocateTransientData(length, fence);
+  return offset;
 }
 
-VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment,
-                                                   VkDeviceSize length) {
-  if (transient_tail_offset_ >= transient_head_offset_) {
-    // Tail follows head, so things are easy:
-    //   |    H----T    |
-    if (xe::round_up(transient_tail_offset_, alignment) + length <=
-        transient_capacity_) {
-      // Allocation fits from tail to end of buffer, so grow.
-      //   |    H----**T |
-      VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment);
-      transient_tail_offset_ = offset + length;
-      return offset;
-    } else if (length + kDeadZone <= transient_head_offset_) {
-      // Can't fit at the end, but can fit if we wrap around.
-      //   |**T  H----....|
-      VkDeviceSize offset = 0;
-      transient_tail_offset_ = length;
-      return offset;
-    }
-  } else {
-    // Head follows tail, so we're reversed:
-    //   |----T    H---|
-    if (xe::round_up(transient_tail_offset_, alignment) + length + kDeadZone <=
-        transient_head_offset_) {
-      // Fits from tail to head.
-      //   |----***T  H---|
-      VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment);
-      transient_tail_offset_ = offset + length;
-      return offset;
-    }
+VkDeviceSize BufferCache::TryAllocateTransientData(
+    VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence) {
+  auto alloc = transient_buffer_->Acquire(length, fence);
+  if (alloc) {
+    return alloc->offset;
   }
 
   // No more space.
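All three transient buffers now alias one `CircularBuffer`, and fence tracking replaces the old dead-zone bookkeeping. To illustrate the idea only (a toy model, not the real `ui::vulkan::CircularBuffer`, whose API is inferred from its uses in this diff), fence-based scavenging boils down to:

```cpp
#include <cstddef>
#include <deque>
#include <memory>

// Toy model of fence-tracked ring reclamation. A real implementation also
// handles alignment and wrap-around; here a fence is reduced to a shared bool.
struct Region {
  size_t offset;
  size_t length;
  std::shared_ptr<bool> fence_signaled;  // stand-in for ui::vulkan::Fence
};

struct ToyRing {
  std::deque<Region> in_flight;
  size_t head = 0;  // start of the oldest still-in-flight region

  // Frees regions whose fence has signaled; the space between the old and new
  // head becomes available again. This is what Scavenge() provides for the
  // retry in AllocateTransientData() above.
  void Scavenge() {
    while (!in_flight.empty() && *in_flight.front().fence_signaled) {
      head = in_flight.front().offset + in_flight.front().length;
      in_flight.pop_front();
    }
  }
};
```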
@@ -420,9 +313,9 @@ void BufferCache::Flush(VkCommandBuffer command_buffer) {
   VkMappedMemoryRange dirty_range;
   dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
   dirty_range.pNext = nullptr;
-  dirty_range.memory = transient_buffer_memory_;
+  dirty_range.memory = transient_buffer_->gpu_memory();
   dirty_range.offset = 0;
-  dirty_range.size = transient_capacity_;
+  dirty_range.size = transient_buffer_->capacity();
   vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
 }
 
@@ -434,6 +327,8 @@ void BufferCache::ClearCache() {
   // TODO(benvanik): caching.
 }
 
+void BufferCache::Scavenge() { transient_buffer_->Scavenge(); }
+
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h
index 1c7330e52..8695fc36d 100644
--- a/src/xenia/gpu/vulkan/buffer_cache.h
+++ b/src/xenia/gpu/vulkan/buffer_cache.h
@@ -13,6 +13,7 @@
 #include "xenia/gpu/register_file.h"
 #include "xenia/gpu/shader.h"
 #include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/circular_buffer.h"
 #include "xenia/ui/vulkan/vulkan.h"
 #include "xenia/ui/vulkan/vulkan_device.h"
 
@@ -50,22 +51,24 @@ class BufferCache {
   // The returned offsets may alias.
   std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
       const Shader::ConstantRegisterMap& vertex_constant_register_map,
-      const Shader::ConstantRegisterMap& pixel_constant_register_map);
+      const Shader::ConstantRegisterMap& pixel_constant_register_map,
+      std::shared_ptr<ui::vulkan::Fence> fence);
 
   // Uploads index buffer data from guest memory, possibly eliding with
   // recently uploaded data or cached copies.
   // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
   // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
-  std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
-                                                      size_t source_length,
-                                                      IndexFormat format);
+  std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
+      const void* source_ptr, size_t source_length, IndexFormat format,
+      std::shared_ptr<ui::vulkan::Fence> fence);
 
   // Uploads vertex buffer data from guest memory, possibly eliding with
   // recently uploaded data or cached copies.
   // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
   // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
-  std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
-                                                       size_t source_length);
+  std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
+      const void* source_ptr, size_t source_length, Endian endian,
+      std::shared_ptr<ui::vulkan::Fence> fence);
 
   // Flushes all pending data to the GPU.
   // Until this is called the GPU is not guaranteed to see any data.
@@ -81,36 +84,26 @@ class BufferCache {
   // Clears all cached content and prevents future elision with pending data.
   void ClearCache();
 
+  // Wipes all data no longer needed.
+  void Scavenge();
+
  private:
   // Allocates a block of memory in the transient buffer.
   // When memory is not available fences are checked and space is reclaimed.
   // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
-  VkDeviceSize AllocateTransientData(VkDeviceSize alignment,
-                                     VkDeviceSize length);
+  VkDeviceSize AllocateTransientData(VkDeviceSize length,
+                                     std::shared_ptr<ui::vulkan::Fence> fence);
   // Tries to allocate a block of memory in the transient buffer.
   // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
-  VkDeviceSize TryAllocateTransientData(VkDeviceSize alignment,
-                                        VkDeviceSize length);
+  VkDeviceSize TryAllocateTransientData(
+      VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence);
 
   RegisterFile* register_file_ = nullptr;
   VkDevice device_ = nullptr;
 
   // Staging ringbuffer we cycle through fast. Used for data we don't
   // plan on keeping past the current frame.
-  size_t transient_capacity_ = 0;
-  VkBuffer transient_uniform_buffer_ = nullptr;
-  VkBuffer transient_index_buffer_ = nullptr;
-  VkBuffer transient_vertex_buffer_ = nullptr;
-  VkDeviceMemory transient_buffer_memory_ = nullptr;
-  void* transient_buffer_data_ = nullptr;
-  VkDeviceSize transient_head_offset_ = 0;
-  VkDeviceSize transient_tail_offset_ = 0;
-
-  // Required alignments for our various types.
-  // All allocations must start at the appropriate alignment.
-  VkDeviceSize uniform_buffer_alignment_ = 0;
-  VkDeviceSize index_buffer_alignment_ = 0;
-  VkDeviceSize vertex_buffer_alignment_ = 0;
+  std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
 
   VkDescriptorPool descriptor_pool_ = nullptr;
   VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
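Callers now thread the fence of the submission that will consume the data through every upload. A hedged sketch of the intended call pattern (`current_batch_fence`, `guest_ptr`, and `length` are hypothetical names, not from the patch):

```cpp
// Sketch: upload index data tagged with the current batch's fence (C++14).
std::shared_ptr<ui::vulkan::Fence> fence = current_batch_fence;
auto ib = buffer_cache->UploadIndexBuffer(guest_ptr, length,
                                          IndexFormat::kInt16, fence);
if (ib.second == VK_WHOLE_SIZE) {
  // OOM: the transient ring is full of still-in-flight data.
} else {
  vkCmdBindIndexBuffer(command_buffer, ib.first, ib.second,
                       VK_INDEX_TYPE_UINT16);
}
// Later, once the submission guarded by `fence` has completed:
buffer_cache->Scavenge();  // regions tagged with signaled fences are reused
```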
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc
index 542329af5..e80cb4675 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.cc
+++ b/src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -17,6 +17,9 @@
 #include "xenia/gpu/gpu_flags.h"
 #include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
 
+#include <cinttypes>
+#include <cstring>
+
 namespace xe {
 namespace gpu {
 namespace vulkan {
@@ -154,40 +157,19 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
                                host_address, dword_count);
     shader_map_.insert({data_hash, shader});
 
-    // Perform translation.
-    // If this fails the shader will be marked as invalid and ignored later.
-    if (!shader_translator_.Translate(shader)) {
-      XELOGE("Shader translation failed; marking shader as ignored");
-      return shader;
-    }
-
-    // Prepare the shader for use (creates our VkShaderModule).
-    // It could still fail at this point.
-    if (!shader->Prepare()) {
-      XELOGE("Shader preparation failed; marking shader as ignored");
-      return shader;
-    }
-
-    if (shader->is_valid()) {
-      XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s",
-               shader_type == ShaderType::kVertex ? "vertex" : "pixel",
-               guest_address, dword_count * 4,
-               shader->ucode_disassembly().c_str());
-    }
-
-    // Dump shader files if desired.
-    if (!FLAGS_dump_shaders.empty()) {
-      shader->Dump(FLAGS_dump_shaders, "vk");
-    }
-
   return shader;
 }
 
-bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
-                                      const RenderState* render_state,
-                                      VulkanShader* vertex_shader,
-                                      VulkanShader* pixel_shader,
-                                      PrimitiveType primitive_type) {
+PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
+    VkCommandBuffer command_buffer, const RenderState* render_state,
+    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
+    PrimitiveType primitive_type, VkPipeline* pipeline_out) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
+  assert_not_null(pipeline_out);
+
   // Perform a pass over all registers and state updating our cached structures.
   // This will tell us if anything has changed that requires us to either build
   // a new pipeline or use an existing one.
@@ -208,7 +190,7 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
     // Error updating state - bail out.
     // We are in an indeterminate state, so reset things for the next attempt.
     current_pipeline_ = nullptr;
-    return false;
+    return update_status;
   }
   if (!pipeline) {
     // Should have a hash key produced by the UpdateState pass.
@@ -217,24 +199,12 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
     current_pipeline_ = pipeline;
     if (!pipeline) {
       // Unable to create pipeline.
-      return false;
+      return UpdateStatus::kError;
     }
   }
 
-  // Bind the pipeline.
- vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - // Issue all changed dynamic state information commands. - // TODO(benvanik): dynamic state is kept in the command buffer, so if we - // have issued it before (regardless of pipeline) we don't need to do it now. - // TODO(benvanik): track whether we have issued on the given command buffer. - bool full_dynamic_state = true; - if (!SetDynamicState(command_buffer, full_dynamic_state)) { - // Failed to update state. - return false; - } - - return true; + *pipeline_out = pipeline; + return update_status; } void PipelineCache::ClearCache() { @@ -291,16 +261,140 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, pipeline_info.basePipelineHandle = nullptr; pipeline_info.basePipelineIndex = 0; VkPipeline pipeline = nullptr; - auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, - nullptr, &pipeline); + auto err = vkCreateGraphicsPipelines(device_, pipeline_cache_, 1, + &pipeline_info, nullptr, &pipeline); CheckResult(err, "vkCreateGraphicsPipelines"); + // Dump shader disassembly. + if (FLAGS_vulkan_dump_disasm) { + DumpShaderDisasmNV(pipeline_info); + } + // Add to cache with the hash key for reuse. cached_pipelines_.insert({hash_key, pipeline}); return pipeline; } +bool PipelineCache::TranslateShader(VulkanShader* shader, + xenos::xe_gpu_program_cntl_t cntl) { + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. + if (!shader_translator_.Translate(shader, cntl)) { + XELOGE("Shader translation failed; marking shader as ignored"); + return false; + } + + // Prepare the shader for use (creates our VkShaderModule). + // It could still fail at this point. + if (!shader->Prepare()) { + XELOGE("Shader preparation failed; marking shader as ignored"); + return false; + } + + if (shader->is_valid()) { + XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", + shader->type() == ShaderType::kVertex ? "vertex" : "pixel", + shader->ucode_dword_count() * 4, shader->ucode_data_hash(), + shader->ucode_disassembly().c_str()); + } + + // Dump shader files if desired. + if (!FLAGS_dump_shaders.empty()) { + shader->Dump(FLAGS_dump_shaders, "vk"); + } + + return shader->is_valid(); +} + +void PipelineCache::DumpShaderDisasmNV( + const VkGraphicsPipelineCreateInfo& pipeline_info) { + // !! HACK !!: This only works on NVidia drivers. Dumps shader disasm. + // This code is super ugly. Update this when NVidia includes an official + // way to dump shader disassembly. + + VkPipelineCacheCreateInfo pipeline_cache_info; + VkPipelineCache dummy_pipeline_cache; + pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + pipeline_cache_info.pNext = nullptr; + pipeline_cache_info.flags = 0; + pipeline_cache_info.initialDataSize = 0; + pipeline_cache_info.pInitialData = nullptr; + auto err = vkCreatePipelineCache(device_, &pipeline_cache_info, nullptr, + &dummy_pipeline_cache); + CheckResult(err, "vkCreatePipelineCache"); + + // Create a pipeline on the dummy cache and dump it. 
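+  // (The NVIDIA driver happens to embed "!!NVvp"/"!!NVfp" text program
+  // disassembly in the raw cache blob; the scan below hunts for it.)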
+ VkPipeline dummy_pipeline; + err = vkCreateGraphicsPipelines(device_, dummy_pipeline_cache, 1, + &pipeline_info, nullptr, &dummy_pipeline); + + std::vector pipeline_data; + size_t data_size = 0; + err = vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size, + nullptr); + if (err == VK_SUCCESS) { + pipeline_data.resize(data_size); + vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size, + pipeline_data.data()); + + // Scan the data for the disassembly. + std::string disasm_vp, disasm_fp; + + const char* disasm_start_vp = nullptr; + const char* disasm_start_fp = nullptr; + size_t search_offset = 0; + const char* search_start = + reinterpret_cast(pipeline_data.data()); + while (true) { + auto p = reinterpret_cast( + memchr(pipeline_data.data() + search_offset, '!', + pipeline_data.size() - search_offset)); + if (!p) { + break; + } + if (!strncmp(p, "!!NV", 4)) { + if (!strncmp(p + 4, "vp", 2)) { + disasm_start_vp = p; + } else if (!strncmp(p + 4, "fp", 2)) { + disasm_start_fp = p; + } + + if (disasm_start_fp && disasm_start_vp) { + // Found all we needed. + break; + } + } + search_offset = p - search_start; + ++search_offset; + } + if (disasm_start_vp) { + disasm_vp = std::string(disasm_start_vp); + + // For some reason there's question marks all over the code. + disasm_vp.erase(std::remove(disasm_vp.begin(), disasm_vp.end(), '?'), + disasm_vp.end()); + } else { + disasm_vp = std::string("Shader disassembly not available."); + } + + if (disasm_start_fp) { + disasm_fp = std::string(disasm_start_fp); + + // For some reason there's question marks all over the code. + disasm_fp.erase(std::remove(disasm_fp.begin(), disasm_fp.end(), '?'), + disasm_fp.end()); + } else { + disasm_fp = std::string("Shader disassembly not available."); + } + + XELOGI("%s\n=====================================\n%s\n", disasm_vp.c_str(), + disasm_fp.c_str()); + } + + vkDestroyPipelineCache(device_, dummy_pipeline_cache, nullptr); +} + VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, bool is_line_mode) { switch (primitive_type) { @@ -334,10 +428,16 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, bool full_update) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + auto& regs = set_dynamic_state_registers_; bool window_offset_dirty = SetShadowRegister(®s.pa_sc_window_offset, XE_GPU_REG_PA_SC_WINDOW_OFFSET); + window_offset_dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); // Window parameters. // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h @@ -397,22 +497,21 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, XE_GPU_REG_PA_CL_VPORT_ZSCALE); if (viewport_state_dirty) { - // HACK: no clue where to get these values. // RB_SURFACE_INFO auto surface_msaa = static_cast((regs.rb_surface_info >> 16) & 0x3); - // TODO(benvanik): ?? + + // Apply a multiplier to emulate MSAA. 
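+  // 2X MSAA doubles the surface height only; 4X doubles both width and
+  // height, mirroring how the MSAA'd surface appears to be laid out in EDRAM.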
float window_width_scalar = 1; float window_height_scalar = 1; switch (surface_msaa) { case MsaaSamples::k1X: break; case MsaaSamples::k2X: - window_width_scalar = 2; + window_height_scalar = 2; break; case MsaaSamples::k4X: - window_width_scalar = 2; - window_height_scalar = 2; + window_width_scalar = window_height_scalar = 2; break; } @@ -429,10 +528,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, vport_yoffset_enable == vport_zoffset_enable); VkViewport viewport_rect; - viewport_rect.x = 0; - viewport_rect.y = 0; - viewport_rect.width = 100; - viewport_rect.height = 100; + std::memset(&viewport_rect, 0, sizeof(VkViewport)); viewport_rect.minDepth = 0; viewport_rect.maxDepth = 1; @@ -443,6 +539,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; + window_width_scalar = window_height_scalar = 1; float vpw = 2 * window_width_scalar * vsx; float vph = -2 * window_height_scalar * vsy; @@ -490,25 +587,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); } - // VK_DYNAMIC_STATE_LINE_WIDTH - vkCmdSetLineWidth(command_buffer, 1.0f); + if (full_update) { + // VK_DYNAMIC_STATE_LINE_WIDTH + vkCmdSetLineWidth(command_buffer, 1.0f); - // VK_DYNAMIC_STATE_DEPTH_BIAS - vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); + // VK_DYNAMIC_STATE_DEPTH_BIAS + vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); - // VK_DYNAMIC_STATE_DEPTH_BOUNDS - vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); + // VK_DYNAMIC_STATE_DEPTH_BOUNDS + vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); - // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK - vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK + vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - // VK_DYNAMIC_STATE_STENCIL_REFERENCE - vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + // VK_DYNAMIC_STATE_STENCIL_REFERENCE + vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK - vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - - // TODO(benvanik): push constants. + // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK + vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + } bool push_constants_dirty = full_update || viewport_state_dirty; push_constants_dirty |= @@ -539,7 +636,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants.window_scale[1] = -1.0f; } else { push_constants.window_scale[0] = 1.0f / 2560.0f; - push_constants.window_scale[1] = -1.0f / 2560.0f; + push_constants.window_scale[1] = 1.0f / 2560.0f; } // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf @@ -558,7 +655,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants.vtx_fmt[3] = vtx_w0_fmt; // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE - // Deprecated in Vulkan, implemented in shader. + // Emulated in shader. 
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; // ALPHATESTENABLE push_constants.alpha_test[0] = @@ -657,16 +754,32 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.primitive_type != primitive_type; + regs.vertex_shader = vertex_shader; + regs.pixel_shader = pixel_shader; + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.vertex_shader = vertex_shader; - regs.pixel_shader = pixel_shader; - regs.primitive_type = primitive_type; + + xenos::xe_gpu_program_cntl_t sq_program_cntl; + sq_program_cntl.dword_0 = regs.sq_program_cntl; + + if (!vertex_shader->is_translated() && + !TranslateShader(vertex_shader, sq_program_cntl)) { + XELOGE("Failed to translate the vertex shader!"); + return UpdateStatus::kError; + } + + if (!pixel_shader->is_translated() && + !TranslateShader(pixel_shader, sq_program_cntl)) { + XELOGE("Failed to translate the pixel shader!"); + return UpdateStatus::kError; + } update_shader_stages_stage_count_ = 0; @@ -723,11 +836,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( bool dirty = false; dirty |= vertex_shader != regs.vertex_shader; + regs.vertex_shader = vertex_shader; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.vertex_shader = vertex_shader; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; state_info.pNext = nullptr; @@ -765,11 +878,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( : VK_FORMAT_A2R10G10B10_UNORM_PACK32; break; case VertexFormat::k_10_11_11: - assert_always("unsupported?"); + assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; case VertexFormat::k_11_11_10: - assert_true(is_signed); + // Converted in-shader. + // TODO(DrChat) + assert_always(); + // vertex_attrib_descr.format = VK_FORMAT_R32_UINT; vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; case VertexFormat::k_16_16: @@ -802,19 +918,19 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( is_signed ? 
VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT; break; case VertexFormat::k_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32_SFLOAT; break; case VertexFormat::k_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32_SFLOAT; break; case VertexFormat::k_32_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32B32_SFLOAT; break; case VertexFormat::k_32_32_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32B32A32_SFLOAT; break; default: @@ -843,11 +959,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( XE_GPU_REG_PA_SU_SC_MODE_CNTL); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.primitive_type = primitive_type; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; @@ -934,14 +1050,17 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( auto& state_info = update_rasterization_state_info_; bool dirty = false; + dirty |= regs.primitive_type != primitive_type; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); + dirty |= SetShadowRegister(®s.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; @@ -953,10 +1072,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // TODO(benvanik): right setting? state_info.depthClampEnable = VK_FALSE; - - // TODO(benvanik): use in depth-only mode? state_info.rasterizerDiscardEnable = VK_FALSE; + // KILL_PIX_POST_EARLY_Z + if (regs.pa_sc_viz_query & 0x80) { + state_info.rasterizerDiscardEnable = VK_TRUE; + } + bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; if (poly_mode) { uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; @@ -981,6 +1103,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( case 2: state_info.cullMode = VK_CULL_MODE_BACK_BIT; break; + case 3: + // Cull both sides? 
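+        // (VK_CULL_MODE_FRONT_AND_BACK would be the natural mapping if a
+        // title is ever seen using this.)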
+ assert_always(); + break; } if (regs.pa_su_sc_mode_cntl & 0x4) { state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; @@ -1007,18 +1133,53 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { auto& regs = update_multisample_state_regs_; auto& state_info = update_multisample_state_info_; + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG); + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + XXH64_update(&hash_state_, ®s, sizeof(regs)); + if (!dirty) { + return UpdateStatus::kCompatible; + } + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; state_info.pNext = nullptr; state_info.flags = 0; - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES (0x7) + // PA_SC_AA_MASK (0xFFFF) + // PA_SU_SC_MODE_CNTL MSAA_ENABLE (0x10000) + // If set, all samples will be sampled at set locations. Otherwise, they're + // all sampled from the pixel center. + if (FLAGS_vulkan_native_msaa) { + auto msaa_num_samples = + static_cast((regs.rb_surface_info >> 16) & 0x3); + switch (msaa_num_samples) { + case MsaaSamples::k1X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_num_samples); + break; + } + } else { + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + } + state_info.sampleShadingEnable = VK_FALSE; state_info.minSampleShading = 0; state_info.pSampleMask = nullptr; state_info.alphaToCoverageEnable = VK_FALSE; state_info.alphaToOneEnable = VK_FALSE; - return UpdateStatus::kCompatible; + return UpdateStatus::kMismatch; } PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { @@ -1038,19 +1199,60 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { state_info.pNext = nullptr; state_info.flags = 0; - state_info.depthTestEnable = VK_FALSE; - state_info.depthWriteEnable = VK_FALSE; - state_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + static const VkCompareOp compare_func_map[] = { + /* 0 */ VK_COMPARE_OP_NEVER, + /* 1 */ VK_COMPARE_OP_LESS, + /* 2 */ VK_COMPARE_OP_EQUAL, + /* 3 */ VK_COMPARE_OP_LESS_OR_EQUAL, + /* 4 */ VK_COMPARE_OP_GREATER, + /* 5 */ VK_COMPARE_OP_NOT_EQUAL, + /* 6 */ VK_COMPARE_OP_GREATER_OR_EQUAL, + /* 7 */ VK_COMPARE_OP_ALWAYS, + }; + static const VkStencilOp stencil_op_map[] = { + /* 0 */ VK_STENCIL_OP_KEEP, + /* 1 */ VK_STENCIL_OP_ZERO, + /* 2 */ VK_STENCIL_OP_REPLACE, + /* 3 */ VK_STENCIL_OP_INCREMENT_AND_WRAP, + /* 4 */ VK_STENCIL_OP_DECREMENT_AND_WRAP, + /* 5 */ VK_STENCIL_OP_INVERT, + /* 6 */ VK_STENCIL_OP_INCREMENT_AND_CLAMP, + /* 7 */ VK_STENCIL_OP_DECREMENT_AND_CLAMP, + }; + + // Depth state + // TODO: EARLY_Z_ENABLE (needs to be enabled in shaders) + state_info.depthWriteEnable = !!(regs.rb_depthcontrol & 0x4); + state_info.depthTestEnable = !!(regs.rb_depthcontrol & 0x2); + state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1); + + state_info.depthCompareOp = + compare_func_map[(regs.rb_depthcontrol >> 4) & 0x7]; state_info.depthBoundsTestEnable = VK_FALSE; - state_info.stencilTestEnable = VK_FALSE; - state_info.front.failOp = VK_STENCIL_OP_KEEP; - state_info.front.passOp = VK_STENCIL_OP_KEEP; - state_info.front.depthFailOp = 
VK_STENCIL_OP_KEEP; - state_info.front.compareOp = VK_COMPARE_OP_ALWAYS; - state_info.back.failOp = VK_STENCIL_OP_KEEP; - state_info.back.passOp = VK_STENCIL_OP_KEEP; - state_info.back.depthFailOp = VK_STENCIL_OP_KEEP; - state_info.back.compareOp = VK_COMPARE_OP_ALWAYS; + + uint32_t stencil_ref = (regs.rb_stencilrefmask & 0x000000FF); + uint32_t stencil_read_mask = (regs.rb_stencilrefmask & 0x0000FF00) >> 8; + + // Stencil state + state_info.front.compareOp = + compare_func_map[(regs.rb_depthcontrol >> 8) & 0x7]; + state_info.front.failOp = stencil_op_map[(regs.rb_depthcontrol >> 11) & 0x7]; + state_info.front.passOp = stencil_op_map[(regs.rb_depthcontrol >> 14) & 0x7]; + state_info.front.depthFailOp = + stencil_op_map[(regs.rb_depthcontrol >> 17) & 0x7]; + + // BACKFACE_ENABLE + if (!!(regs.rb_depthcontrol & 0x80)) { + state_info.back.compareOp = + compare_func_map[(regs.rb_depthcontrol >> 20) & 0x7]; + state_info.back.failOp = stencil_op_map[(regs.rb_depthcontrol >> 23) & 0x7]; + state_info.back.passOp = stencil_op_map[(regs.rb_depthcontrol >> 26) & 0x7]; + state_info.back.depthFailOp = + stencil_op_map[(regs.rb_depthcontrol >> 29) & 0x7]; + } else { + // Back state is identical to front state. + std::memcpy(&state_info.back, &state_info.front, sizeof(VkStencilOpState)); + } // Ignored; set dynamically. state_info.minDepthBounds = 0; @@ -1089,6 +1291,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; @@ -1101,6 +1304,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { state_info.logicOpEnable = VK_FALSE; state_info.logicOp = VK_LOGIC_OP_NO_OP; + auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); + static const VkBlendFactor kBlendFactorMap[] = { /* 0 */ VK_BLEND_FACTOR_ZERO, /* 1 */ VK_BLEND_FACTOR_ONE, @@ -1153,7 +1358,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; - attachment_state.colorWriteMask = write_mask; + attachment_state.colorWriteMask = + enable_mode == xenos::ModeControl::kColorDepth ? write_mask : 0; } state_info.attachmentCount = 4; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 3e623f14e..49144f50f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -32,6 +32,12 @@ namespace vulkan { // including shaders, various blend/etc options, and input configuration. class PipelineCache { public: + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, VkDescriptorSetLayout uniform_descriptor_set_layout, VkDescriptorSetLayout texture_descriptor_set_layout); @@ -46,11 +52,17 @@ class PipelineCache { // otherwise a new one may be created. Any state that can be set dynamically // in the command buffer is issued at this time. // Returns whether the pipeline could be successfully created. 
- bool ConfigurePipeline(VkCommandBuffer command_buffer, - const RenderState* render_state, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - PrimitiveType primitive_type); + UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer, + const RenderState* render_state, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type, + VkPipeline* pipeline_out); + + // Sets required dynamic state on the command buffer. + // Only state that has changed since the last call will be set unless + // full_update is true. + bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); // Pipeline layout shared by all pipelines. VkPipelineLayout pipeline_layout() const { return pipeline_layout_; } @@ -63,16 +75,14 @@ class PipelineCache { // state. VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); + bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl); + void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); + // Gets a geometry shader used to emulate the given primitive type. // Returns nullptr if the primitive doesn't need to be emulated. VkShaderModule GetGeometryShader(PrimitiveType primitive_type, bool is_line_mode); - // Sets required dynamic state on the command buffer. - // Only state that has changed since the last call will be set unless - // full_update is true. - bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); - RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; @@ -111,12 +121,6 @@ class PipelineCache { VkPipeline current_pipeline_ = nullptr; private: - enum class UpdateStatus { - kCompatible, - kMismatch, - kError, - }; - UpdateStatus UpdateState(VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type); @@ -154,6 +158,7 @@ class PipelineCache { struct UpdateShaderStagesRegisters { PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; VulkanShader* vertex_shader; VulkanShader* pixel_shader; @@ -205,11 +210,12 @@ class PipelineCache { VkPipelineViewportStateCreateInfo update_viewport_state_info_; struct UpdateRasterizationStateRegisters { + PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; + uint32_t pa_sc_viz_query; uint32_t multi_prim_ib_reset_index; - PrimitiveType prim_type; UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } @@ -217,6 +223,10 @@ class PipelineCache { VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; struct UpdateMultisampleStateeRegisters { + uint32_t pa_sc_aa_config; + uint32_t pa_su_sc_mode_cntl; + uint32_t rb_surface_info; + UpdateMultisampleStateeRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_multisample_state_regs_; @@ -235,6 +245,7 @@ class PipelineCache { uint32_t rb_colorcontrol; uint32_t rb_color_mask; uint32_t rb_blendcontrol[4]; + uint32_t rb_modecontrol; UpdateColorBlendStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 5637d44eb..f3d3288a7 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -39,7 +39,7 @@ VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) { case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: // WARNING: this is wrong, most likely - 
no float form in vulkan? XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used"); - return VK_FORMAT_A2R10G10B10_SSCALED_PACK32; + return VK_FORMAT_A2R10G10B10_UNORM_PACK32; case ColorRenderTargetFormat::k_16_16: return VK_FORMAT_R16G16_UNORM; case ColorRenderTargetFormat::k_16_16_16_16: @@ -71,34 +71,6 @@ VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) { } } -// Cached view into the EDRAM memory. -// The image is aliased to a region of the edram_memory_ based on the tile -// parameters. -// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible -// formats? -class CachedTileView { - public: - // Key identifying the view in the cache. - TileViewKey key; - // Image mapped into EDRAM. - VkImage image = nullptr; - // Simple view on the image matching the format. - VkImageView image_view = nullptr; - - CachedTileView(VkDevice device, VkDeviceMemory edram_memory, - TileViewKey view_key); - ~CachedTileView(); - - bool IsEqual(const TileViewKey& other_key) const { - auto a = reinterpret_cast(&key); - auto b = reinterpret_cast(&other_key); - return *a == *b; - } - - private: - VkDevice device_ = nullptr; -}; - // Cached framebuffer referencing tile attachments. // Each framebuffer is specific to a render pass. Ugh. class CachedFramebuffer { @@ -151,9 +123,11 @@ class CachedRenderPass { VkDevice device_ = nullptr; }; -CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, +CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, + VkCommandBuffer command_buffer, + VkDeviceMemory edram_memory, TileViewKey view_key) - : device_(device), key(std::move(view_key)) { + : device_(*device), key(std::move(view_key)) { // Map format to Vulkan. VkFormat vulkan_format = VK_FORMAT_UNDEFINED; uint32_t bpp = 4; @@ -175,7 +149,8 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); } assert_true(vulkan_format != VK_FORMAT_UNDEFINED); - assert_true(bpp == 4); + // FIXME(DrChat): Was this check necessary? + // assert_true(bpp == 4); // Create the image with the desired properties. VkImageCreateInfo image_info; @@ -191,8 +166,25 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, image_info.extent.depth = 1; image_info.mipLevels = 1; image_info.arrayLayers = 1; - // TODO(benvanik): native MSAA support? 
- image_info.samples = VK_SAMPLE_COUNT_1_BIT; + if (FLAGS_vulkan_native_msaa) { + auto msaa_samples = static_cast(key.msaa_samples); + switch (msaa_samples) { + case MsaaSamples::k1X: + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + image_info.samples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + image_info.samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_samples); + } + } else { + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + } + sample_count = image_info.samples; image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -203,19 +195,17 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; auto err = vkCreateImage(device_, &image_info, nullptr, &image); CheckResult(err, "vkCreateImage"); - // Verify our assumptions about memory layout are correct. - VkDeviceSize edram_offset = key.tile_offset * 5120; VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(device, image, &memory_requirements); - assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity); - assert_true(edram_offset % memory_requirements.alignment == 0); + vkGetImageMemoryRequirements(*device, image, &memory_requirements); - // Bind to the region of EDRAM we occupy. - err = vkBindImageMemory(device_, image, edram_memory, edram_offset); + // Bind to a newly allocated chunk. + // TODO: Alias from a really big buffer? + memory = device->AllocateMemory(memory_requirements, 0); + err = vkBindImageMemory(device_, image, memory, 0); CheckResult(err, "vkBindImageMemory"); // Create the image view we'll use to attach it to a framebuffer. @@ -242,11 +232,37 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, CheckResult(err, "vkCreateImageView"); // TODO(benvanik): transition to general layout? + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + image_barrier.dstAccessMask = + key.color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT + : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + image_barrier.dstAccessMask |= + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = image; + image_barrier.subresourceRange.aspectMask = + key.color_or_depth + ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_barrier.subresourceRange.baseMipLevel = 0; + image_barrier.subresourceRange.levelCount = 1; + image_barrier.subresourceRange.baseArrayLayer = 0; + image_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); } CachedTileView::~CachedTileView() { vkDestroyImageView(device_, image_view, nullptr); vkDestroyImage(device_, image, nullptr); + vkFreeMemory(device_, memory, nullptr); } CachedFramebuffer::CachedFramebuffer( @@ -293,8 +309,15 @@ bool CachedFramebuffer::IsCompatible( const RenderConfiguration& desired_config) const { // We already know all render pass things line up, so let's verify dimensions, // edram offsets, etc. We need an exact match. - if (desired_config.surface_pitch_px != width || - desired_config.surface_height_px != height) { + uint32_t surface_pitch_px = desired_config.surface_msaa != MsaaSamples::k4X + ? desired_config.surface_pitch_px + : desired_config.surface_pitch_px * 2; + uint32_t surface_height_px = desired_config.surface_msaa == MsaaSamples::k1X + ? desired_config.surface_height_px + : desired_config.surface_height_px * 2; + surface_pitch_px = std::min(surface_pitch_px, 2560u); + surface_height_px = std::min(surface_height_px, 2560u); + if (surface_pitch_px != width || surface_height_px != height) { return false; } // TODO(benvanik): separate image views from images in tiles and store in fb? @@ -327,13 +350,33 @@ CachedRenderPass::CachedRenderPass(VkDevice device, : device_(device) { std::memcpy(&config, &desired_config, sizeof(config)); + VkSampleCountFlagBits sample_count; + if (FLAGS_vulkan_native_msaa) { + switch (desired_config.surface_msaa) { + case MsaaSamples::k1X: + sample_count = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + sample_count = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + sample_count = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(desired_config.surface_msaa); + break; + } + } else { + sample_count = VK_SAMPLE_COUNT_1_BIT; + } + // Initialize all attachments to default unused. // As we set layout(location=RT) in shaders we must always provide 4. 
VkAttachmentDescription attachments[5]; for (int i = 0; i < 4; ++i) { attachments[i].flags = 0; attachments[i].format = VK_FORMAT_UNDEFINED; - attachments[i].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[i].samples = sample_count; attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -344,7 +387,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device, auto& depth_stencil_attachment = attachments[4]; depth_stencil_attachment.flags = 0; depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; - depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + depth_stencil_attachment.samples = sample_count; depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -409,6 +452,11 @@ CachedRenderPass::~CachedRenderPass() { bool CachedRenderPass::IsCompatible( const RenderConfiguration& desired_config) const { + if (config.surface_msaa != desired_config.surface_msaa && + FLAGS_vulkan_native_msaa) { + return false; + } + for (int i = 0; i < 4; ++i) { // TODO(benvanik): allow compatible vulkan formats. if (config.color[i].format != desired_config.color[i].format) { @@ -423,9 +471,10 @@ bool CachedRenderPass::IsCompatible( RenderCache::RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) { + : register_file_(register_file), device_(device) { + VkResult status = VK_SUCCESS; + // Create the buffer we'll bind to our memory. - // We do this first so we can get the right memory type. VkBufferCreateInfo buffer_info; buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_info.pNext = nullptr; @@ -436,55 +485,39 @@ RenderCache::RenderCache(RegisterFile* register_file, buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_info.queueFamilyIndexCount = 0; buffer_info.pQueueFamilyIndices = nullptr; - auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); - CheckResult(err, "vkCreateBuffer"); + status = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); + CheckResult(status, "vkCreateBuffer"); // Query requirements for the buffer. // It should be 1:1. VkMemoryRequirements buffer_requirements; - vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements); + vkGetBufferMemoryRequirements(*device_, edram_buffer_, &buffer_requirements); assert_true(buffer_requirements.size == kEdramBufferCapacity); - // Create a dummy image so we can see what memory bits it requires. - // They should overlap with the buffer requirements but are likely more - // strict. 
-  VkImageCreateInfo test_image_info;
-  test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
-  test_image_info.pNext = nullptr;
-  test_image_info.flags = 0;
-  test_image_info.imageType = VK_IMAGE_TYPE_2D;
-  test_image_info.format = VK_FORMAT_R8G8B8A8_UINT;
-  test_image_info.extent.width = 128;
-  test_image_info.extent.height = 128;
-  test_image_info.extent.depth = 1;
-  test_image_info.mipLevels = 1;
-  test_image_info.arrayLayers = 1;
-  test_image_info.samples = VK_SAMPLE_COUNT_1_BIT;
-  test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
-  test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-  test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-  test_image_info.queueFamilyIndexCount = 0;
-  test_image_info.pQueueFamilyIndices = nullptr;
-  test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL;
-  VkImage test_image = nullptr;
-  err = vkCreateImage(device_, &test_image_info, nullptr, &test_image);
-  CheckResult(err, "vkCreateImage");
-  VkMemoryRequirements image_requirements;
-  vkGetImageMemoryRequirements(device_, test_image, &image_requirements);
-  vkDestroyImage(device_, test_image, nullptr);
-  assert_true((image_requirements.memoryTypeBits &
-               buffer_requirements.memoryTypeBits) != 0);
-  // Allocate EDRAM memory.
-  VkMemoryRequirements memory_requirements;
-  memory_requirements.size = buffer_requirements.size;
-  memory_requirements.alignment = buffer_requirements.alignment;
-  memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits;
   // TODO(benvanik): do we need it host visible?
-  edram_memory_ = device->AllocateMemory(memory_requirements, 0);
+  edram_memory_ = device->AllocateMemory(buffer_requirements);
+  assert_not_null(edram_memory_);

   // Bind buffer to map our entire memory.
-  vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0);
+  status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0);
+  CheckResult(status, "vkBindBufferMemory");
+
+  if (status == VK_SUCCESS) {
+    // For debugging, upload a grid into the EDRAM buffer.
+    uint32_t* gpu_data = nullptr;
+    status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size,
+                         0, reinterpret_cast<void**>(&gpu_data));
+    CheckResult(status, "vkMapMemory");
+
+    if (status == VK_SUCCESS) {
+      for (int i = 0; i < kEdramBufferCapacity / 4; i++) {
+        gpu_data[i] = (i % 8) >= 4 ? 0xFF0000FF : 0xFFFFFFFF;
+      }
+
+      vkUnmapMemory(*device_, edram_memory_);
+    }
+  }
 }

 RenderCache::~RenderCache() {
@@ -503,13 +536,36 @@ RenderCache::~RenderCache() {
   cached_tile_views_.clear();

   // Release underlying EDRAM memory.
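+  // (Teardown order matters here: the buffer is destroyed before the memory
+  // that backs it is freed.)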
- vkDestroyBuffer(device_, edram_buffer_, nullptr); - vkFreeMemory(device_, edram_memory_, nullptr); + vkDestroyBuffer(*device_, edram_buffer_, nullptr); + vkFreeMemory(*device_, edram_memory_, nullptr); +} + +bool RenderCache::dirty() const { + auto& regs = *register_file_; + auto& cur_regs = shadow_registers_; + + bool dirty = false; + dirty |= cur_regs.rb_modecontrol != regs[XE_GPU_REG_RB_MODECONTROL].u32; + dirty |= cur_regs.rb_surface_info != regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + dirty |= cur_regs.rb_color_info != regs[XE_GPU_REG_RB_COLOR_INFO].u32; + dirty |= cur_regs.rb_color1_info != regs[XE_GPU_REG_RB_COLOR1_INFO].u32; + dirty |= cur_regs.rb_color2_info != regs[XE_GPU_REG_RB_COLOR2_INFO].u32; + dirty |= cur_regs.rb_color3_info != regs[XE_GPU_REG_RB_COLOR3_INFO].u32; + dirty |= cur_regs.rb_depth_info != regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + dirty |= cur_regs.pa_sc_window_scissor_tl != + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; + dirty |= cur_regs.pa_sc_window_scissor_br != + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + return dirty; } const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + assert_null(current_command_buffer_); current_command_buffer_ = command_buffer; @@ -542,13 +598,34 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, } // Lookup or generate a new render pass and framebuffer for the new state. - if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) { + if (!ConfigureRenderPass(command_buffer, config, &render_pass, + &framebuffer)) { return nullptr; } + current_state_.render_pass = render_pass; current_state_.render_pass_handle = render_pass->handle; current_state_.framebuffer = framebuffer; current_state_.framebuffer_handle = framebuffer->handle; + + // TODO(DrChat): Determine if we actually need an EDRAM buffer. + /* + // Depth + auto depth_target = current_state_.framebuffer->depth_stencil_attachment; + if (depth_target && current_state_.config.depth_stencil.used) { + UpdateTileView(command_buffer, depth_target, true); + } + + // Color + for (int i = 0; i < 4; i++) { + auto target = current_state_.framebuffer->color_attachments[i]; + if (!target || !current_state_.config.color[i].used) { + continue; + } + + UpdateTileView(command_buffer, target, true); + } + */ } if (!render_pass) { return nullptr; @@ -571,6 +648,15 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; render_pass_begin_info.renderArea.extent.height = config->surface_height_px; + if (config->surface_msaa == MsaaSamples::k2X) { + render_pass_begin_info.renderArea.extent.height = + std::min(config->surface_height_px * 2, 2560u); + } else if (config->surface_msaa == MsaaSamples::k4X) { + render_pass_begin_info.renderArea.extent.width *= 2; + render_pass_begin_info.renderArea.extent.height = + std::min(config->surface_height_px * 2, 2560u); + } + // Configure clear color, if clearing. // TODO(benvanik): enable clearing here during resolve? render_pass_begin_info.clearValueCount = 0; @@ -601,9 +687,15 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { // Guess the height from the scissor height. // It's wildly inaccurate, but I've never seen it be bigger than the // EDRAM tiling. 
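+  // (The scissor-based estimate is kept below, commented out, for reference;
+  // see the TODO that follows. Over-allocating tile views at the maximum
+  // height is also why EndRenderPass() warns that oversized copy-backs could
+  // clobber neighboring EDRAM contents.)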
+  /*
   uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
   uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
   config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16));
+  */
+
+  // TODO(DrChat): Find an accurate way to get the surface height. Until we do,
+  // we're going to hardcode it to 2560, as that's the absolute maximum.
+  config->surface_height_px = 2560;

   // Color attachment configuration.
   if (config->mode_control == ModeControl::kColorDepth) {
@@ -620,12 +712,23 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
       case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
         config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
         break;
+      case ColorRenderTargetFormat::k_2_10_10_10_unknown:
+        config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10;
+        break;
+      case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
+        config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT;
+        break;
     }
+
+    // Make sure all unknown bits are unset.
+    // RDR sets bit 0x00400000
+    // assert_zero(color_info[i] & ~0x000F0FFF);
   }
 } else {
   for (int i = 0; i < 4; ++i) {
     config->color[i].edram_base = 0;
     config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
+    config->color[i].used = false;
   }
 }
@@ -635,15 +738,20 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
   config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF;
   config->depth_stencil.format =
       static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1);
+
+  // Make sure all unknown bits are unset.
+  // assert_zero(regs.rb_depth_info & ~0x00010FFF);
 } else {
   config->depth_stencil.edram_base = 0;
   config->depth_stencil.format = DepthRenderTargetFormat::kD24S8;
+  config->depth_stencil.used = false;
 }

 return true;
 }

-bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
+bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
+                                      RenderConfiguration* config,
                                       CachedRenderPass** out_render_pass,
                                       CachedFramebuffer** out_framebuffer) {
   *out_render_pass = nullptr;
@@ -662,7 +770,7 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
   // If no render pass was found in the cache create a new one.
   if (!render_pass) {
-    render_pass = new CachedRenderPass(device_, *config);
+    render_pass = new CachedRenderPass(*device_, *config);
     cached_render_passes_.push_back(render_pass);
   }
@@ -679,16 +787,25 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
   // If no framebuffer was found in the cache create a new one.
   if (!framebuffer) {
+    uint32_t tile_width = config->surface_msaa == MsaaSamples::k4X ? 40 : 80;
+    uint32_t tile_height = config->surface_msaa != MsaaSamples::k1X ? 8 : 16;
+
     CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr,
                                                    nullptr};
     for (int i = 0; i < 4; ++i) {
       TileViewKey color_key;
       color_key.tile_offset = config->color[i].edram_base;
-      color_key.tile_width = config->surface_pitch_px / 80;
-      color_key.tile_height = config->surface_height_px / 16;
+      color_key.tile_width =
+          xe::round_up(config->surface_pitch_px, tile_width) / tile_width;
+      // color_key.tile_height =
+      //     xe::round_up(config->surface_height_px, tile_height) / tile_height;
+      color_key.tile_height = 160;
       color_key.color_or_depth = 1;
+      color_key.msaa_samples =
+          0;  // static_cast<uint16_t>(config->surface_msaa);
      color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
-      target_color_attachments[i] = GetTileView(color_key);
+      target_color_attachments[i] =
+          FindOrCreateTileView(command_buffer, color_key);
      if (!target_color_attachments) {
        XELOGE("Failed to get tile view for color attachment");
        return false;
@@ -697,21 +814,34 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
     TileViewKey depth_stencil_key;
     depth_stencil_key.tile_offset = config->depth_stencil.edram_base;
-    depth_stencil_key.tile_width = config->surface_pitch_px / 80;
-    depth_stencil_key.tile_height = config->surface_height_px / 16;
+    depth_stencil_key.tile_width =
+        xe::round_up(config->surface_pitch_px, tile_width) / tile_width;
+    // depth_stencil_key.tile_height =
+    //     xe::round_up(config->surface_height_px, tile_height) / tile_height;
+    depth_stencil_key.tile_height = 160;
     depth_stencil_key.color_or_depth = 0;
+    depth_stencil_key.msaa_samples =
+        0;  // static_cast<uint16_t>(config->surface_msaa);
     depth_stencil_key.edram_format =
         static_cast<uint16_t>(config->depth_stencil.format);
-    auto target_depth_stencil_attachment = GetTileView(depth_stencil_key);
+    auto target_depth_stencil_attachment =
+        FindOrCreateTileView(command_buffer, depth_stencil_key);
     if (!target_depth_stencil_attachment) {
       XELOGE("Failed to get tile view for depth/stencil attachment");
       return false;
     }

+    uint32_t surface_pitch_px = config->surface_msaa != MsaaSamples::k4X
+                                    ? config->surface_pitch_px
+                                    : config->surface_pitch_px * 2;
+    uint32_t surface_height_px = config->surface_msaa == MsaaSamples::k1X
+                                     ? config->surface_height_px
+                                     : config->surface_height_px * 2;
+    surface_pitch_px = std::min(surface_pitch_px, 2560u);
+    surface_height_px = std::min(surface_height_px, 2560u);
     framebuffer = new CachedFramebuffer(
-        device_, render_pass->handle, config->surface_pitch_px,
-        config->surface_height_px, target_color_attachments,
-        target_depth_stencil_attachment);
+        *device_, render_pass->handle, surface_pitch_px, surface_height_px,
+        target_color_attachments, target_depth_stencil_attachment);
     render_pass->cached_framebuffers.push_back(framebuffer);
   }
@@ -720,7 +850,75 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
   return true;
 }

-CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) {
+CachedTileView* RenderCache::FindOrCreateTileView(
+    VkCommandBuffer command_buffer, const TileViewKey& view_key) {
+  auto tile_view = FindTileView(view_key);
+  if (tile_view) {
+    return tile_view;
+  }
+
+  // Create a new tile and add to the cache.
+  tile_view =
+      new CachedTileView(device_, command_buffer, edram_memory_, view_key);
+  cached_tile_views_.push_back(tile_view);
+
+  return tile_view;
+}
+
+void RenderCache::UpdateTileView(VkCommandBuffer command_buffer,
+                                 CachedTileView* view, bool load,
+                                 bool insert_barrier) {
+  uint32_t tile_width =
+      view->key.msaa_samples == uint16_t(MsaaSamples::k4X) ? 40 : 80;
+  uint32_t tile_height =
+      view->key.msaa_samples != uint16_t(MsaaSamples::k1X) ? 8 : 16;
+
+  if (insert_barrier) {
+    VkBufferMemoryBarrier barrier;
+    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    barrier.pNext = nullptr;
+    if (load) {
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    } else {
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+      barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    }
+    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.buffer = edram_buffer_;
+    barrier.offset = view->key.tile_offset * 5120;
+    barrier.size = view->key.tile_width * tile_width * view->key.tile_height *
+                   tile_height * (view->key.color_or_depth ? 4 : 1);
+    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
+                         &barrier, 0, nullptr);
+  }
+
+  // TODO(DrChat): Stencil copies.
+  VkBufferImageCopy region;
+  region.bufferOffset = view->key.tile_offset * 5120;
+  region.bufferRowLength = 0;
+  region.bufferImageHeight = 0;
+  region.imageSubresource = {0, 0, 0, 1};
+  region.imageSubresource.aspectMask = view->key.color_or_depth
+                                           ? VK_IMAGE_ASPECT_COLOR_BIT
+                                           : VK_IMAGE_ASPECT_DEPTH_BIT;
+  region.imageOffset = {0, 0, 0};
+  region.imageExtent = {view->key.tile_width * tile_width,
+                        view->key.tile_height * tile_height, 1};
+  if (load) {
+    vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image,
+                           VK_IMAGE_LAYOUT_GENERAL, 1, &region);
+  } else {
+    vkCmdCopyImageToBuffer(command_buffer, view->image,
+                           VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, &region);
+  }
+}
+
+CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const {
   // Check the cache.
   // TODO(benvanik): better lookup.
   for (auto tile_view : cached_tile_views_) {
@@ -729,25 +927,341 @@ CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) {
     if (tile_view->IsEqual(view_key)) {
       return tile_view;
     }
   }

-  // Create a new tile and add to the cache.
-  auto tile_view = new CachedTileView(device_, edram_memory_, view_key);
-  cached_tile_views_.push_back(tile_view);
-  return tile_view;
+  return nullptr;
 }

 void RenderCache::EndRenderPass() {
   assert_not_null(current_command_buffer_);
-  auto command_buffer = current_command_buffer_;
-  current_command_buffer_ = nullptr;

   // End the render pass.
-  vkCmdEndRenderPass(command_buffer);
+  vkCmdEndRenderPass(current_command_buffer_);
+
+  // Copy all render targets back into our EDRAM buffer.
+  // Don't bother waiting on this command to complete, as next render pass may
+  // reuse previous framebuffer attachments. If they need this, they will wait.
+  // TODO: Should we bother re-tiling the images on copy back?
+  //
+  // FIXME: There's a case where we may have a really big render target (as we
+  // can't get the correct height atm) and we may end up overwriting the valid
+  // contents of another render target by mistake! Need to reorder copy commands
+  // to avoid this.
+
+  // TODO(DrChat): Determine if we actually need an EDRAM buffer.
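+  // (If the copy-back below is ever re-enabled: CachedTileView::operator<
+  // compares EDRAM base offsets, so the std::sort would write overlapping
+  // views back in ascending tile order rather than in attachment order.)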
+  /*
+  std::vector<CachedTileView*> cached_views;
+
+  // Depth
+  auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
+  if (depth_target && current_state_.config.depth_stencil.used) {
+    cached_views.push_back(depth_target);
+  }
+
+  // Color
+  for (int i = 0; i < 4; i++) {
+    auto target = current_state_.framebuffer->color_attachments[i];
+    if (!target || !current_state_.config.color[i].used) {
+      continue;
+    }
+
+    cached_views.push_back(target);
+  }
+
+  std::sort(
+      cached_views.begin(), cached_views.end(),
+      [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; });
+
+  for (auto view : cached_views) {
+    UpdateTileView(current_command_buffer_, view, false, false);
+  }
+  */
+
+  current_command_buffer_ = nullptr;
 }

 void RenderCache::ClearCache() {
   // TODO(benvanik): caching.
 }

+void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
+                                 uint32_t edram_base, VkImage image,
+                                 VkImageLayout image_layout,
+                                 bool color_or_depth, VkOffset3D offset,
+                                 VkExtent3D extents) {
+  // Transition the texture into a transfer destination layout.
+  VkImageMemoryBarrier image_barrier;
+  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+  image_barrier.pNext = nullptr;
+  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  if (image_layout != VK_IMAGE_LAYOUT_GENERAL &&
+      image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+    image_barrier.srcAccessMask = 0;
+    image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    image_barrier.oldLayout = image_layout;
+    image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+    image_barrier.image = image;
+    image_barrier.subresourceRange = {0, 0, 1, 0, 1};
+    image_barrier.subresourceRange.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+
+    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                         nullptr, 1, &image_barrier);
+  }
+
+  VkBufferMemoryBarrier buffer_barrier;
+  buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+  buffer_barrier.pNext = nullptr;
+  buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+  buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+  buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  buffer_barrier.buffer = edram_buffer_;
+  buffer_barrier.offset = edram_base * 5120;
+  // TODO: Calculate this accurately (need texel size)
+  buffer_barrier.size = extents.width * extents.height * 4;
+
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
+                       &buffer_barrier, 0, nullptr);
+
+  // Issue the copy command.
+  // TODO(DrChat): Stencil copies.
+  VkBufferImageCopy region;
+  region.bufferOffset = edram_base * 5120;
+  region.bufferImageHeight = 0;
+  region.bufferRowLength = 0;
+  region.imageOffset = offset;
+  region.imageExtent = extents;
+  region.imageSubresource = {0, 0, 0, 1};
+  region.imageSubresource.aspectMask =
+      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
+  vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1,
+                         &region);
+
+  // Transition the image back into its previous layout.
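+  // (The barrier below reuses the fields set for the first transition: the
+  // access masks swap roles and std::swap() reverses the old/new layouts, and
+  // it is guarded by the same condition so it only fires when the earlier
+  // transition did.)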
+  if (image_layout != VK_IMAGE_LAYOUT_GENERAL &&
+      image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+    image_barrier.srcAccessMask = image_barrier.dstAccessMask;
+    image_barrier.dstAccessMask = 0;
+    std::swap(image_barrier.oldLayout, image_barrier.newLayout);
+    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                         nullptr, 1, &image_barrier);
+  }
+}
+
+void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
+                              uint32_t edram_base, uint32_t pitch,
+                              uint32_t height, MsaaSamples num_samples,
+                              VkImage image, VkImageLayout image_layout,
+                              bool color_or_depth, uint32_t format,
+                              VkFilter filter, VkOffset3D offset,
+                              VkExtent3D extents) {
+  if (color_or_depth) {
+    // Adjust similar formats for easier matching.
+    switch (static_cast<ColorRenderTargetFormat>(format)) {
+      case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
+        format = uint32_t(ColorRenderTargetFormat::k_8_8_8_8);
+        break;
+      case ColorRenderTargetFormat::k_2_10_10_10_unknown:
+        format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10);
+        break;
+      case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
+        format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10_FLOAT);
+        break;
+    }
+  }
+
+  uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80;
+  uint32_t tile_height = num_samples != MsaaSamples::k1X ? 8 : 16;
+
+  // Grab a tile view that represents the source image.
+  TileViewKey key;
+  key.color_or_depth = color_or_depth ? 1 : 0;
+  key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
+  key.edram_format = format;
+  key.tile_offset = edram_base;
+  key.tile_width = xe::round_up(pitch, tile_width) / tile_width;
+  // key.tile_height = xe::round_up(height, tile_height) / tile_height;
+  key.tile_height = 160;
+  auto tile_view = FindOrCreateTileView(command_buffer, key);
+  assert_not_null(tile_view);
+
+  // Update the view with the latest contents.
+  // UpdateTileView(command_buffer, tile_view, true, true);
+
+  // Transition the image into a transfer destination layout, if needed.
+  // TODO: Util function for this
+  VkImageMemoryBarrier image_barrier;
+  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+  image_barrier.pNext = nullptr;
+  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  image_barrier.srcAccessMask = 0;
+  image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+  image_barrier.oldLayout = image_layout;
+  image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+  image_barrier.image = image;
+  image_barrier.subresourceRange = {0, 0, 1, 0, 1};
+  image_barrier.subresourceRange.aspectMask =
+      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
+                     : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &image_barrier);
+
+  // If we overflow we'll lose the device here.
+  assert_true(extents.width <= key.tile_width * tile_width);
+  assert_true(extents.height <= key.tile_height * tile_height);
+
+  // Now issue the blit to the destination.
+  if (tile_view->sample_count == VK_SAMPLE_COUNT_1_BIT) {
+    VkImageBlit image_blit;
+    image_blit.srcSubresource = {0, 0, 0, 1};
+    image_blit.srcSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_blit.srcOffsets[0] = {0, 0, offset.z};
+    image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
+                                int32_t(extents.depth)};
+
+    image_blit.dstSubresource = {0, 0, 0, 1};
+    image_blit.dstSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_blit.dstOffsets[0] = offset;
+    image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
+                                offset.y + int32_t(extents.height),
+                                offset.z + int32_t(extents.depth)};
+    vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
+                   image, image_layout, 1, &image_blit, filter);
+  } else {
+    VkImageResolve image_resolve;
+    image_resolve.srcSubresource = {0, 0, 0, 1};
+    image_resolve.srcSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_resolve.srcOffset = {0, 0, 0};
+
+    image_resolve.dstSubresource = {0, 0, 0, 1};
+    image_resolve.dstSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_resolve.dstOffset = offset;
+
+    image_resolve.extent = extents;
+    vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
+                      image, image_layout, 1, &image_resolve);
+  }
+
+  // Transition the image back into its previous layout.
+  image_barrier.srcAccessMask = image_barrier.dstAccessMask;
+  image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+  std::swap(image_barrier.oldLayout, image_barrier.newLayout);
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &image_barrier);
+}
+
+void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
+                                  uint32_t edram_base,
+                                  ColorRenderTargetFormat format,
+                                  uint32_t pitch, uint32_t height,
+                                  MsaaSamples num_samples, float* color) {
+  // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
+  // need to detect this and calculate a value.
+
+  // Adjust similar formats for easier matching.
+  switch (format) {
+    case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
+      format = ColorRenderTargetFormat::k_8_8_8_8;
+      break;
+    case ColorRenderTargetFormat::k_2_10_10_10_unknown:
+      format = ColorRenderTargetFormat::k_2_10_10_10;
+      break;
+    case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
+      format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT;
+      break;
+  }
+
+  uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80;
+  uint32_t tile_height = num_samples != MsaaSamples::k1X ? 8 : 16;
+
+  // Grab a tile view (as we need to clear an image first)
+  TileViewKey key;
+  key.color_or_depth = 1;
+  key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
+  key.edram_format = static_cast<uint16_t>(format);
+  key.tile_offset = edram_base;
+  key.tile_width = xe::round_up(pitch, tile_width) / tile_width;
+  // key.tile_height = xe::round_up(height, tile_height) / tile_height;
+  key.tile_height = 160;
+  auto tile_view = FindOrCreateTileView(command_buffer, key);
+  assert_not_null(tile_view);
+
+  VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+  VkClearColorValue clear_value;
+  std::memcpy(clear_value.float32, color, sizeof(float) * 4);
+
+  // Issue a clear command
+  vkCmdClearColorImage(command_buffer, tile_view->image,
+                       VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range);
+
+  // Copy image back into EDRAM buffer
+  // UpdateTileView(command_buffer, tile_view, false, false);
+}
+
+void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
+                                         uint32_t edram_base,
+                                         DepthRenderTargetFormat format,
+                                         uint32_t pitch, uint32_t height,
+                                         MsaaSamples num_samples, float depth,
+                                         uint32_t stencil) {
+  // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
+  // need to detect this and calculate a value.
+
+  uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80;
+  uint32_t tile_height = num_samples != MsaaSamples::k1X ? 8 : 16;
+
+  // Grab a tile view (as we need to clear an image first)
+  TileViewKey key;
+  key.color_or_depth = 0;
+  key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
+  key.edram_format = static_cast<uint16_t>(format);
+  key.tile_offset = edram_base;
+  key.tile_width = xe::round_up(pitch, tile_width) / tile_width;
+  // key.tile_height = xe::round_up(height, tile_height) / tile_height;
+  key.tile_height = 160;
+  auto tile_view = FindOrCreateTileView(command_buffer, key);
+  assert_not_null(tile_view);
+
+  VkImageSubresourceRange range = {
+      VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1,
+  };
+  VkClearDepthStencilValue clear_value;
+  clear_value.depth = depth;
+  clear_value.stencil = stencil;
+
+  // Issue a clear command
+  vkCmdClearDepthStencilImage(command_buffer, tile_view->image,
+                              VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range);
+
+  // Copy image back into EDRAM buffer
+  // UpdateTileView(command_buffer, tile_view, false, false);
+}
+
+void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) {
+  vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity,
+                  value);
+}
+
 bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
   uint32_t value = register_file_->values[register_name].u32;
   if (*dest == value) {
diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h
index 4a1574e9b..c9f0adf98 100644
--- a/src/xenia/gpu/vulkan/render_cache.h
+++ b/src/xenia/gpu/vulkan/render_cache.h
@@ -12,6 +12,7 @@
 #include "xenia/gpu/register_file.h"
 #include "xenia/gpu/shader.h"
+#include "xenia/gpu/texture_info.h"
 #include "xenia/gpu/vulkan/vulkan_shader.h"
 #include "xenia/gpu/xenos.h"
 #include "xenia/ui/vulkan/vulkan.h"
@@ -36,28 +37,67 @@ struct TileViewKey {
   uint16_t tile_height;
   // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
   uint16_t color_or_depth : 1;
+  // Surface MSAA samples
+  uint16_t msaa_samples : 2;
   // Either ColorRenderTargetFormat or DepthRenderTargetFormat.
-  uint16_t edram_format : 15;
+  uint16_t edram_format : 13;
 };
 static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");

+// Cached view representing EDRAM memory.
+// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
+// formats?
+class CachedTileView {
+ public:
+  // Key identifying the view in the cache.
+  TileViewKey key;
+  // Image
+  VkImage image = nullptr;
+  // Simple view on the image matching the format.
+  VkImageView image_view = nullptr;
+  // Memory buffer
+  VkDeviceMemory memory = nullptr;
+  // Image sample count
+  VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT;
+
+  CachedTileView(ui::vulkan::VulkanDevice* device,
+                 VkCommandBuffer command_buffer, VkDeviceMemory edram_memory,
+                 TileViewKey view_key);
+  ~CachedTileView();
+
+  bool IsEqual(const TileViewKey& other_key) const {
+    auto a = reinterpret_cast<const uint64_t*>(&key);
+    auto b = reinterpret_cast<const uint64_t*>(&other_key);
+    return *a == *b;
+  }
+
+  bool operator<(const CachedTileView& other) const {
+    return key.tile_offset < other.key.tile_offset;
+  }
+
+ private:
+  VkDevice device_ = nullptr;
+};
+
 // Parsed render configuration from the current render state.
 struct RenderConfiguration {
   // Render mode (color+depth, depth-only, etc).
   xenos::ModeControl mode_control;
-  // Target surface pitch, in pixels.
+  // Target surface pitch multiplied by MSAA, in pixels.
   uint32_t surface_pitch_px;
-  // ESTIMATED target surface height, in pixels.
+  // ESTIMATED target surface height multiplied by MSAA, in pixels.
   uint32_t surface_height_px;
   // Surface MSAA setting.
   MsaaSamples surface_msaa;
   // Color attachments for the 4 render targets.
   struct {
+    bool used;
     uint32_t edram_base;
     ColorRenderTargetFormat format;
   } color[4];
   // Depth/stencil attachment.
   struct {
+    bool used;
     uint32_t edram_base;
     DepthRenderTargetFormat format;
   } depth_stencil;
@@ -73,6 +113,9 @@ struct RenderState {
   // Target framebuffer bound to the render pass.
   CachedFramebuffer* framebuffer = nullptr;
   VkFramebuffer framebuffer_handle = nullptr;
+
+  bool color_attachment_written[4] = {false};
+  bool depth_attachment_written = false;
 };

 // Manages the virtualized EDRAM and the render target cache.
@@ -97,9 +140,13 @@ struct RenderState {
 // 320px by rounding up to the next tile.
 //
 // MSAA and other settings will modify the exact pixel sizes, like 4X makes
-// each tile effectively 40x8px, but they are still all 5120b. As we try to
-// emulate this we adjust our viewport when rendering to stretch pixels as
-// needed.
+// each tile effectively 40x8px and 2X makes each tile 80x8px, but they are
+// still all 5120b. As we try to emulate this we adjust our viewport when
+// rendering to stretch pixels as needed.
+//
+// It appears that games also take advantage of MSAA stretching tiles when
+// doing clears. Games will clear a view with 1/2X pitch/height and 4X MSAA
+// and then later draw to that view with 1X pitch/height and 1X MSAA.
 //
 // The good news is that games cannot read EDRAM directly but must use a copy
 // operation to get the data out. That gives us a chance to do whatever we
@@ -217,6 +264,10 @@ class RenderCache {
   RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
   ~RenderCache();

+  // Call this to determine if you should start a new render pass or continue
+  // with an already open pass.
+  bool dirty() const;
+
   // Begins a render pass targeting the state-specified framebuffer formats.
   // The command buffer will be transitioned into the render pass phase.
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, @@ -230,24 +281,63 @@ class RenderCache { // Clears all cached content. void ClearCache(); + // Queues commands to copy EDRAM contents into an image. + // The command buffer must not be inside of a render pass when calling this. + void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, + VkImage image, VkImageLayout image_layout, + bool color_or_depth, VkOffset3D offset, + VkExtent3D extents); + + // Queues commands to blit EDRAM contents into an image. + // The command buffer must not be inside of a render pass when calling this. + void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, + uint32_t pitch, uint32_t height, MsaaSamples num_samples, + VkImage image, VkImageLayout image_layout, + bool color_or_depth, uint32_t format, VkFilter filter, + VkOffset3D offset, VkExtent3D extents); + + // Queues commands to clear EDRAM contents with a solid color. + // The command buffer must not be inside of a render pass when calling this. + void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, + ColorRenderTargetFormat format, uint32_t pitch, + uint32_t height, MsaaSamples num_samples, float* color); + // Queues commands to clear EDRAM contents with depth/stencil values. + // The command buffer must not be inside of a render pass when calling this. + void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, + uint32_t edram_base, + DepthRenderTargetFormat format, uint32_t pitch, + uint32_t height, MsaaSamples num_samples, + float depth, uint32_t stencil); + // Queues commands to fill EDRAM contents with a constant value. + // The command buffer must not be inside of a render pass when calling this. + void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value); + private: // Parses the current state into a configuration object. bool ParseConfiguration(RenderConfiguration* config); + // Finds a tile view. Returns nullptr if none found matching the key. + CachedTileView* FindTileView(const TileViewKey& view_key) const; + + // Gets or creates a tile view with the given parameters. + CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer, + const TileViewKey& view_key); + + void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view, + bool load, bool insert_barrier = true); + // Gets or creates a render pass and frame buffer for the given configuration. // This attempts to reuse as much as possible across render passes and // framebuffers. - bool ConfigureRenderPass(RenderConfiguration* config, + bool ConfigureRenderPass(VkCommandBuffer command_buffer, + RenderConfiguration* config, CachedRenderPass** out_render_pass, CachedFramebuffer** out_framebuffer); - // Gets or creates a tile view with the given parameters. - CachedTileView* GetTileView(const TileViewKey& view_key); - RegisterFile* register_file_ = nullptr; - VkDevice device_ = nullptr; + ui::vulkan::VulkanDevice* device_ = nullptr; - // Entire 10MiB of EDRAM, aliased to hell by various VkImages. + // Entire 10MiB of EDRAM. VkDeviceMemory edram_memory_ = nullptr; // Buffer overlayed 1:1 with edram_memory_ to allow raw access. 
VkBuffer edram_buffer_ = nullptr; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h index b9598cfa9..730f9f12e 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h @@ -2,7 +2,7 @@ // source: rect_list.geom const uint8_t rect_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, - 0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -10,8 +10,8 @@ const uint8_t rect_list_geom[] = { 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, @@ -40,17 +40,13 @@ const uint8_t rect_list_geom[] = { 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, - 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, - 0x06, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x05, 0x00, 0x07, 0x00, 0x30, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, + 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, + 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, + 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -65,12 +61,10 @@ const uint8_t rect_list_geom[] = { 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 
0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, @@ -107,25 +101,23 @@ const uint8_t rect_list_geom[] = { 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x03, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x36, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x65, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x07, 0x00, 
0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, @@ -139,7 +131,7 @@ const uint8_t rect_list_geom[] = { 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, @@ -153,286 +145,283 @@ const uint8_t rect_list_geom[] = { 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 
0x2B, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x43, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x46, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x4B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x49, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 
0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x4E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, + 0x4A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x4C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x4F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x54, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x51, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x52, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x53, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, + 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 
0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, + 0x59, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x5B, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x5E, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x60, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, + 0x5C, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x5E, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, - 0xF6, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, - 0x6C, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x6E, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 
0x62, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00, + 0xF6, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, + 0x6A, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x66, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x6E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x73, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x76, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, + 0x75, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x6D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x7A, 0x00, 0x00, 0x00, + 0x79, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x71, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, - 0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x74, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 
0x77, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x79, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, - 0x77, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x7C, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x6A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x7E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x66, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x69, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x7F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x82, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x83, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x84, 0x00, 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x85, 0x00, 0x00, 0x00, - 0x84, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x87, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x8D, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 
0x36, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x90, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x92, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, - 0x93, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x95, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x9D, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, - 0xA0, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xA2, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, - 0xA3, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0xA5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 
0x3E, 0x00, 0x03, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, - 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0xB3, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB3, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB9, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xBA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0xBB, 0x00, 0x00, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xBC, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xBD, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xBF, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xC0, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 
0xC2, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, - 0xBF, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xC5, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, - 0xC7, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xC9, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xBC, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xC9, 0x00, 0x00, 0x00, - 0xC8, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xCA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, - 0xCA, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, + 0x7B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x7D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x7E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x83, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x86, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x87, 0x00, 0x00, 0x00, 
0x86, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x88, 0x00, 0x00, 0x00, + 0x87, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x89, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x8B, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x8C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, + 0x8C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x8D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x90, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x93, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x96, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x97, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x99, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x9B, 0x00, 0x00, 0x00, + 0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x9C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, + 0x9C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x9D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xA3, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, + 0xA6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xAB, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, + 0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0xAE, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB1, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB3, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0xB5, 0x00, 0x00, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBB, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xBF, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, + 0xBD, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, + 0xC4, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC6, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xC7, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB6, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt index b047926f5..94fb6a700 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: Khronos Glslang Reference Front End; 1 -; Bound: 204 +; Bound: 202 ; Schema: 0 OpCapability Geometry OpCapability GeometryPointSize @@ -9,7 +9,7 @@ OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %4 "main" %18 %34 %49 %53 + OpEntryPoint Geometry %4 "main" %18 %34 %48 %51 OpExecutionMode %4 Triangles OpExecutionMode %4 Invocations 1 OpExecutionMode %4 OutputTriangleStrip @@ -27,14 +27,10 @@ OpMemberName %32 1 "gl_PointSize" OpMemberName %32 2 "gl_ClipDistance" OpName %34 "" - OpName %47 "VertexData" - OpMemberName %47 0 "o" - OpName %49 "out_vtx" - OpName %50 "VertexData" - OpMemberName %50 0 "o" - OpName %53 "in_vtx" - OpName %102 "i" - OpName %180 "i" + OpName %48 "out_interpolators" + OpName %51 "in_interpolators" + OpName %100 "i" + OpName %178 "i" OpMemberDecorate %14 0 BuiltIn Position OpMemberDecorate %14 1 BuiltIn PointSize OpMemberDecorate %14 2 BuiltIn ClipDistance @@ 
-45,10 +41,9 @@ OpDecorate %32 Block OpDecorate %32 Stream 0 OpDecorate %34 Stream 0 - OpMemberDecorate %47 0 Location 0 - OpDecorate %47 Stream 0 - OpDecorate %49 Stream 0 - OpMemberDecorate %50 0 Location 0 + OpDecorate %48 Location 0 + OpDecorate %48 Stream 0 + OpDecorate %51 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeBool @@ -77,21 +72,19 @@ %43 = OpTypePointer Output %9 %45 = OpConstant %11 16 %46 = OpTypeArray %10 %45 - %47 = OpTypeStruct %46 - %48 = OpTypePointer Output %47 - %49 = OpVariable %48 Output - %50 = OpTypeStruct %46 - %51 = OpTypeArray %50 %15 - %52 = OpTypePointer Input %51 - %53 = OpVariable %52 Input - %54 = OpTypePointer Input %50 - %101 = OpTypePointer Function %19 - %109 = OpConstant %19 16 + %47 = OpTypePointer Output %46 + %48 = OpVariable %47 Output + %49 = OpTypeArray %46 %15 + %50 = OpTypePointer Input %49 + %51 = OpVariable %50 Input + %52 = OpTypePointer Input %46 + %99 = OpTypePointer Function %19 + %107 = OpConstant %19 16 %4 = OpFunction %2 None %3 %5 = OpLabel %8 = OpVariable %7 Function - %102 = OpVariable %101 Function - %180 = OpVariable %101 Function + %100 = OpVariable %99 Function + %178 = OpVariable %99 Function %23 = OpAccessChain %22 %18 %20 %20 %21 %24 = OpLoad %9 %23 %26 = OpAccessChain %22 %18 %25 %20 %21 @@ -100,7 +93,7 @@ OpStore %8 %28 %29 = OpLoad %6 %8 OpSelectionMerge %31 None - OpBranchConditional %29 %30 %127 + OpBranchConditional %29 %30 %125 %30 = OpLabel %36 = OpAccessChain %35 %18 %20 %20 %37 = OpLoad %10 %36 @@ -110,216 +103,216 @@ %42 = OpLoad %9 %41 %44 = OpAccessChain %43 %34 %40 OpStore %44 %42 - %55 = OpAccessChain %54 %53 %20 - %56 = OpLoad %50 %55 - OpStore %49 %56 + %53 = OpAccessChain %52 %51 %20 + %54 = OpLoad %46 %53 + OpStore %48 %54 OpEmitVertex - %57 = OpAccessChain %35 %18 %40 %20 - %58 = OpLoad %10 %57 - %59 = OpAccessChain %38 %34 %20 - OpStore %59 %58 - %60 = OpAccessChain %22 %18 %40 %40 - %61 = OpLoad %9 %60 - %62 = OpAccessChain %43 %34 %40 - OpStore %62 %61 - %63 = OpAccessChain %54 %53 %40 - %64 = OpLoad %50 %63 - OpStore %49 %64 + %55 = OpAccessChain %35 %18 %40 %20 + %56 = OpLoad %10 %55 + %57 = OpAccessChain %38 %34 %20 + OpStore %57 %56 + %58 = OpAccessChain %22 %18 %40 %40 + %59 = OpLoad %9 %58 + %60 = OpAccessChain %43 %34 %40 + OpStore %60 %59 + %61 = OpAccessChain %52 %51 %40 + %62 = OpLoad %46 %61 + OpStore %48 %62 OpEmitVertex - %65 = OpAccessChain %35 %18 %25 %20 - %66 = OpLoad %10 %65 - %67 = OpAccessChain %38 %34 %20 - OpStore %67 %66 - %68 = OpAccessChain %22 %18 %25 %40 - %69 = OpLoad %9 %68 - %70 = OpAccessChain %43 %34 %40 - OpStore %70 %69 - %71 = OpAccessChain %54 %53 %25 - %72 = OpLoad %50 %71 - OpStore %49 %72 + %63 = OpAccessChain %35 %18 %25 %20 + %64 = OpLoad %10 %63 + %65 = OpAccessChain %38 %34 %20 + OpStore %65 %64 + %66 = OpAccessChain %22 %18 %25 %40 + %67 = OpLoad %9 %66 + %68 = OpAccessChain %43 %34 %40 + OpStore %68 %67 + %69 = OpAccessChain %52 %51 %25 + %70 = OpLoad %46 %69 + OpStore %48 %70 OpEmitVertex OpEndPrimitive - %73 = OpAccessChain %35 %18 %25 %20 - %74 = OpLoad %10 %73 - %75 = OpAccessChain %38 %34 %20 - OpStore %75 %74 - %76 = OpAccessChain %22 %18 %25 %40 - %77 = OpLoad %9 %76 - %78 = OpAccessChain %43 %34 %40 - OpStore %78 %77 - %79 = OpAccessChain %54 %53 %25 - %80 = OpLoad %50 %79 - OpStore %49 %80 + %71 = OpAccessChain %35 %18 %25 %20 + %72 = OpLoad %10 %71 + %73 = OpAccessChain %38 %34 %20 + OpStore %73 %72 + %74 = OpAccessChain %22 %18 %25 %40 + %75 = OpLoad %9 %74 + %76 = OpAccessChain %43 %34 %40 + OpStore %76 %75 + %77 = OpAccessChain 
%52 %51 %25 + %78 = OpLoad %46 %77 + OpStore %48 %78 OpEmitVertex - %81 = OpAccessChain %35 %18 %40 %20 - %82 = OpLoad %10 %81 - %83 = OpAccessChain %38 %34 %20 - OpStore %83 %82 - %84 = OpAccessChain %22 %18 %40 %40 - %85 = OpLoad %9 %84 - %86 = OpAccessChain %43 %34 %40 - OpStore %86 %85 - %87 = OpAccessChain %54 %53 %40 - %88 = OpLoad %50 %87 - OpStore %49 %88 + %79 = OpAccessChain %35 %18 %40 %20 + %80 = OpLoad %10 %79 + %81 = OpAccessChain %38 %34 %20 + OpStore %81 %80 + %82 = OpAccessChain %22 %18 %40 %40 + %83 = OpLoad %9 %82 + %84 = OpAccessChain %43 %34 %40 + OpStore %84 %83 + %85 = OpAccessChain %52 %51 %40 + %86 = OpLoad %46 %85 + OpStore %48 %86 OpEmitVertex - %89 = OpAccessChain %35 %18 %40 %20 + %87 = OpAccessChain %35 %18 %40 %20 + %88 = OpLoad %10 %87 + %89 = OpAccessChain %35 %18 %25 %20 %90 = OpLoad %10 %89 - %91 = OpAccessChain %35 %18 %25 %20 - %92 = OpLoad %10 %91 - %93 = OpFAdd %10 %90 %92 - %94 = OpAccessChain %35 %18 %20 %20 - %95 = OpLoad %10 %94 - %96 = OpFSub %10 %93 %95 - %97 = OpAccessChain %38 %34 %20 - OpStore %97 %96 - %98 = OpAccessChain %22 %18 %25 %40 - %99 = OpLoad %9 %98 - %100 = OpAccessChain %43 %34 %40 - OpStore %100 %99 - OpStore %102 %20 - OpBranch %103 - %103 = OpLabel - OpLoopMerge %105 %106 None - OpBranch %107 - %107 = OpLabel - %108 = OpLoad %19 %102 - %110 = OpSLessThan %6 %108 %109 - OpBranchConditional %110 %104 %105 - %104 = OpLabel - %111 = OpLoad %19 %102 - %112 = OpLoad %19 %102 - %113 = OpAccessChain %35 %53 %20 %20 %112 - %114 = OpLoad %10 %113 - %115 = OpFNegate %10 %114 - %116 = OpLoad %19 %102 - %117 = OpAccessChain %35 %53 %40 %20 %116 - %118 = OpLoad %10 %117 - %119 = OpFAdd %10 %115 %118 - %120 = OpLoad %19 %102 - %121 = OpAccessChain %35 %53 %25 %20 %120 - %122 = OpLoad %10 %121 - %123 = OpFAdd %10 %119 %122 - %124 = OpAccessChain %38 %49 %20 %111 - OpStore %124 %123 - OpBranch %106 - %106 = OpLabel - %125 = OpLoad %19 %102 - %126 = OpIAdd %19 %125 %40 - OpStore %102 %126 - OpBranch %103 + %91 = OpFAdd %10 %88 %90 + %92 = OpAccessChain %35 %18 %20 %20 + %93 = OpLoad %10 %92 + %94 = OpFSub %10 %91 %93 + %95 = OpAccessChain %38 %34 %20 + OpStore %95 %94 + %96 = OpAccessChain %22 %18 %25 %40 + %97 = OpLoad %9 %96 + %98 = OpAccessChain %43 %34 %40 + OpStore %98 %97 + OpStore %100 %20 + OpBranch %101 + %101 = OpLabel + OpLoopMerge %103 %104 None + OpBranch %105 %105 = OpLabel + %106 = OpLoad %19 %100 + %108 = OpSLessThan %6 %106 %107 + OpBranchConditional %108 %102 %103 + %102 = OpLabel + %109 = OpLoad %19 %100 + %110 = OpLoad %19 %100 + %111 = OpAccessChain %35 %51 %20 %110 + %112 = OpLoad %10 %111 + %113 = OpFNegate %10 %112 + %114 = OpLoad %19 %100 + %115 = OpAccessChain %35 %51 %40 %114 + %116 = OpLoad %10 %115 + %117 = OpFAdd %10 %113 %116 + %118 = OpLoad %19 %100 + %119 = OpAccessChain %35 %51 %25 %118 + %120 = OpLoad %10 %119 + %121 = OpFAdd %10 %117 %120 + %122 = OpAccessChain %38 %48 %109 + OpStore %122 %121 + OpBranch %104 + %104 = OpLabel + %123 = OpLoad %19 %100 + %124 = OpIAdd %19 %123 %40 + OpStore %100 %124 + OpBranch %101 + %103 = OpLabel OpEmitVertex OpEndPrimitive OpBranch %31 - %127 = OpLabel - %128 = OpAccessChain %35 %18 %20 %20 - %129 = OpLoad %10 %128 - %130 = OpAccessChain %38 %34 %20 - OpStore %130 %129 - %131 = OpAccessChain %22 %18 %20 %40 - %132 = OpLoad %9 %131 - %133 = OpAccessChain %43 %34 %40 - OpStore %133 %132 - %134 = OpAccessChain %54 %53 %20 - %135 = OpLoad %50 %134 - OpStore %49 %135 + %125 = OpLabel + %126 = OpAccessChain %35 %18 %20 %20 + %127 = OpLoad %10 %126 + %128 = OpAccessChain %38 %34 
%20 + OpStore %128 %127 + %129 = OpAccessChain %22 %18 %20 %40 + %130 = OpLoad %9 %129 + %131 = OpAccessChain %43 %34 %40 + OpStore %131 %130 + %132 = OpAccessChain %52 %51 %20 + %133 = OpLoad %46 %132 + OpStore %48 %133 OpEmitVertex - %136 = OpAccessChain %35 %18 %40 %20 - %137 = OpLoad %10 %136 - %138 = OpAccessChain %38 %34 %20 - OpStore %138 %137 - %139 = OpAccessChain %22 %18 %40 %40 - %140 = OpLoad %9 %139 - %141 = OpAccessChain %43 %34 %40 - OpStore %141 %140 - %142 = OpAccessChain %54 %53 %40 - %143 = OpLoad %50 %142 - OpStore %49 %143 + %134 = OpAccessChain %35 %18 %40 %20 + %135 = OpLoad %10 %134 + %136 = OpAccessChain %38 %34 %20 + OpStore %136 %135 + %137 = OpAccessChain %22 %18 %40 %40 + %138 = OpLoad %9 %137 + %139 = OpAccessChain %43 %34 %40 + OpStore %139 %138 + %140 = OpAccessChain %52 %51 %40 + %141 = OpLoad %46 %140 + OpStore %48 %141 OpEmitVertex - %144 = OpAccessChain %35 %18 %25 %20 - %145 = OpLoad %10 %144 - %146 = OpAccessChain %38 %34 %20 - OpStore %146 %145 - %147 = OpAccessChain %22 %18 %25 %40 - %148 = OpLoad %9 %147 - %149 = OpAccessChain %43 %34 %40 - OpStore %149 %148 - %150 = OpAccessChain %54 %53 %25 - %151 = OpLoad %50 %150 - OpStore %49 %151 + %142 = OpAccessChain %35 %18 %25 %20 + %143 = OpLoad %10 %142 + %144 = OpAccessChain %38 %34 %20 + OpStore %144 %143 + %145 = OpAccessChain %22 %18 %25 %40 + %146 = OpLoad %9 %145 + %147 = OpAccessChain %43 %34 %40 + OpStore %147 %146 + %148 = OpAccessChain %52 %51 %25 + %149 = OpLoad %46 %148 + OpStore %48 %149 OpEmitVertex OpEndPrimitive - %152 = OpAccessChain %35 %18 %20 %20 - %153 = OpLoad %10 %152 - %154 = OpAccessChain %38 %34 %20 - OpStore %154 %153 - %155 = OpAccessChain %22 %18 %20 %40 - %156 = OpLoad %9 %155 - %157 = OpAccessChain %43 %34 %40 - OpStore %157 %156 - %158 = OpAccessChain %54 %53 %20 - %159 = OpLoad %50 %158 - OpStore %49 %159 + %150 = OpAccessChain %35 %18 %20 %20 + %151 = OpLoad %10 %150 + %152 = OpAccessChain %38 %34 %20 + OpStore %152 %151 + %153 = OpAccessChain %22 %18 %20 %40 + %154 = OpLoad %9 %153 + %155 = OpAccessChain %43 %34 %40 + OpStore %155 %154 + %156 = OpAccessChain %52 %51 %20 + %157 = OpLoad %46 %156 + OpStore %48 %157 OpEmitVertex - %160 = OpAccessChain %35 %18 %25 %20 - %161 = OpLoad %10 %160 - %162 = OpAccessChain %38 %34 %20 - OpStore %162 %161 - %163 = OpAccessChain %22 %18 %25 %40 - %164 = OpLoad %9 %163 - %165 = OpAccessChain %43 %34 %40 - OpStore %165 %164 - %166 = OpAccessChain %54 %53 %25 - %167 = OpLoad %50 %166 - OpStore %49 %167 + %158 = OpAccessChain %35 %18 %25 %20 + %159 = OpLoad %10 %158 + %160 = OpAccessChain %38 %34 %20 + OpStore %160 %159 + %161 = OpAccessChain %22 %18 %25 %40 + %162 = OpLoad %9 %161 + %163 = OpAccessChain %43 %34 %40 + OpStore %163 %162 + %164 = OpAccessChain %52 %51 %25 + %165 = OpLoad %46 %164 + OpStore %48 %165 OpEmitVertex - %168 = OpAccessChain %35 %18 %20 %20 + %166 = OpAccessChain %35 %18 %20 %20 + %167 = OpLoad %10 %166 + %168 = OpAccessChain %35 %18 %25 %20 %169 = OpLoad %10 %168 - %170 = OpAccessChain %35 %18 %25 %20 - %171 = OpLoad %10 %170 - %172 = OpFAdd %10 %169 %171 - %173 = OpAccessChain %35 %18 %40 %20 - %174 = OpLoad %10 %173 - %175 = OpFSub %10 %172 %174 - %176 = OpAccessChain %38 %34 %20 - OpStore %176 %175 - %177 = OpAccessChain %22 %18 %25 %40 - %178 = OpLoad %9 %177 - %179 = OpAccessChain %43 %34 %40 - OpStore %179 %178 - OpStore %180 %20 - OpBranch %181 - %181 = OpLabel - OpLoopMerge %183 %184 None - OpBranch %185 - %185 = OpLabel - %186 = OpLoad %19 %180 - %187 = OpSLessThan %6 %186 %109 - OpBranchConditional %187 
%182 %183 - %182 = OpLabel - %188 = OpLoad %19 %180 - %189 = OpLoad %19 %180 - %190 = OpAccessChain %35 %53 %20 %20 %189 - %191 = OpLoad %10 %190 - %192 = OpLoad %19 %180 - %193 = OpAccessChain %35 %53 %40 %20 %192 - %194 = OpLoad %10 %193 - %195 = OpFNegate %10 %194 - %196 = OpFAdd %10 %191 %195 - %197 = OpLoad %19 %180 - %198 = OpAccessChain %35 %53 %25 %20 %197 - %199 = OpLoad %10 %198 - %200 = OpFAdd %10 %196 %199 - %201 = OpAccessChain %38 %49 %20 %188 - OpStore %201 %200 - OpBranch %184 - %184 = OpLabel - %202 = OpLoad %19 %180 - %203 = OpIAdd %19 %202 %40 - OpStore %180 %203 - OpBranch %181 + %170 = OpFAdd %10 %167 %169 + %171 = OpAccessChain %35 %18 %40 %20 + %172 = OpLoad %10 %171 + %173 = OpFSub %10 %170 %172 + %174 = OpAccessChain %38 %34 %20 + OpStore %174 %173 + %175 = OpAccessChain %22 %18 %25 %40 + %176 = OpLoad %9 %175 + %177 = OpAccessChain %43 %34 %40 + OpStore %177 %176 + OpStore %178 %20 + OpBranch %179 + %179 = OpLabel + OpLoopMerge %181 %182 None + OpBranch %183 %183 = OpLabel + %184 = OpLoad %19 %178 + %185 = OpSLessThan %6 %184 %107 + OpBranchConditional %185 %180 %181 + %180 = OpLabel + %186 = OpLoad %19 %178 + %187 = OpLoad %19 %178 + %188 = OpAccessChain %35 %51 %20 %187 + %189 = OpLoad %10 %188 + %190 = OpLoad %19 %178 + %191 = OpAccessChain %35 %51 %40 %190 + %192 = OpLoad %10 %191 + %193 = OpFNegate %10 %192 + %194 = OpFAdd %10 %189 %193 + %195 = OpLoad %19 %178 + %196 = OpAccessChain %35 %51 %25 %195 + %197 = OpLoad %10 %196 + %198 = OpFAdd %10 %194 %197 + %199 = OpAccessChain %38 %48 %186 + OpStore %199 %198 + OpBranch %182 + %182 = OpLabel + %200 = OpLoad %19 %178 + %201 = OpIAdd %19 %200 %40 + OpStore %178 %201 + OpBranch %179 + %181 = OpLabel OpEmitVertex OpEndPrimitive OpBranch %31 diff --git a/src/xenia/gpu/vulkan/shaders/rect_list.geom b/src/xenia/gpu/vulkan/shaders/rect_list.geom index d796919d3..6c7e24c7e 100644 --- a/src/xenia/gpu/vulkan/shaders/rect_list.geom +++ b/src/xenia/gpu/vulkan/shaders/rect_list.geom @@ -16,11 +16,8 @@ out gl_PerVertex { float gl_ClipDistance[]; }; -struct VertexData { - vec4 o[16]; -}; -layout(location = 0) in VertexData in_vtx[]; -layout(location = 0) out VertexData out_vtx; +layout(location = 0) in vec4 in_interpolators[][16]; +layout(location = 0) out vec4 out_interpolators[16]; layout(triangles) in; layout(triangle_strip, max_vertices = 6) out; @@ -35,30 +32,30 @@ void main() { // 2 ----- [3] gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); EndPrimitive(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = (gl_in[1].gl_Position + gl_in[2].gl_Position) - gl_in[0].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; for (int i = 0; i < 16; ++i) { - out_vtx.o[i] = -in_vtx[0].o[i] + in_vtx[1].o[i] + in_vtx[2].o[i]; + out_interpolators[i] = -in_interpolators[0][i] + in_interpolators[1][i] + in_interpolators[2][i]; } EmitVertex(); EndPrimitive(); @@ -70,30 
+67,30 @@ void main() { // [3] ----- 2 gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); EndPrimitive(); gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); gl_Position = (gl_in[0].gl_Position + gl_in[2].gl_Position) - gl_in[1].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; for (int i = 0; i < 16; ++i) { - out_vtx.o[i] = in_vtx[0].o[i] + -in_vtx[1].o[i] + in_vtx[2].o[i]; + out_interpolators[i] = in_interpolators[0][i] + -in_interpolators[1][i] + in_interpolators[2][i]; } EmitVertex(); EndPrimitive(); diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 4e93a46ca..a6f6dab17 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -25,26 +25,104 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; constexpr uint32_t kMaxTextureSamplers = 32; +constexpr VkDeviceSize kStagingBufferSize = 64 * 1024 * 1024; -TextureCache::TextureCache(RegisterFile* register_file, +struct TextureConfig { + TextureFormat guest_format; + VkFormat host_format; +}; + +static const TextureConfig texture_configs[64] = { + {TextureFormat::k_1_REVERSE, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_1, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8, VK_FORMAT_R8_UNORM}, + {TextureFormat::k_1_5_5_5, VK_FORMAT_R5G5B5A1_UNORM_PACK16}, + {TextureFormat::k_5_6_5, VK_FORMAT_R5G6B5_UNORM_PACK16}, + {TextureFormat::k_6_5_5, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8_8_8, VK_FORMAT_R8G8B8A8_UNORM}, + {TextureFormat::k_2_10_10_10, VK_FORMAT_A2R10G10B10_UNORM_PACK32}, + {TextureFormat::k_8_A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_B, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8, VK_FORMAT_R8G8_UNORM}, + {TextureFormat::k_Cr_Y1_Cb_Y0, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_Y1_Cr_Y0_Cb, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8_8_8_A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_4_4_4_4, VK_FORMAT_R4G4B4A4_UNORM_PACK16}, + {TextureFormat::k_10_11_11, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_11_11_10, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_DXT1, VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, + {TextureFormat::k_DXT2_3, VK_FORMAT_BC2_SRGB_BLOCK}, + {TextureFormat::k_DXT4_5, VK_FORMAT_BC3_SRGB_BLOCK}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_24_8, VK_FORMAT_D24_UNORM_S8_UINT}, + {TextureFormat::k_24_8_FLOAT, VK_FORMAT_D24_UNORM_S8_UINT}, // ? + {TextureFormat::k_16, VK_FORMAT_R16_UNORM}, + {TextureFormat::k_16_16, VK_FORMAT_R16G16_UNORM}, + {TextureFormat::k_16_16_16_16, VK_FORMAT_R16G16B16A16_UNORM}, + {TextureFormat::k_16_EXPAND, VK_FORMAT_R16_UNORM}, // ? + {TextureFormat::k_16_16_EXPAND, VK_FORMAT_R16G16_UNORM}, // ? + {TextureFormat::k_16_16_16_16_EXPAND, VK_FORMAT_R16G16B16A16_UNORM}, // ? 
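+ // Note: rows in this table are indexed by the guest TextureFormat enum
+ // value, so their order must match the enum exactly. Entries marked "?"
+ // are best-guess host mappings; VK_FORMAT_UNDEFINED entries have no direct
+ // host equivalent and need conversion on upload.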
+ {TextureFormat::k_16_FLOAT, VK_FORMAT_R16_SFLOAT}, + {TextureFormat::k_16_16_FLOAT, VK_FORMAT_R16G16_SFLOAT}, + {TextureFormat::k_16_16_16_16_FLOAT, VK_FORMAT_R16G16B16A16_SFLOAT}, + {TextureFormat::k_32, VK_FORMAT_R32_SINT}, + {TextureFormat::k_32_32, VK_FORMAT_R32G32_SINT}, + {TextureFormat::k_32_32_32_32, VK_FORMAT_R32G32B32A32_SINT}, + {TextureFormat::k_32_FLOAT, VK_FORMAT_R32_SFLOAT}, + {TextureFormat::k_32_32_FLOAT, VK_FORMAT_R32G32_SFLOAT}, + {TextureFormat::k_32_32_32_32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT}, + {TextureFormat::k_32_AS_8, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_8, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_MPEG, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_16_MPEG, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, + + // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf + {TextureFormat::k_DXN, VK_FORMAT_BC5_UNORM_BLOCK}, // ? + {TextureFormat::k_8_8_8_8_AS_16_16_16_16, VK_FORMAT_R8G8B8A8_UNORM}, + {TextureFormat::k_DXT1_AS_16_16_16_16, VK_FORMAT_BC1_RGB_UNORM_BLOCK}, + {TextureFormat::k_DXT2_3_AS_16_16_16_16, VK_FORMAT_BC2_UNORM_BLOCK}, + {TextureFormat::k_DXT4_5_AS_16_16_16_16, VK_FORMAT_BC3_UNORM_BLOCK}, + {TextureFormat::k_2_10_10_10_AS_16_16_16_16, + VK_FORMAT_A2R10G10B10_UNORM_PACK32}, + {TextureFormat::k_10_11_11_AS_16_16_16_16, + VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_11_11_10_AS_16_16_16_16, + VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_32_32_32_FLOAT, VK_FORMAT_R32G32B32_SFLOAT}, + {TextureFormat::k_DXT3A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_DXT5A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_CTX1, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_DXT3A_AS_1_1_1_1, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, +}; + +TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), + : memory_(memory), + register_file_(register_file), trace_writer_(trace_writer), - device_(device) { + device_(device), + staging_buffer_(device) { // Descriptor pool used for all of our cached descriptors. 
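+ // One shared pool of combined image/sampler descriptors. Sets are returned
+ // individually (FREE_DESCRIPTOR_SET_BIT below), and 8192 is presumably just
+ // a generous upper bound on sets alive across in-flight frames.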
VkDescriptorPoolCreateInfo descriptor_pool_info; descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; descriptor_pool_info.pNext = nullptr; descriptor_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = 256; - VkDescriptorPoolSize pool_sizes[2]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[0].descriptorCount = 32; - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - pool_sizes[1].descriptorCount = 32; - descriptor_pool_info.poolSizeCount = 2; + descriptor_pool_info.maxSets = 8192; + VkDescriptorPoolSize pool_sizes[1]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[0].descriptorCount = 8192; + descriptor_pool_info.poolSizeCount = 1; descriptor_pool_info.pPoolSizes = pool_sizes; auto err = vkCreateDescriptorPool(*device_, &descriptor_pool_info, nullptr, &descriptor_pool_); @@ -52,18 +130,11 @@ TextureCache::TextureCache(RegisterFile* register_file, // Create the descriptor set layout used for rendering. // We always have the same number of samplers but only some are used. - VkDescriptorSetLayoutBinding bindings[5]; - auto& sampler_binding = bindings[0]; - sampler_binding.binding = 0; - sampler_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler_binding.descriptorCount = kMaxTextureSamplers; - sampler_binding.stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - sampler_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutBinding bindings[4]; for (int i = 0; i < 4; ++i) { - auto& texture_binding = bindings[1 + i]; - texture_binding.binding = 1 + i; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + auto& texture_binding = bindings[i]; + texture_binding.binding = i; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; texture_binding.descriptorCount = kMaxTextureSamplers; texture_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; @@ -81,96 +152,759 @@ TextureCache::TextureCache(RegisterFile* register_file, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); - SetupGridImages(); + if (!staging_buffer_.Initialize(kStagingBufferSize, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) { + assert_always(); + } + + invalidated_textures_sets_[0].reserve(64); + invalidated_textures_sets_[1].reserve(64); + invalidated_textures_ = &invalidated_textures_sets_[0]; } TextureCache::~TextureCache() { - vkDestroyImageView(*device_, grid_image_2d_view_, nullptr); - vkDestroyImage(*device_, grid_image_2d_, nullptr); - vkFreeMemory(*device_, grid_image_2d_memory_, nullptr); + for (auto it = samplers_.begin(); it != samplers_.end(); ++it) { + vkDestroySampler(*device_, it->second->sampler, nullptr); + delete it->second; + } + samplers_.clear(); vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, nullptr); vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr); } -void TextureCache::SetupGridImages() { - VkImageCreateInfo image_info; +TextureCache::Texture* TextureCache::AllocateTexture( + const TextureInfo& texture_info) { + // Create an image first. 
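+ // Guest Dimension maps directly onto VkImageType below; the only special
+ // case is cube maps, which Vulkan expresses as a 2D image created with the
+ // CUBE_COMPATIBLE flag.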
+ VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = nullptr; - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_UNORM; - image_info.extent = {8, 8, 1}; + switch (texture_info.dimension) { + case Dimension::k1D: + image_info.imageType = VK_IMAGE_TYPE_1D; + break; + case Dimension::k2D: + image_info.imageType = VK_IMAGE_TYPE_2D; + break; + case Dimension::k3D: + image_info.imageType = VK_IMAGE_TYPE_3D; + break; + case Dimension::kCube: + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + break; + default: + assert_unhandled_case(texture_info.dimension); + return nullptr; + } + + assert_not_null(texture_info.format_info); + auto& config = texture_configs[int(texture_info.format_info->format)]; + VkFormat format = config.host_format != VK_FORMAT_UNDEFINED + ? config.host_format + : VK_FORMAT_R8G8B8A8_UNORM; + + VkFormatProperties props; + uint32_t required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + vkGetPhysicalDeviceFormatProperties(*device_, format, &props); + if ((props.optimalTilingFeatures & required_flags) != required_flags) { + // Texture needs conversion on upload to a native format. + // assert_always(); + } + + image_info.format = format; + image_info.extent = {texture_info.width + 1, texture_info.height + 1, + texture_info.depth + 1}; image_info.mipLevels = 1; image_info.arrayLayers = 1; image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_LINEAR; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - auto err = vkCreateImage(*device_, &image_info, nullptr, &grid_image_2d_); + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImage image; + auto err = vkCreateImage(*device_, &image_info, nullptr, &image); CheckResult(err, "vkCreateImage"); - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(*device_, grid_image_2d_, &memory_requirements); - grid_image_2d_memory_ = device_->AllocateMemory( - memory_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - err = vkBindImageMemory(*device_, grid_image_2d_, grid_image_2d_memory_, 0); + VkMemoryRequirements mem_requirements; + vkGetImageMemoryRequirements(*device_, image, &mem_requirements); + + // TODO: Use a circular buffer or something else to allocate this memory. + // The device has a limited amount (around 64) of memory allocations that we + // can make. + // Now that we have the size, back the image with GPU memory. + auto memory = device_->AllocateMemory(mem_requirements, 0); + if (!memory) { + // Crap. 
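+ // Allocation failed; release the image we just created and report the
+ // texture as unavailable to the caller.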
+ assert_always(); + vkDestroyImage(*device_, image, nullptr); + return nullptr; + } + + err = vkBindImageMemory(*device_, image, memory, 0); CheckResult(err, "vkBindImageMemory"); + auto texture = new Texture(); + texture->format = image_info.format; + texture->image = image; + texture->image_layout = image_info.initialLayout; + texture->image_memory = memory; + texture->memory_offset = 0; + texture->memory_size = mem_requirements.size; + texture->texture_info = texture_info; + + // Create a default view, just for kicks. VkImageViewCreateInfo view_info; view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; view_info.pNext = nullptr; view_info.flags = 0; - view_info.image = grid_image_2d_; + view_info.image = image; view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + view_info.format = image_info.format; view_info.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, }; view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - err = vkCreateImageView(*device_, &view_info, nullptr, &grid_image_2d_view_); + VkImageView view; + err = vkCreateImageView(*device_, &view_info, nullptr, &view); CheckResult(err, "vkCreateImageView"); + if (err == VK_SUCCESS) { + auto texture_view = std::make_unique(); + texture_view->texture = texture; + texture_view->view = view; + texture_view->swiz_x = 0; + texture_view->swiz_y = 1; + texture_view->swiz_z = 2; + texture_view->swiz_w = 3; + texture->views.push_back(std::move(texture_view)); + } - VkImageSubresource subresource; - subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource.mipLevel = 0; - subresource.arrayLayer = 0; - VkSubresourceLayout layout; - vkGetImageSubresourceLayout(*device_, grid_image_2d_, &subresource, &layout); + return texture; +} - void* gpu_data = nullptr; - err = vkMapMemory(*device_, grid_image_2d_memory_, 0, layout.size, 0, - &gpu_data); - CheckResult(err, "vkMapMemory"); +bool TextureCache::FreeTexture(Texture* texture) { + if (texture->in_flight_fence && + texture->in_flight_fence->status() != VK_SUCCESS) { + // Texture still in flight. + return false; + } - uint32_t grid_pixels[8 * 8]; - for (int y = 0; y < 8; ++y) { - for (int x = 0; x < 8; ++x) { - grid_pixels[y * 8 + x] = - ((y % 2 == 0) ^ (x % 2 != 0)) ? 0xFFFFFFFF : 0xFF0000FF; + for (auto it = texture->views.begin(); it != texture->views.end();) { + vkDestroyImageView(*device_, (*it)->view, nullptr); + it = texture->views.erase(it); + } + + if (texture->access_watch_handle) { + memory_->CancelAccessWatch(texture->access_watch_handle); + texture->access_watch_handle = 0; + } + + vkDestroyImage(*device_, texture->image, nullptr); + vkFreeMemory(*device_, texture->image_memory, nullptr); + delete texture; + return true; +} + +TextureCache::Texture* TextureCache::DemandResolveTexture( + const TextureInfo& texture_info, TextureFormat format, + VkOffset2D* out_offset) { + // Check to see if we've already used a texture at this location. + auto texture = LookupAddress( + texture_info.guest_address, texture_info.size_2d.block_width, + texture_info.size_2d.block_height, format, out_offset); + if (texture) { + return texture; + } + + // No texture at this location. Make a new one. + texture = AllocateTexture(texture_info); + texture->is_full_texture = false; + + // Setup an access watch. If this texture is touched, it is destroyed. 
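+ // The watch protects the guest pages backing this texture and fires on the
+ // first CPU write to the range. The callback only flags the texture; actual
+ // teardown is deferred to Scavenge, as the fault may occur on any thread.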
+ texture->access_watch_handle = memory_->AddPhysicalAccessWatch( + texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast(context_ptr); + auto touched_texture = reinterpret_cast(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_texture->access_watch_handle = 0; + touched_texture->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. + self->invalidated_resolve_textures_mutex_.lock(); + self->invalidated_resolve_textures_.push_back(touched_texture); + self->invalidated_resolve_textures_mutex_.unlock(); + }, + this, texture); + + resolve_textures_.push_back(texture); + return texture; +} + +TextureCache::Texture* TextureCache::Demand( + const TextureInfo& texture_info, VkCommandBuffer command_buffer, + std::shared_ptr completion_fence) { + // Run a tight loop to scan for an exact match existing texture. + auto texture_hash = texture_info.hash(); + for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { + if (it->second->texture_info == texture_info) { + if (it->second->pending_invalidation) { + // This texture has been invalidated! + Scavenge(); + break; + } + + return it->second; } } - std::memcpy(gpu_data, grid_pixels, sizeof(grid_pixels)); - vkUnmapMemory(*device_, grid_image_2d_memory_); + // Check resolve textures. + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + auto texture = (*it); + if (texture_info.guest_address == texture->texture_info.guest_address && + texture_info.size_2d.logical_width == + texture->texture_info.size_2d.logical_width && + texture_info.size_2d.logical_height == + texture->texture_info.size_2d.logical_height) { + // Exact match. + // TODO: Lazy match (at an offset) + // Upgrade this texture to a full texture. + texture->is_full_texture = true; + texture->texture_info = texture_info; + + if (texture->access_watch_handle) { + memory_->CancelAccessWatch(texture->access_watch_handle); + } + + texture->access_watch_handle = memory_->AddPhysicalAccessWatch( + texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast(context_ptr); + auto touched_texture = reinterpret_cast(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_texture->access_watch_handle = 0; + touched_texture->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. + self->invalidated_textures_mutex_.lock(); + self->invalidated_textures_->push_back(touched_texture); + self->invalidated_textures_mutex_.unlock(); + }, + this, texture); + + textures_[texture_hash] = *it; + it = resolve_textures_.erase(it); + return textures_[texture_hash]; + } + } + + if (!command_buffer) { + // Texture not found and no command buffer was passed, preventing us from + // uploading a new one. + return nullptr; + } + + if (texture_info.dimension != Dimension::k2D) { + // Abort. + return nullptr; + } + + // Create a new texture and cache it. + auto texture = AllocateTexture(texture_info); + if (!texture) { + // Failed to allocate texture (out of memory?) 
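+ // Nothing sensible to recover here yet; assert loudly and let the caller
+ // treat the texture as missing.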
+    assert_always();
+    return nullptr;
+  }
+
+  bool uploaded = false;
+  switch (texture_info.dimension) {
+    case Dimension::k2D: {
+      uploaded = UploadTexture2D(command_buffer, completion_fence, texture,
+                                 texture_info);
+    } break;
+    default:
+      assert_unhandled_case(texture_info.dimension);
+      break;
+  }
+
+  if (!uploaded) {
+    FreeTexture(texture);
+    return nullptr;
+  }
+
+  // Copy in overlapping resolve textures.
+  // FIXME: RDR appears to take textures from small chunks of a resolve
+  // texture?
+  if (texture_info.dimension == Dimension::k2D) {
+    for (auto it = resolve_textures_.begin(); it != resolve_textures_.end();
+         ++it) {
+      auto resolve_texture = (*it);
+      if (texture_info.guest_address >=
+              resolve_texture->texture_info.guest_address &&
+          texture_info.guest_address <
+              resolve_texture->texture_info.guest_address +
+                  resolve_texture->texture_info.input_length) {
+        // Lazy matched a resolve texture. Copy it in and destroy it.
+        // Future resolves will just copy directly into this texture.
+        // assert_always();
+      }
+    }
+  }
+
+  // Though we didn't find an exact match, that doesn't mean we're out of the
+  // woods yet. This texture could either be a portion of another texture or
+  // vice versa. Copy any overlapping textures into this texture.
+  // TODO: Byte count -> pixel count (on x and y axes)
+  for (auto it = textures_.begin(); it != textures_.end(); ++it) {
+  }
+
+  // Okay. Now that the texture is uploaded from system memory, put a write
+  // watch on it to tell us if it's been modified by the guest.
+  texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
+      texture_info.guest_address, texture_info.input_length,
+      cpu::MMIOHandler::kWatchWrite,
+      [](void* context_ptr, void* data_ptr, uint32_t address) {
+        auto self = reinterpret_cast<TextureCache*>(context_ptr);
+        auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
+        // Clear the watch handle first so we don't redundantly remove it.
+        touched_texture->access_watch_handle = 0;
+        touched_texture->pending_invalidation = true;
+        // Add to pending list so Scavenge will clean it up.
+ self->invalidated_textures_mutex_.lock(); + self->invalidated_textures_->push_back(touched_texture); + self->invalidated_textures_mutex_.unlock(); + }, + this, texture); + + textures_[texture_hash] = texture; + return texture; +} + +TextureCache::TextureView* TextureCache::DemandView(Texture* texture, + uint16_t swizzle) { + for (auto it = texture->views.begin(); it != texture->views.end(); ++it) { + if ((*it)->swizzle == swizzle) { + return (*it).get(); + } + } + + VkImageViewCreateInfo view_info; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.pNext = nullptr; + view_info.flags = 0; + view_info.image = texture->image; + view_info.format = texture->format; + + switch (texture->texture_info.dimension) { + case Dimension::k1D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_1D; + break; + case Dimension::k2D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + break; + case Dimension::k3D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + case Dimension::kCube: + view_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + default: + assert_always(); + } + + VkComponentSwizzle swiz_component_map[] = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, + VK_COMPONENT_SWIZZLE_IDENTITY, + }; + + view_info.components = { + swiz_component_map[(swizzle >> 0) & 0x7], + swiz_component_map[(swizzle >> 3) & 0x7], + swiz_component_map[(swizzle >> 6) & 0x7], + swiz_component_map[(swizzle >> 9) & 0x7], + }; + view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkImageView view; + auto status = vkCreateImageView(*device_, &view_info, nullptr, &view); + CheckResult(status, "vkCreateImageView"); + if (status == VK_SUCCESS) { + auto texture_view = new TextureView(); + texture_view->texture = texture; + texture_view->view = view; + texture_view->swizzle = swizzle; + texture->views.push_back(std::unique_ptr(texture_view)); + return texture_view; + } + + return nullptr; +} + +TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + auto sampler_hash = sampler_info.hash(); + for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { + if (it->second->sampler_info == sampler_info) { + // Found a compatible sampler. + return it->second; + } + } + + VkResult status = VK_SUCCESS; + + // Create a new sampler and cache it. + // TODO: Actually set the properties + VkSamplerCreateInfo sampler_create_info; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_create_info.pNext = nullptr; + sampler_create_info.flags = 0; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + + // Texture level filtering. + VkSamplerMipmapMode mip_filter; + switch (sampler_info.mip_filter) { + case TextureFilter::kBaseMap: + // TODO(DrChat): ? 
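+      // kBaseMap presumably means "sample only the base mip level"; NEAREST
+      // combined with the maxLod of 0.0f set below should approximate that.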
+      mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+      break;
+    case TextureFilter::kPoint:
+      mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+      break;
+    case TextureFilter::kLinear:
+      mip_filter = VK_SAMPLER_MIPMAP_MODE_LINEAR;
+      break;
+    default:
+      assert_unhandled_case(sampler_info.mip_filter);
+      return nullptr;
+  }
+
+  VkFilter min_filter;
+  switch (sampler_info.min_filter) {
+    case TextureFilter::kPoint:
+      min_filter = VK_FILTER_NEAREST;
+      break;
+    case TextureFilter::kLinear:
+      min_filter = VK_FILTER_LINEAR;
+      break;
+    default:
+      assert_unhandled_case(sampler_info.min_filter);
+      return nullptr;
+  }
+  VkFilter mag_filter;
+  switch (sampler_info.mag_filter) {
+    case TextureFilter::kPoint:
+      mag_filter = VK_FILTER_NEAREST;
+      break;
+    case TextureFilter::kLinear:
+      mag_filter = VK_FILTER_LINEAR;
+      break;
+    default:
+      assert_unhandled_case(sampler_info.mag_filter);
+      return nullptr;
+  }
+
+  sampler_create_info.minFilter = min_filter;
+  sampler_create_info.magFilter = mag_filter;
+  sampler_create_info.mipmapMode = mip_filter;
+
+  // FIXME: Both halfway / mirror clamp to border aren't mapped properly.
+  VkSamplerAddressMode address_mode_map[] = {
+      /* kRepeat               */ VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      /* kMirroredRepeat       */ VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
+      /* kClampToEdge          */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+      /* kMirrorClampToEdge    */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
+      /* kClampToHalfway       */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+      /* kMirrorClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
+      /* kClampToBorder        */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+      /* kMirrorClampToBorder  */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
+  };
+  sampler_create_info.addressModeU =
+      address_mode_map[static_cast<int>(sampler_info.clamp_u)];
+  sampler_create_info.addressModeV =
+      address_mode_map[static_cast<int>(sampler_info.clamp_v)];
+  sampler_create_info.addressModeW =
+      address_mode_map[static_cast<int>(sampler_info.clamp_w)];
+
+  sampler_create_info.mipLodBias = 0.0f;
+
+  float aniso = 0.f;
+  switch (sampler_info.aniso_filter) {
+    case AnisoFilter::kDisabled:
+      aniso = 1.0f;
+      break;
+    case AnisoFilter::kMax_1_1:
+      aniso = 1.0f;
+      break;
+    case AnisoFilter::kMax_2_1:
+      aniso = 2.0f;
+      break;
+    case AnisoFilter::kMax_4_1:
+      aniso = 4.0f;
+      break;
+    case AnisoFilter::kMax_8_1:
+      aniso = 8.0f;
+      break;
+    case AnisoFilter::kMax_16_1:
+      aniso = 16.0f;
+      break;
+    default:
+      assert_unhandled_case(sampler_info.aniso_filter);
+      return nullptr;
+  }
+
+  sampler_create_info.anisotropyEnable =
+      sampler_info.aniso_filter != AnisoFilter::kDisabled ?
VK_TRUE : VK_FALSE; + sampler_create_info.maxAnisotropy = aniso; + + sampler_create_info.compareEnable = VK_FALSE; + sampler_create_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_create_info.minLod = 0.0f; + sampler_create_info.maxLod = 0.0f; + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + sampler_create_info.unnormalizedCoordinates = VK_FALSE; + VkSampler vk_sampler; + status = + vkCreateSampler(*device_, &sampler_create_info, nullptr, &vk_sampler); + CheckResult(status, "vkCreateSampler"); + if (status != VK_SUCCESS) { + return nullptr; + } + + auto sampler = new Sampler(); + sampler->sampler = vk_sampler; + sampler->sampler_info = sampler_info; + samplers_[sampler_hash] = sampler; + + return sampler; +} + +TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, + uint32_t width, + uint32_t height, + TextureFormat format, + VkOffset2D* out_offset) { + for (auto it = textures_.begin(); it != textures_.end(); ++it) { + const auto& texture_info = it->second->texture_info; + if (guest_address >= texture_info.guest_address && + guest_address < + texture_info.guest_address + texture_info.input_length && + texture_info.size_2d.input_width >= width && + texture_info.size_2d.input_height >= height && out_offset) { + auto offset_bytes = guest_address - texture_info.guest_address; + + if (texture_info.dimension == Dimension::k2D) { + out_offset->x = 0; + out_offset->y = offset_bytes / texture_info.size_2d.input_pitch; + if (offset_bytes % texture_info.size_2d.input_pitch != 0) { + // TODO: offset_x + } + } + + return it->second; + } + + if (texture_info.guest_address == guest_address && + texture_info.dimension == Dimension::k2D && + texture_info.size_2d.input_width == width && + texture_info.size_2d.input_height == height) { + if (out_offset) { + out_offset->x = 0; + out_offset->y = 0; + } + + return it->second; + } + } + + // Check resolve textures + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + const auto& texture_info = (*it)->texture_info; + if (texture_info.guest_address == guest_address && + texture_info.dimension == Dimension::k2D && + texture_info.size_2d.input_width == width && + texture_info.size_2d.input_height == height) { + if (out_offset) { + out_offset->x = 0; + out_offset->y = 0; + } + + return (*it); + } + } + + return nullptr; +} + +void TextureSwap(Endian endianness, void* dest, const void* src, + size_t length) { + switch (endianness) { + case Endian::k8in16: + xe::copy_and_swap_16_aligned(dest, src, length / 2); + break; + case Endian::k8in32: + xe::copy_and_swap_32_aligned(dest, src, length / 4); + break; + case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word + xe::copy_and_swap_16_in_32_aligned(dest, src, length); + break; + default: + case Endian::kUnspecified: + std::memcpy(dest, src, length); + break; + } +} + +bool TextureCache::UploadTexture2D( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, Texture* dest, + TextureInfo src) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + assert_true(src.dimension == Dimension::k2D); + + if (!staging_buffer_.CanAcquire(src.input_length)) { + // Need to have unique memory for every upload for at least one frame. If we + // run out of memory, we need to flush all queued upload commands to the + // GPU. + // TODO: Actually flush commands. + assert_always(); + } + + // Grab some temporary memory for staging. 
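+  // The allocation is tagged with completion_fence, so the circular staging
+  // buffer only reclaims this space once the GPU has consumed the copy
+  // recorded below.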
+ size_t unpack_length = src.output_length; + auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); + assert_not_null(alloc); + + // Upload texture into GPU memory. + // TODO: If the GPU supports it, we can submit a compute batch to convert the + // texture and copy it to its destination. Otherwise, fallback to conversion + // on the CPU. + void* host_address = memory_->TranslatePhysical(src.guest_address); + if (!src.is_tiled) { + if (src.size_2d.input_pitch == src.size_2d.output_pitch) { + // Fast path copy entire image. + TextureSwap(src.endianness, alloc->host_ptr, host_address, unpack_length); + } else { + // Slow path copy row-by-row because strides differ. + // UNPACK_ROW_LENGTH only works for uncompressed images, and likely does + // this exact thing under the covers, so we just always do it here. + const uint8_t* src_mem = reinterpret_cast(host_address); + uint8_t* dest = reinterpret_cast(alloc->host_ptr); + uint32_t pitch = + std::min(src.size_2d.input_pitch, src.size_2d.output_pitch); + for (uint32_t y = 0; + y < std::min(src.size_2d.block_height, src.size_2d.logical_height); + y++) { + TextureSwap(src.endianness, dest, src_mem, pitch); + src_mem += src.size_2d.input_pitch; + dest += src.size_2d.output_pitch; + } + } + } else { + // Untile image. + // We could do this in a shader to speed things up, as this is pretty slow. + + // TODO(benvanik): optimize this inner loop (or work by tiles). + const uint8_t* src_mem = reinterpret_cast(host_address); + uint8_t* dest = reinterpret_cast(alloc->host_ptr); + uint32_t bytes_per_block = src.format_info->block_width * + src.format_info->block_height * + src.format_info->bits_per_pixel / 8; + + // Tiled textures can be packed; get the offset into the packed texture. + uint32_t offset_x; + uint32_t offset_y; + TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y); + auto bpp = (bytes_per_block >> 2) + + ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); + for (uint32_t y = 0, output_base_offset = 0; + y < std::min(src.size_2d.block_height, src.size_2d.logical_height); + y++, output_base_offset += src.size_2d.output_pitch) { + auto input_base_offset = TextureInfo::TiledOffset2DOuter( + offset_y + y, + (src.size_2d.input_width / src.format_info->block_width), bpp); + for (uint32_t x = 0, output_offset = output_base_offset; + x < src.size_2d.block_width; x++, output_offset += bytes_per_block) { + auto input_offset = + TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp, + input_base_offset) >> + bpp; + TextureSwap(src.endianness, dest + output_offset, + src_mem + input_offset * bytes_per_block, bytes_per_block); + } + } + } + + staging_buffer_.Flush(alloc); + + // Transition the texture into a transfer destination layout. + VkImageMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = + VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT; + barrier.oldLayout = dest->image_layout; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = dest->image; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + // Now move the converted texture into the destination. 
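+  // Note that bufferRowLength and bufferImageHeight are expressed in texels,
+  // not bytes, which is why the output dimensions are used directly below.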
+  VkBufferImageCopy copy_region;
+  copy_region.bufferOffset = alloc->offset;
+  copy_region.bufferRowLength = src.size_2d.output_width;
+  copy_region.bufferImageHeight = src.size_2d.output_height;
+  copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+  copy_region.imageOffset = {0, 0, 0};
+  copy_region.imageExtent = {src.size_2d.output_width,
+                             src.size_2d.output_height, 1};
+  vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
+                         dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                         &copy_region);
+
+  // Now transition the texture into a shader readonly source.
+  barrier.srcAccessMask = barrier.dstAccessMask;
+  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+  barrier.oldLayout = barrier.newLayout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  dest->image_layout = barrier.newLayout;
+  return true;
 }

 VkDescriptorSet TextureCache::PrepareTextureSet(
     VkCommandBuffer command_buffer,
+    std::shared_ptr<ui::vulkan::Fence> completion_fence,
     const std::vector<Shader::TextureBinding>& vertex_bindings,
     const std::vector<Shader::TextureBinding>& pixel_bindings) {
   // Clear state.
   auto update_set_info = &update_set_info_;
   update_set_info->has_setup_fetch_mask = 0;
-  update_set_info->image_1d_write_count = 0;
-  update_set_info->image_2d_write_count = 0;
-  update_set_info->image_3d_write_count = 0;
-  update_set_info->image_cube_write_count = 0;
+  update_set_info->image_write_count = 0;

   std::memset(update_set_info, 0, sizeof(update_set_info_));

@@ -178,10 +912,12 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
   // This does things lazily and de-dupes fetch constants reused in both
   // shaders.
   bool any_failed = false;
-  any_failed =
-      !SetupTextureBindings(update_set_info, vertex_bindings) || any_failed;
-  any_failed =
-      !SetupTextureBindings(update_set_info, pixel_bindings) || any_failed;
+  any_failed = !SetupTextureBindings(command_buffer, completion_fence,
+                                     update_set_info, vertex_bindings) ||
+               any_failed;
+  any_failed = !SetupTextureBindings(command_buffer, completion_fence,
+                                     update_set_info, pixel_bindings) ||
+               any_failed;
   if (any_failed) {
     XELOGW("Failed to setup one or more texture bindings");
     // TODO(benvanik): actually bail out here?
@@ -199,75 +935,87 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
       vkAllocateDescriptorSets(*device_, &set_alloc_info, &descriptor_set);
   CheckResult(err, "vkAllocateDescriptorSets");

-  // Write all updated descriptors.
-  // TODO(benvanik): optimize? split into multiple sets? set per type?
- VkWriteDescriptorSet descriptor_writes[4]; - std::memset(descriptor_writes, 0, sizeof(descriptor_writes)); - uint32_t descriptor_write_count = 0; - if (update_set_info->sampler_write_count) { - auto& sampler_write = descriptor_writes[descriptor_write_count++]; - sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - sampler_write.pNext = nullptr; - sampler_write.dstSet = descriptor_set; - sampler_write.dstBinding = 0; - sampler_write.dstArrayElement = 0; - sampler_write.descriptorCount = update_set_info->sampler_write_count; - sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler_write.pImageInfo = update_set_info->sampler_infos; - } - if (update_set_info->image_1d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 1; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_1d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - image_write.pImageInfo = update_set_info->image_1d_infos; - } - if (update_set_info->image_2d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 2; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_2d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - image_write.pImageInfo = update_set_info->image_2d_infos; - } - if (update_set_info->image_3d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 3; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_3d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - image_write.pImageInfo = update_set_info->image_3d_infos; - } - if (update_set_info->image_cube_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 4; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_cube_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - image_write.pImageInfo = update_set_info->image_cube_infos; - } - if (descriptor_write_count) { - vkUpdateDescriptorSets(*device_, descriptor_write_count, descriptor_writes, - 0, nullptr); + if (err != VK_SUCCESS) { + return nullptr; } + // Write all updated descriptors. + // TODO(benvanik): optimize? split into multiple sets? set per type? + // First: Reorganize and pool image update infos. + struct DescriptorInfo { + Dimension dimension; + uint32_t tf_binding_base; + std::vector infos; + }; + + std::vector descriptor_update_infos; + for (uint32_t i = 0; i < update_set_info->image_write_count; i++) { + auto& image_info = update_set_info->image_infos[i]; + if (descriptor_update_infos.size() > 0) { + // Check last write to see if we can pool more into it. 
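+      // Writes are poolable only when they share an image dimension and form
+      // a contiguous run of fetch-constant bindings.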
+ DescriptorInfo& last_write = + descriptor_update_infos[descriptor_update_infos.size() - 1]; + if (last_write.dimension == image_info.dimension && + last_write.tf_binding_base + last_write.infos.size() == + image_info.tf_binding) { + // Compatible! Pool into it. + last_write.infos.push_back(image_info.info); + continue; + } + } + + // Push a new descriptor write entry. + DescriptorInfo desc_info; + desc_info.dimension = image_info.dimension; + desc_info.tf_binding_base = image_info.tf_binding; + desc_info.infos.push_back(image_info.info); + descriptor_update_infos.push_back(desc_info); + } + + // Finalize the writes so they're consumable by Vulkan. + std::vector descriptor_writes; + descriptor_writes.resize(descriptor_update_infos.size()); + for (size_t i = 0; i < descriptor_update_infos.size(); i++) { + auto& update_info = descriptor_update_infos[i]; + auto& write_info = descriptor_writes[i]; + std::memset(&write_info, 0, sizeof(VkWriteDescriptorSet)); + + write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_info.dstSet = descriptor_set; + + switch (update_info.dimension) { + case Dimension::k1D: + write_info.dstBinding = 0; + break; + case Dimension::k2D: + write_info.dstBinding = 1; + break; + case Dimension::k3D: + write_info.dstBinding = 2; + break; + case Dimension::kCube: + write_info.dstBinding = 3; + break; + } + + write_info.dstArrayElement = update_info.tf_binding_base; + write_info.descriptorCount = uint32_t(update_info.infos.size()); + write_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write_info.pImageInfo = update_info.infos.data(); + } + + if (descriptor_writes.size() > 0) { + vkUpdateDescriptorSets(*device_, uint32_t(descriptor_writes.size()), + descriptor_writes.data(), 0, nullptr); + } + + in_flight_sets_.push_back({descriptor_set, completion_fence}); return descriptor_set; } bool TextureCache::SetupTextureBindings( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, UpdateSetInfo* update_set_info, const std::vector& bindings) { bool any_failed = false; @@ -275,15 +1023,23 @@ bool TextureCache::SetupTextureBindings( uint32_t fetch_bit = 1 << binding.fetch_constant; if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { // Needs setup. 
- any_failed = !SetupTextureBinding(update_set_info, binding) || any_failed; + any_failed = !SetupTextureBinding(command_buffer, completion_fence, + update_set_info, binding) || + any_failed; update_set_info->has_setup_fetch_mask |= fetch_bit; } } return !any_failed; } -bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding) { +bool TextureCache::SetupTextureBinding( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + UpdateSetInfo* update_set_info, const Shader::TextureBinding& binding) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; auto group = @@ -308,47 +1064,100 @@ bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, return false; // invalid texture used } + auto texture = Demand(texture_info, command_buffer, completion_fence); + auto sampler = Demand(sampler_info); + // assert_true(texture != nullptr && sampler != nullptr); + if (texture == nullptr || sampler == nullptr) { + return false; + } + + uint16_t swizzle = static_cast(fetch.swizzle); + auto view = DemandView(texture, swizzle); + trace_writer_->WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - // TODO(benvanik): reuse. - VkSamplerCreateInfo sampler_create_info; - sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_create_info.pNext = nullptr; - sampler_create_info.flags = 0; - sampler_create_info.magFilter = VK_FILTER_NEAREST; - sampler_create_info.minFilter = VK_FILTER_NEAREST; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.mipLodBias = 0.0f; - sampler_create_info.anisotropyEnable = VK_FALSE; - sampler_create_info.maxAnisotropy = 1.0f; - sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; - sampler_create_info.minLod = 0.0f; - sampler_create_info.maxLod = 0.0f; - sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - sampler_create_info.unnormalizedCoordinates = VK_FALSE; - VkSampler sampler; - auto err = vkCreateSampler(*device_, &sampler_create_info, nullptr, &sampler); - CheckResult(err, "vkCreateSampler"); - - auto& sampler_write = - update_set_info->sampler_infos[update_set_info->sampler_write_count++]; - sampler_write.sampler = sampler; - - auto& image_write = - update_set_info->image_2d_infos[update_set_info->image_2d_write_count++]; - image_write.imageView = grid_image_2d_view_; - image_write.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + auto image_write = + &update_set_info->image_infos[update_set_info->image_write_count++]; + image_write->dimension = texture_info.dimension; + image_write->tf_binding = binding.fetch_constant; + image_write->info.imageView = view->view; + image_write->info.imageLayout = texture->image_layout; + image_write->info.sampler = sampler->sampler; + texture->in_flight_fence = completion_fence; return true; } void TextureCache::ClearCache() { - // TODO(benvanik): caching. + // TODO(DrChat): Nuke everything. 
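+  // A full clear would need to wait on (or poll) every in-flight fence before
+  // freeing, the same check FreeTexture() performs per texture.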
+} + +void TextureCache::Scavenge() { + // Free unused descriptor sets + for (auto it = in_flight_sets_.begin(); it != in_flight_sets_.end();) { + if (vkGetFenceStatus(*device_, *it->second) == VK_SUCCESS) { + // We can free this one. + vkFreeDescriptorSets(*device_, descriptor_pool_, 1, &it->first); + it = in_flight_sets_.erase(it); + continue; + } + + // We've encountered an item that hasn't been used yet, so any items + // afterwards are guaranteed to be unused. + break; + } + + staging_buffer_.Scavenge(); + + // Kill all pending delete textures. + if (!pending_delete_textures_.empty()) { + for (auto it = pending_delete_textures_.begin(); + it != pending_delete_textures_.end();) { + if (!FreeTexture(*it)) { + break; + } + + it = pending_delete_textures_.erase(it); + } + } + + // Clean up any invalidated textures. + invalidated_textures_mutex_.lock(); + std::vector& invalidated_textures = *invalidated_textures_; + if (invalidated_textures_ == &invalidated_textures_sets_[0]) { + invalidated_textures_ = &invalidated_textures_sets_[1]; + } else { + invalidated_textures_ = &invalidated_textures_sets_[0]; + } + invalidated_textures_mutex_.unlock(); + if (!invalidated_textures.empty()) { + for (auto it = invalidated_textures.begin(); + it != invalidated_textures.end(); ++it) { + pending_delete_textures_.push_back(*it); + textures_.erase((*it)->texture_info.hash()); + } + + invalidated_textures.clear(); + } + + // Invalidated resolve textures. + invalidated_resolve_textures_mutex_.lock(); + if (!invalidated_resolve_textures_.empty()) { + for (auto it = invalidated_resolve_textures_.begin(); + it != invalidated_resolve_textures_.end(); ++it) { + pending_delete_textures_.push_back(*it); + + auto tex = + std::find(resolve_textures_.begin(), resolve_textures_.end(), *it); + if (tex != resolve_textures_.end()) { + resolve_textures_.erase(tex); + } + } + + invalidated_resolve_textures_.clear(); + } + invalidated_resolve_textures_mutex_.unlock(); } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 9ba3f3577..8f47f33df 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -10,10 +10,16 @@ #ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ #define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ +#include + #include "xenia/gpu/register_file.h" +#include "xenia/gpu/sampler_info.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/texture_info.h" #include "xenia/gpu/trace_writer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/circular_buffer.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -24,8 +30,51 @@ namespace vulkan { // class TextureCache { public: - TextureCache(RegisterFile* register_file, TraceWriter* trace_writer, - ui::vulkan::VulkanDevice* device); + struct TextureView; + + // This represents an uploaded Vulkan texture. + struct Texture { + TextureInfo texture_info; + std::vector> views; + + // True if we know all info about this texture, false otherwise. + // (e.g. we resolve to system memory and may not know the full details about + // this texture) + bool is_full_texture; + VkFormat format; + VkImage image; + VkImageLayout image_layout; + VkDeviceMemory image_memory; + VkDeviceSize memory_offset; + VkDeviceSize memory_size; + + uintptr_t access_watch_handle; + bool pending_invalidation; + + // Pointer to the latest usage fence. 
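+    // Signaled once the last submission referencing this texture completes;
+    // FreeTexture() checks it before destroying anything.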
+ std::shared_ptr in_flight_fence; + }; + + struct TextureView { + Texture* texture; + VkImageView view; + + union { + struct { + // FIXME: This only applies on little-endian platforms! + uint16_t swiz_x : 3; + uint16_t swiz_y : 3; + uint16_t swiz_z : 3; + uint16_t swiz_w : 3; + uint16_t : 4; + }; + + uint16_t swizzle; + }; + }; + + TextureCache(Memory* memory, RegisterFile* register_file, + TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device); ~TextureCache(); // Descriptor set layout containing all possible texture bindings. @@ -36,8 +85,11 @@ class TextureCache { // Prepares a descriptor set containing the samplers and images for all // bindings. The textures will be uploaded/converted/etc as needed. + // Requires a fence to be provided that will be signaled when finished + // using the returned descriptor set. VkDescriptorSet PrepareTextureSet( - VkCommandBuffer command_buffer, + VkCommandBuffer setup_command_buffer, + std::shared_ptr completion_fence, const std::vector& vertex_bindings, const std::vector& pixel_bindings); @@ -45,45 +97,106 @@ class TextureCache { // TODO(benvanik): Resolve. // TODO(benvanik): ReadTexture. + // Looks for a texture either containing or matching these parameters. + // Caller is responsible for checking if the texture returned is an exact + // match or just contains the texture given by the parameters. + // If offset_x and offset_y are not null, this may return a texture that + // contains this address at an offset. + Texture* LookupAddress(uint32_t guest_address, uint32_t width, + uint32_t height, TextureFormat format, + VkOffset2D* out_offset = nullptr); + + // Demands a texture for the purpose of resolving from EDRAM. This either + // creates a new texture or returns a previously created texture. texture_info + // is not required to be completely filled out, just guest_address and all + // sizes. + // + // It's possible that this may return an image that is larger than the + // requested size (e.g. resolving into a bigger texture) or an image that + // must have an offset applied. If so, the caller must handle this. + // At the very least, it's guaranteed that the image will be large enough to + // hold the requested size. + Texture* DemandResolveTexture(const TextureInfo& texture_info, + TextureFormat format, VkOffset2D* out_offset); + // Clears all cached content. void ClearCache(); + // Frees any unused resources + void Scavenge(); + private: struct UpdateSetInfo; - void SetupGridImages(); + // Cached Vulkan sampler. + struct Sampler { + SamplerInfo sampler_info; + VkSampler sampler; + }; + + // Allocates a new texture and memory to back it on the GPU. + Texture* AllocateTexture(const TextureInfo& texture_info); + bool FreeTexture(Texture* texture); + + // Demands a texture. If command_buffer is null and the texture hasn't been + // uploaded to graphics memory already, we will return null and bail. + Texture* Demand( + const TextureInfo& texture_info, VkCommandBuffer command_buffer = nullptr, + std::shared_ptr completion_fence = nullptr); + TextureView* DemandView(Texture* texture, uint16_t swizzle); + Sampler* Demand(const SamplerInfo& sampler_info); + + // Queues commands to upload a texture from system memory, applying any + // conversions necessary. This may flush the command buffer to the GPU if we + // run out of staging memory. 
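+  // (The flush isn't implemented yet; running out of staging space currently
+  // trips an assert in UploadTexture2D.)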
+ bool UploadTexture2D(VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + Texture* dest, TextureInfo src); bool SetupTextureBindings( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, UpdateSetInfo* update_set_info, const std::vector& bindings); - bool SetupTextureBinding(UpdateSetInfo* update_set_info, + bool SetupTextureBinding(VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + UpdateSetInfo* update_set_info, const Shader::TextureBinding& binding); + Memory* memory_ = nullptr; + RegisterFile* register_file_ = nullptr; TraceWriter* trace_writer_ = nullptr; ui::vulkan::VulkanDevice* device_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; + std::list>> + in_flight_sets_; - VkDeviceMemory grid_image_2d_memory_ = nullptr; - VkImage grid_image_2d_ = nullptr; - VkImageView grid_image_2d_view_ = nullptr; + ui::vulkan::CircularBuffer staging_buffer_; + std::unordered_map textures_; + std::unordered_map samplers_; + std::vector resolve_textures_; + std::list pending_delete_textures_; + + std::mutex invalidated_textures_mutex_; + std::vector* invalidated_textures_; + std::vector invalidated_textures_sets_[2]; + + std::mutex invalidated_resolve_textures_mutex_; + std::vector invalidated_resolve_textures_; struct UpdateSetInfo { // Bitmap of all 32 fetch constants and whether they have been setup yet. // This prevents duplication across the vertex and pixel shader. uint32_t has_setup_fetch_mask; - uint32_t sampler_write_count = 0; - VkDescriptorImageInfo sampler_infos[32]; - uint32_t image_1d_write_count = 0; - VkDescriptorImageInfo image_1d_infos[32]; - uint32_t image_2d_write_count = 0; - VkDescriptorImageInfo image_2d_infos[32]; - uint32_t image_3d_write_count = 0; - VkDescriptorImageInfo image_3d_infos[32]; - uint32_t image_cube_write_count = 0; - VkDescriptorImageInfo image_cube_infos[32]; + uint32_t image_write_count = 0; + struct ImageSetInfo { + Dimension dimension; + uint32_t tf_binding; + VkDescriptorImageInfo info; + } image_infos[32]; } update_set_info_; }; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index f04ec1ad3..f31b28142 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -37,9 +37,22 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; +void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) { + // Override traces if renderdoc is attached. + if (device_->is_renderdoc_attached()) { + trace_requested_ = true; + return; + } + + return CommandProcessor::RequestFrameTrace(root_path); +} + void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); + auto status = vkQueueWaitIdle(queue_); + CheckResult(status, "vkQueueWaitIdle"); + buffer_cache_->ClearCache(); pipeline_cache_->ClearCache(); render_cache_->ClearCache(); @@ -69,8 +82,8 @@ bool VulkanCommandProcessor::SetupContext() { // Initialize the state machine caches. 
   buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
                                                 kDefaultBufferCacheCapacity);
-  texture_cache_ =
-      std::make_unique<TextureCache>(register_file_, &trace_writer_, device_);
+  texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
+                                                  &trace_writer_, device_);
   pipeline_cache_ = std::make_unique<PipelineCache>(
       register_file_, device_, buffer_cache_->constant_descriptor_set_layout(),
       texture_cache_->texture_descriptor_set_layout());
@@ -82,6 +95,11 @@ bool VulkanCommandProcessor::SetupContext() {
 void VulkanCommandProcessor::ShutdownContext() {
   // TODO(benvanik): wait until idle.

+  if (swap_state_.front_buffer_texture) {
+    // Free swap chain images.
+    DestroySwapImages();
+  }
+
   buffer_cache_.reset();
   pipeline_cache_.reset();
   render_cache_.reset();
@@ -90,7 +108,7 @@ void VulkanCommandProcessor::ShutdownContext() {
   // Free all pools. This must come after all of our caches clean up.
   command_buffer_pool_.reset();

-  // Release queue, if were using an acquired one.
+  // Release queue, if we were using an acquired one.
   if (!queue_mutex_) {
     device_->ReleaseQueue(queue_);
     queue_ = nullptr;
@@ -131,24 +149,241 @@ void VulkanCommandProcessor::ReturnFromWait() {
   CommandProcessor::ReturnFromWait();
 }

+void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer,
+                                              VkExtent2D extents) {
+  VkImageCreateInfo image_info;
+  std::memset(&image_info, 0, sizeof(VkImageCreateInfo));
+  image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+  image_info.imageType = VK_IMAGE_TYPE_2D;
+  image_info.format = VK_FORMAT_R8G8B8A8_UNORM;
+  image_info.extent = {extents.width, extents.height, 1};
+  image_info.mipLevels = 1;
+  image_info.arrayLayers = 1;
+  image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+  image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
+  image_info.usage =
+      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+  image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  image_info.queueFamilyIndexCount = 0;
+  image_info.pQueueFamilyIndices = nullptr;
+  image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+  VkImage image_fb, image_bb;
+  auto status = vkCreateImage(*device_, &image_info, nullptr, &image_fb);
+  CheckResult(status, "vkCreateImage");
+
+  status = vkCreateImage(*device_, &image_info, nullptr, &image_bb);
+  CheckResult(status, "vkCreateImage");
+
+  // Bind memory to images.
+  VkMemoryRequirements mem_requirements;
+  vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements);
+  fb_memory = device_->AllocateMemory(mem_requirements, 0);
+  assert_not_null(fb_memory);
+
+  status = vkBindImageMemory(*device_, image_fb, fb_memory, 0);
+  CheckResult(status, "vkBindImageMemory");
+
+  vkGetImageMemoryRequirements(*device_, image_bb, &mem_requirements);
+  bb_memory = device_->AllocateMemory(mem_requirements, 0);
+  assert_not_null(bb_memory);
+
+  status = vkBindImageMemory(*device_, image_bb, bb_memory, 0);
+  CheckResult(status, "vkBindImageMemory");
+
+  std::lock_guard<std::mutex> lock(swap_state_.mutex);
+  swap_state_.front_buffer_texture = reinterpret_cast<uintptr_t>(image_fb);
+  swap_state_.back_buffer_texture = reinterpret_cast<uintptr_t>(image_bb);
+
+  // Transition both images to general layout.
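+  // Using VK_IMAGE_LAYOUT_UNDEFINED as oldLayout lets the driver discard any
+  // previous contents; both images then stay in GENERAL so they can serve as
+  // blit source/destination without further transitions.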
+ VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image_fb; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + barrier.image = image_bb; + + vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); +} + +void VulkanCommandProcessor::DestroySwapImages() { + std::lock_guard lock(swap_state_.mutex); + vkDestroyImage(*device_, + reinterpret_cast(swap_state_.front_buffer_texture), + nullptr); + vkDestroyImage(*device_, + reinterpret_cast(swap_state_.back_buffer_texture), + nullptr); + vkFreeMemory(*device_, fb_memory, nullptr); + vkFreeMemory(*device_, bb_memory, nullptr); + + swap_state_.front_buffer_texture = 0; + swap_state_.back_buffer_texture = 0; + fb_memory = nullptr; + bb_memory = nullptr; +} + void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { - // Ensure we issue any pending draws. - // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); + SCOPE_profile_cpu_f("gpu"); - // Need to finish to be sure the other context sees the right data. - // TODO(benvanik): prevent this? fences? - // glFinish(); - - if (context_->WasLost()) { - // We've lost the context due to a TDR. - // TODO: Dump the current commands to a tracefile. - assert_always(); + // Build a final command buffer that copies the game's frontbuffer texture + // into our backbuffer texture. + VkCommandBuffer copy_commands = nullptr; + bool opened_batch; + if (command_buffer_pool_->has_open_batch()) { + copy_commands = command_buffer_pool_->AcquireEntry(); + opened_batch = false; + } else { + command_buffer_pool_->BeginBatch(); + copy_commands = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + opened_batch = true; } - // Remove any dead textures, etc. - // texture_cache_.Scavenge(); + VkCommandBufferBeginInfo begin_info; + std::memset(&begin_info, 0, sizeof(begin_info)); + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + auto status = vkBeginCommandBuffer(copy_commands, &begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + if (!frontbuffer_ptr) { + // Trace viewer does this. + frontbuffer_ptr = last_copy_base_; + } + + if (!swap_state_.back_buffer_texture) { + CreateSwapImages(copy_commands, {frontbuffer_width, frontbuffer_height}); + } + auto swap_bb = reinterpret_cast(swap_state_.back_buffer_texture); + + // Issue the commands to copy the game's frontbuffer to our backbuffer. + auto texture = texture_cache_->LookupAddress( + frontbuffer_ptr, xe::round_up(frontbuffer_width, 32), + xe::round_up(frontbuffer_height, 32), TextureFormat::k_8_8_8_8); + if (texture) { + texture->in_flight_fence = current_batch_fence_; + + // Insert a barrier so the GPU finishes writing to the image. 
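+    // oldLayout equals newLayout here: no transition is needed, only
+    // visibility of the prior color/transfer writes to the upcoming blit read.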
+ VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = texture->image_layout; + barrier.newLayout = texture->image_layout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = texture->image; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + // Now issue a blit command. + VkImageBlit blit; + std::memset(&blit, 0, sizeof(VkImageBlit)); + blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.srcOffsets[0] = {0, 0, 0}; + blit.srcOffsets[1] = {int32_t(frontbuffer_width), + int32_t(frontbuffer_height), 1}; + blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {int32_t(frontbuffer_width), + int32_t(frontbuffer_height), 1}; + + vkCmdBlitImage(copy_commands, texture->image, texture->image_layout, + swap_bb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, + VK_FILTER_LINEAR); + + std::lock_guard lock(swap_state_.mutex); + swap_state_.width = frontbuffer_width; + swap_state_.height = frontbuffer_height; + } + + status = vkEndCommandBuffer(copy_commands); + CheckResult(status, "vkEndCommandBuffer"); + + // Queue up current command buffers. + // TODO(benvanik): bigger batches. + std::vector submit_buffers; + if (current_command_buffer_) { + if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } + + status = vkEndCommandBuffer(current_setup_buffer_); + CheckResult(status, "vkEndCommandBuffer"); + status = vkEndCommandBuffer(current_command_buffer_); + CheckResult(status, "vkEndCommandBuffer"); + + // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up. + submit_buffers.push_back(current_setup_buffer_); + submit_buffers.push_back(current_command_buffer_); + + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + } + + submit_buffers.push_back(copy_commands); + if (!submit_buffers.empty()) { + // TODO(benvanik): move to CP or to host (trace dump, etc). + // This only needs to surround a vkQueueSubmit. + if (queue_mutex_) { + queue_mutex_->lock(); + } + + VkSubmitInfo submit_info; + std::memset(&submit_info, 0, sizeof(VkSubmitInfo)); + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = uint32_t(submit_buffers.size()); + submit_info.pCommandBuffers = submit_buffers.data(); + status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); + CheckResult(status, "vkQueueSubmit"); + + if (device_->is_renderdoc_attached() && capturing_) { + device_->EndRenderDocFrameCapture(); + capturing_ = false; + } + if (queue_mutex_) { + queue_mutex_->unlock(); + } + } + + command_buffer_pool_->EndBatch(current_batch_fence_); + + // Scavenging. 
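+  // Each cache walks its in-flight lists and releases anything whose fence
+  // has signaled: descriptor sets, staging allocations, and invalidated
+  // textures.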
+ { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_i( + "gpu", + "xe::gpu::vulkan::VulkanCommandProcessor::PerformSwap Scavenging"); +#endif // FINE_GRAINED_DRAW_SCOPES + command_buffer_pool_->Scavenge(); + + texture_cache_->Scavenge(); + buffer_cache_->Scavenge(); + } + + current_batch_fence_ = nullptr; } Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, @@ -178,16 +413,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } - // TODO(benvanik): move to CP or to host (trace dump, etc). - if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { - device_->BeginRenderDocFrameCapture(); + if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { + // Doesn't actually draw. + return true; } // Shaders will have already been defined by previous loads. - // We need the to do just about anything so validate here. + // We need them to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); auto pixel_shader = static_cast(active_pixel_shader()); - if (!vertex_shader || !vertex_shader->is_valid()) { + if (!vertex_shader) { // Always need a vertex shader. return true; } @@ -196,61 +431,142 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Use a dummy pixel shader when required. // TODO(benvanik): dummy pixel shader. assert_not_null(pixel_shader); - } else if (!pixel_shader || !pixel_shader->is_valid()) { + } else if (!pixel_shader) { // Need a pixel shader in normal color mode. return true; } - // TODO(benvanik): bigger batches. - command_buffer_pool_->BeginBatch(); - VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); - VkCommandBufferBeginInfo command_buffer_begin_info; - command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - command_buffer_begin_info.pNext = nullptr; - command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - command_buffer_begin_info.pInheritanceInfo = nullptr; - auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); - CheckResult(err, "vkBeginCommandBuffer"); + bool started_command_buffer = false; + if (!current_command_buffer_) { + // TODO(benvanik): bigger batches. + // TODO(DrChat): Decouple setup buffer from current batch. 
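+    // The setup buffer records resource uploads (e.g. texture staging copies)
+    // and is queued ahead of the draw buffer in PerformSwap, so uploads land
+    // before the draws that consume them.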
+ command_buffer_pool_->BeginBatch(); + current_command_buffer_ = command_buffer_pool_->AcquireEntry(); + current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto status = vkBeginCommandBuffer(current_command_buffer_, + &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + status = + vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + static uint32_t frame = 0; + if (device_->is_renderdoc_attached() && !capturing_ && + (FLAGS_vulkan_renderdoc_capture_all || trace_requested_)) { + if (queue_mutex_) { + queue_mutex_->lock(); + } + + capturing_ = true; + trace_requested_ = false; + device_->BeginRenderDocFrameCapture(); + + if (queue_mutex_) { + queue_mutex_->unlock(); + } + } + + started_command_buffer = true; + } + auto command_buffer = current_command_buffer_; + auto setup_buffer = current_setup_buffer_; // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. - auto render_state = render_cache_->BeginRenderPass( - command_buffer, vertex_shader, pixel_shader); - if (!render_state) { - return false; + // This reuses a previous render pass if one is already open. + if (render_cache_->dirty() || !current_render_state_) { + if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } + + current_render_state_ = render_cache_->BeginRenderPass( + command_buffer, vertex_shader, pixel_shader); + if (!current_render_state_) { + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + return false; + } } // Configure the pipeline for drawing. // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. - if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state, - vertex_shader, pixel_shader, - primitive_type)) { + VkPipeline pipeline = nullptr; + auto pipeline_status = pipeline_cache_->ConfigurePipeline( + command_buffer, current_render_state_, vertex_shader, pixel_shader, + primitive_type, &pipeline); + if (pipeline_status == PipelineCache::UpdateStatus::kMismatch || + started_command_buffer) { + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + } else if (pipeline_status == PipelineCache::UpdateStatus::kError) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } + pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer); // Pass registers to the shaders. if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } // Upload and bind index buffer data (if we have any). 
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } // Upload and bind all vertex buffer data. if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } - // Upload and set descriptors for all textures. - if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) { + // Bind samplers/textures. + // Uploads all textures that need it. + // Setup buffer may be flushed to GPU if the texture cache needs it. + if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader, + pixel_shader)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } @@ -273,68 +589,21 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_offset, first_instance); } - // End the rendering pass. - render_cache_->EndRenderPass(); - - // TODO(benvanik): bigger batches. - err = vkEndCommandBuffer(command_buffer); - CheckResult(err, "vkEndCommandBuffer"); - VkFence fence; - VkFenceCreateInfo fence_info; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.pNext = nullptr; - fence_info.flags = 0; - vkCreateFence(*device_, &fence_info, nullptr, &fence); - command_buffer_pool_->EndBatch(fence); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - if (queue_mutex_) { - queue_mutex_->lock(); - } - err = vkQueueSubmit(queue_, 1, &submit_info, fence); - if (queue_mutex_) { - queue_mutex_->unlock(); - } - CheckResult(err, "vkQueueSubmit"); - if (queue_mutex_) { - queue_mutex_->lock(); - } - err = vkQueueWaitIdle(queue_); - CheckResult(err, "vkQueueWaitIdle"); - err = vkDeviceWaitIdle(*device_); - CheckResult(err, "vkDeviceWaitIdle"); - if (queue_mutex_) { - queue_mutex_->unlock(); - } - while (command_buffer_pool_->has_pending()) { - command_buffer_pool_->Scavenge(); - xe::threading::MaybeYield(); - } - vkDestroyFence(*device_, fence, nullptr); - - // TODO(benvanik): move to CP or to host (trace dump, etc). - if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { - device_->EndRenderDocFrameCapture(); - } - return true; } bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + // Upload the constants the shaders require. // These are optional, and if none are defined 0 will be returned. 
auto constant_offsets = buffer_cache_->UploadConstantRegisters( vertex_shader->constant_register_map(), - pixel_shader->constant_register_map()); + pixel_shader->constant_register_map(), current_batch_fence_); if (constant_offsets.first == VK_WHOLE_SIZE || constant_offsets.second == VK_WHOLE_SIZE) { // Shader wants constants but we couldn't upload them. @@ -387,8 +656,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer( size_t source_length = info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t)); - auto buffer_ref = - buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format); + auto buffer_ref = buffer_cache_->UploadIndexBuffer( + source_ptr, source_length, info.format, current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -413,6 +682,11 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( #endif // FINE_GRAINED_DRAW_SCOPES auto& vertex_bindings = vertex_shader->vertex_bindings(); + if (vertex_bindings.empty()) { + // No bindings. + return true; + } + assert_true(vertex_bindings.size() <= 32); VkBuffer all_buffers[32]; VkDeviceSize all_buffer_offsets[32]; @@ -434,7 +708,6 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( fetch = &group->vertex_fetch_2; break; } - assert_true(fetch->endian == 2); // TODO(benvanik): compute based on indices or vertex count. // THIS CAN BE MASSIVELY INCORRECT (too large). @@ -446,8 +719,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( const void* source_ptr = memory_->TranslatePhysical(fetch->address << 2); size_t source_length = valid_range; - auto buffer_ref = - buffer_cache_->UploadVertexBuffer(source_ptr, source_length); + auto buffer_ref = buffer_cache_->UploadVertexBuffer( + source_ptr, source_length, static_cast(fetch->endian), + current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -467,6 +741,7 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( } bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, + VkCommandBuffer setup_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { #if FINE_GRAINED_DRAW_SCOPES @@ -474,14 +749,13 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, #endif // FINE_GRAINED_DRAW_SCOPES auto descriptor_set = texture_cache_->PrepareTextureSet( - command_buffer, vertex_shader->texture_bindings(), + setup_buffer, current_batch_fence_, vertex_shader->texture_bindings(), pixel_shader->texture_bindings()); if (!descriptor_set) { // Unable to bind set. return false; } - // Bind samplers/textures. vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_cache_->pipeline_layout(), 1, 1, &descriptor_set, 0, nullptr); @@ -491,7 +765,294 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, bool VulkanCommandProcessor::IssueCopy() { SCOPE_profile_cpu_f("gpu"); - // TODO(benvanik): resolve. + auto& regs = *register_file_; + + // This is used to resolve surfaces, taking them from EDRAM render targets + // to system memory. It can optionally clear color/depth surfaces, too. + // The command buffer has stuff for actually doing this by drawing, however + // we should be able to do it without that much easier. 
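+
+  // RB_COPY_CONTROL packs the copy source select in bits 0-2 (render targets
+  // 0-3, or 4 for depth), the color/depth clear enables in bits 8-9, and the
+  // copy command in bits 20-21, as decoded below.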
+
+ uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
+ // Render targets 0-3, 4 = depth
+ uint32_t copy_src_select = copy_control & 0x7;
+ bool color_clear_enabled = (copy_control >> 8) & 0x1;
+ bool depth_clear_enabled = (copy_control >> 9) & 0x1;
+ auto copy_command = static_cast<CopyCommand>((copy_control >> 20) & 0x3);
+
+ uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
+ auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
+ uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
+ assert_true(copy_dest_array == 0);
+ uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
+ assert_true(copy_dest_slice == 0);
+ auto copy_dest_format =
+     static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
+ uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
+ // assert_true(copy_dest_number == 0); // ?
+ uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
+ // assert_true(copy_dest_bias == 0);
+ uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;
+
+ uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
+ uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
+ uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
+ copy_dest_pitch &= 0x3FFF;
+
+ // None of this is supported yet:
+ uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
+ assert_true(copy_surface_slice == 0);
+ uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
+ assert_true(copy_func == 0);
+ uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
+ assert_true(copy_ref == 0);
+ uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
+ assert_true(copy_mask == 0);
+
+ // Supported in GL4, not supported here yet.
+ assert_zero(copy_dest_swap);
+
+ // RB_SURFACE_INFO
+ // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
+ uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
+ uint32_t surface_pitch = surface_info & 0x3FFF;
+ auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
+
+ // TODO(benvanik): any way to scissor this? a200 has:
+ // REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
+ //     A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff);
+ // but I can't seem to find something similar.
+ uint32_t dest_logical_width = copy_dest_pitch;
+ uint32_t dest_logical_height = copy_dest_height;
+ uint32_t dest_block_width = xe::round_up(dest_logical_width, 32);
+ uint32_t dest_block_height = xe::round_up(dest_logical_height, 32);
+
+ uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
+ int16_t window_offset_x = window_offset & 0x7FFF;
+ int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
+ // Sign-extension
+ if (window_offset_x & 0x4000) {
+   window_offset_x |= 0x8000;
+ }
+ if (window_offset_y & 0x4000) {
+   window_offset_y |= 0x8000;
+ }
+
+ size_t read_size = GetTexelSize(ColorFormatToTextureFormat(copy_dest_format));
+
+ // Adjust the copy base offset to point to the beginning of the texture, so
+ // we don't run into hiccups down the road (e.g. resolving the last part going
+ // backwards).
+ int32_t dest_offset = window_offset_y * copy_dest_pitch * int(read_size);
+ dest_offset += window_offset_x * 32 * int(read_size);
+ copy_dest_base += dest_offset;
+
+ // HACK: vertices to use are always in vf0.
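+ // Fetch constants pack three vertex fetches per six-dword register group,
+ // so slot N decodes as below; worked through for vf0 (N == 0):
+ //   r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (0 / 3) * 6;  // group 0
+ //   fetch = &group->vertex_fetch_0;                           // 0 % 3 == 0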
+ int copy_vertex_fetch_slot = 0;
+ int r =
+     XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (copy_vertex_fetch_slot / 3) * 6;
+ const auto group =
+     reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
+ const xe_gpu_vertex_fetch_t* fetch = nullptr;
+ switch (copy_vertex_fetch_slot % 3) {
+   case 0:
+     fetch = &group->vertex_fetch_0;
+     break;
+   case 1:
+     fetch = &group->vertex_fetch_1;
+     break;
+   case 2:
+     fetch = &group->vertex_fetch_2;
+     break;
+ }
+ assert_true(fetch->type == 3);
+ assert_true(fetch->endian == 2);
+ assert_true(fetch->size == 6);
+ const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2);
+ trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4);
+ int32_t dest_min_x = int32_t((std::min(
+     std::min(
+         GpuSwap(xe::load<float>(vertex_addr + 0), Endian(fetch->endian)),
+         GpuSwap(xe::load<float>(vertex_addr + 8), Endian(fetch->endian))),
+     GpuSwap(xe::load<float>(vertex_addr + 16), Endian(fetch->endian)))));
+ int32_t dest_max_x = int32_t((std::max(
+     std::max(
+         GpuSwap(xe::load<float>(vertex_addr + 0), Endian(fetch->endian)),
+         GpuSwap(xe::load<float>(vertex_addr + 8), Endian(fetch->endian))),
+     GpuSwap(xe::load<float>(vertex_addr + 16), Endian(fetch->endian)))));
+ int32_t dest_min_y = int32_t((std::min(
+     std::min(
+         GpuSwap(xe::load<float>(vertex_addr + 4), Endian(fetch->endian)),
+         GpuSwap(xe::load<float>(vertex_addr + 12), Endian(fetch->endian))),
+     GpuSwap(xe::load<float>(vertex_addr + 20), Endian(fetch->endian)))));
+ int32_t dest_max_y = int32_t((std::max(
+     std::max(
+         GpuSwap(xe::load<float>(vertex_addr + 4), Endian(fetch->endian)),
+         GpuSwap(xe::load<float>(vertex_addr + 12), Endian(fetch->endian))),
+     GpuSwap(xe::load<float>(vertex_addr + 20), Endian(fetch->endian)))));
+
+ uint32_t color_edram_base = 0;
+ uint32_t depth_edram_base = 0;
+ ColorRenderTargetFormat color_format;
+ DepthRenderTargetFormat depth_format;
+ if (copy_src_select <= 3) {
+   // Source from a color target.
+   uint32_t color_info[4] = {
+       regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
+       regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
+       regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
+   };
+   color_edram_base = color_info[copy_src_select] & 0xFFF;
+
+   color_format = static_cast<ColorRenderTargetFormat>(
+       (color_info[copy_src_select] >> 16) & 0xF);
+ }
+
+ if (copy_src_select > 3 || depth_clear_enabled) {
+   // Source from a depth target.
+   uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
+   depth_edram_base = depth_info & 0xFFF;
+
+   depth_format =
+       static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
+ }
+
+ // Demand a resolve texture from the texture cache.
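+ // Note: TextureInfo is assumed here to follow the Xenos convention of
+ // storing sizes minus one (hence the width/height below), while the
+ // size_2d.logical_* fields keep the full values.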
+ TextureInfo tex_info = {}; + tex_info.guest_address = copy_dest_base; + tex_info.width = dest_logical_width - 1; + tex_info.height = dest_logical_height - 1; + tex_info.dimension = gpu::Dimension::k2D; + tex_info.input_length = copy_dest_pitch * copy_dest_height * 4; + tex_info.format_info = + FormatInfo::Get(uint32_t(ColorFormatToTextureFormat(copy_dest_format))); + tex_info.size_2d.logical_width = dest_logical_width; + tex_info.size_2d.logical_height = dest_logical_height; + tex_info.size_2d.block_width = dest_block_width; + tex_info.size_2d.block_height = dest_block_height; + tex_info.size_2d.input_width = dest_block_width; + tex_info.size_2d.input_height = dest_block_height; + tex_info.size_2d.input_pitch = copy_dest_pitch * 4; + auto texture = texture_cache_->DemandResolveTexture( + tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr); + assert_not_null(texture); + texture->in_flight_fence = current_batch_fence_; + + // For debugging purposes only (trace viewer) + last_copy_base_ = texture->texture_info.guest_address; + + if (!current_command_buffer_) { + command_buffer_pool_->BeginBatch(); + current_command_buffer_ = command_buffer_pool_->AcquireEntry(); + current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto status = vkBeginCommandBuffer(current_command_buffer_, + &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + status = + vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + } else if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } + auto command_buffer = current_command_buffer_; + + if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + // Transition the image to a general layout. + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.image = texture->image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + copy_src_select <= 3 + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + texture->image_layout = VK_IMAGE_LAYOUT_GENERAL; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } + + VkOffset3D resolve_offset = {dest_min_x, dest_min_y, 0}; + VkExtent3D resolve_extent = {uint32_t(dest_max_x - dest_min_x), + uint32_t(dest_max_y - dest_min_y), 1}; + + // Ask the render cache to copy to the resolve texture. + auto edram_base = copy_src_select <= 3 ? color_edram_base : depth_edram_base; + uint32_t src_format = copy_src_select <= 3 + ? 
static_cast<uint32_t>(color_format)
+                           : static_cast<uint32_t>(depth_format);
+ switch (copy_command) {
+   case CopyCommand::kRaw:
+     /*
+     render_cache_->RawCopyToImage(command_buffer, edram_base, texture->image,
+                                   texture->image_layout, copy_src_select <= 3,
+                                   resolve_offset, resolve_extent);
+     break;
+     */
+   case CopyCommand::kConvert:
+     render_cache_->BlitToImage(
+         command_buffer, edram_base, surface_pitch, resolve_extent.height,
+         surface_msaa, texture->image, texture->image_layout,
+         copy_src_select <= 3, src_format, VK_FILTER_LINEAR, resolve_offset,
+         resolve_extent);
+     break;
+
+   case CopyCommand::kConstantOne:
+   case CopyCommand::kNull:
+     assert_always();
+     break;
+ }
+
+ // Perform any requested clears.
+ uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
+ uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
+ uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
+ assert_true(copy_color_clear == copy_color_clear_low);
+
+ if (color_clear_enabled) {
+   // If color clear is enabled, we can only clear a selected color target!
+   assert_true(copy_src_select <= 3);
+
+   // TODO(benvanik): verify color order.
+   float color[] = {((copy_color_clear >> 0) & 0xFF) / 255.0f,
+                    ((copy_color_clear >> 8) & 0xFF) / 255.0f,
+                    ((copy_color_clear >> 16) & 0xFF) / 255.0f,
+                    ((copy_color_clear >> 24) & 0xFF) / 255.0f};
+
+   // TODO(DrChat): Do we know the surface height at this point?
+   render_cache_->ClearEDRAMColor(command_buffer, color_edram_base,
+                                  color_format, surface_pitch,
+                                  resolve_extent.height, surface_msaa, color);
+ }
+
+ if (depth_clear_enabled) {
+   float depth =
+       (copy_depth_clear & 0xFFFFFF00) / static_cast<float>(0xFFFFFF00);
+   uint8_t stencil = copy_depth_clear & 0xFF;
+
+   // TODO(DrChat): Do we know the surface height at this point?
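+   // Worked example of the D24S8 decode above (illustrative values):
+   //   copy_depth_clear = 0x800000FF -> depth ~0.5, stencil 0xFF
+   //   copy_depth_clear = 0xFFFFFF00 -> depth  1.0, stencil 0x00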
+ render_cache_->ClearEDRAMDepthStencil( + command_buffer, depth_edram_base, depth_format, surface_pitch, + resolve_extent.height, surface_msaa, depth, stencil); + } + return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 43aec9edd..f58e2319b 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -34,12 +34,14 @@ #include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { class VulkanGraphicsSystem; +class TextureCache; class VulkanCommandProcessor : public CommandProcessor { public: @@ -47,8 +49,11 @@ class VulkanCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~VulkanCommandProcessor() override; + virtual void RequestFrameTrace(const std::wstring& root_path) override; void ClearCaches() override; + RenderCache* render_cache() { return render_cache_.get(); } + private: bool SetupContext() override; void ShutdownContext() override; @@ -57,6 +62,9 @@ class VulkanCommandProcessor : public CommandProcessor { void PrepareForWait() override; void ReturnFromWait() override; + void CreateSwapImages(VkCommandBuffer setup_buffer, VkExtent2D extents); + void DestroySwapImages(); + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -74,12 +82,17 @@ class VulkanCommandProcessor : public CommandProcessor { bool PopulateVertexBuffers(VkCommandBuffer command_buffer, VulkanShader* vertex_shader); bool PopulateSamplers(VkCommandBuffer command_buffer, + VkCommandBuffer setup_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader); bool IssueCopy() override; xe::ui::vulkan::VulkanDevice* device_ = nullptr; + // front buffer / back buffer memory + VkDeviceMemory fb_memory = nullptr; + VkDeviceMemory bb_memory = nullptr; + // TODO(benvanik): abstract behind context? // Queue used to submit work. This may be a dedicated queue for the command // processor and no locking will be required for use. If a dedicated queue @@ -88,12 +101,22 @@ class VulkanCommandProcessor : public CommandProcessor { VkQueue queue_ = nullptr; std::mutex* queue_mutex_ = nullptr; + // Last copy base address, for debugging only. + uint32_t last_copy_base_ = 0; + bool capturing_ = false; + bool trace_requested_ = false; + std::unique_ptr buffer_cache_; std::unique_ptr pipeline_cache_; std::unique_ptr render_cache_; std::unique_ptr texture_cache_; std::unique_ptr command_buffer_pool_; + + const RenderState* current_render_state_ = nullptr; + VkCommandBuffer current_command_buffer_ = nullptr; + VkCommandBuffer current_setup_buffer_ = nullptr; + std::shared_ptr current_batch_fence_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc index 1f018db54..fd2fe7789 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc @@ -11,3 +11,6 @@ DEFINE_bool(vulkan_renderdoc_capture_all, false, "Capture everything with RenderDoc."); +DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA"); +DEFINE_bool(vulkan_dump_disasm, false, + "Dump shader disassembly. 
NVIDIA only supported."); diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h index ca83dfb7a..169e797c8 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h @@ -15,5 +15,7 @@ #define FINE_GRAINED_DRAW_SCOPES 1 DECLARE_bool(vulkan_renderdoc_capture_all); +DECLARE_bool(vulkan_native_msaa); +DECLARE_bool(vulkan_dump_disasm); #endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 74ec57849..08c6120d7 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -19,14 +19,14 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_swap_chain.h" #include "xenia/ui/window.h" namespace xe { namespace gpu { namespace vulkan { -VulkanGraphicsSystem::VulkanGraphicsSystem() = default; - +VulkanGraphicsSystem::VulkanGraphicsSystem() {} VulkanGraphicsSystem::~VulkanGraphicsSystem() = default; X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor, @@ -74,12 +74,41 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } - // Blit the frontbuffer. - // display_context_->blitter()->BlitTexture2D( - // static_cast(swap_state.front_buffer_texture), - // Rect2D(0, 0, swap_state.width, swap_state.height), - // Rect2D(0, 0, target_window_->width(), target_window_->height()), - // GL_LINEAR, false); + auto swap_chain = display_context_->swap_chain(); + auto copy_cmd_buffer = swap_chain->copy_cmd_buffer(); + auto front_buffer = + reinterpret_cast(swap_state.front_buffer_texture); + + VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = front_buffer; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + vkCmdPipelineBarrier(copy_cmd_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + VkImageBlit region; + region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + region.srcOffsets[0] = {0, 0, 0}; + region.srcOffsets[1] = {static_cast(swap_state.width), + static_cast(swap_state.height), 1}; + + region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + region.dstOffsets[0] = {0, 0, 0}; + region.dstOffsets[1] = {static_cast(swap_chain->surface_width()), + static_cast(swap_chain->surface_height()), + 1}; + vkCmdBlitImage(copy_cmd_buffer, front_buffer, VK_IMAGE_LAYOUT_GENERAL, + swap_chain->surface_image(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, + VK_FILTER_LINEAR); } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index b3c72abf3..c18341a71 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -44,11 +44,11 @@ bool VulkanShader::Prepare() { shader_info.codeSize = translated_binary_.size(); shader_info.pCode = reinterpret_cast(translated_binary_.data()); - auto err = + auto status = 
vkCreateShaderModule(device_, &shader_info, nullptr, &shader_module_); - CheckResult(err, "vkCreateShaderModule"); + CheckResult(status, "vkCreateShaderModule"); - return true; + return status == VK_SUCCESS; } } // namespace vulkan diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 65c1f0bad..32c33cae8 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -49,6 +49,7 @@ enum class PrimitiveType : uint32_t { kLineLoop = 0x0C, kQuadList = 0x0D, kQuadStrip = 0x0E, + kUnknown0x11 = 0x11, }; enum class Dimension : uint32_t { @@ -382,7 +383,7 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { uint32_t type : 2; uint32_t address : 30; uint32_t endian : 2; - uint32_t size : 24; + uint32_t size : 24; // size in words uint32_t unk1 : 6; }); XEPACKEDSTRUCTANONYMOUS({ @@ -486,6 +487,46 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, { }); }); +enum Event { + SAMPLE_STREAMOUTSTATS1 = (1 << 0), + SAMPLE_STREAMOUTSTATS2 = (2 << 0), + SAMPLE_STREAMOUTSTATS3 = (3 << 0), + CACHE_FLUSH_TS = (4 << 0), + CACHE_FLUSH = (6 << 0), + CS_PARTIAL_FLUSH = (7 << 0), + VGT_STREAMOUT_RESET = (10 << 0), + END_OF_PIPE_INCR_DE = (11 << 0), + END_OF_PIPE_IB_END = (12 << 0), + RST_PIX_CNT = (13 << 0), + VS_PARTIAL_FLUSH = (15 << 0), + PS_PARTIAL_FLUSH = (16 << 0), + CACHE_FLUSH_AND_INV_TS_EVENT = (20 << 0), + ZPASS_DONE = (21 << 0), + CACHE_FLUSH_AND_INV_EVENT = (22 << 0), + PERFCOUNTER_START = (23 << 0), + PERFCOUNTER_STOP = (24 << 0), + PIPELINESTAT_START = (25 << 0), + PIPELINESTAT_STOP = (26 << 0), + PERFCOUNTER_SAMPLE = (27 << 0), + SAMPLE_PIPELINESTAT = (30 << 0), + SAMPLE_STREAMOUTSTATS = (32 << 0), + RESET_VTX_CNT = (33 << 0), + VGT_FLUSH = (36 << 0), + BOTTOM_OF_PIPE_TS = (40 << 0), + DB_CACHE_FLUSH_AND_INV = (42 << 0), + FLUSH_AND_INV_DB_DATA_TS = (43 << 0), + FLUSH_AND_INV_DB_META = (44 << 0), + FLUSH_AND_INV_CB_DATA_TS = (45 << 0), + FLUSH_AND_INV_CB_META = (46 << 0), + CS_DONE = (47 << 0), + PS_DONE = (48 << 0), + FLUSH_AND_INV_CB_PIXEL_DATA = (49 << 0), + THREAD_TRACE_START = (51 << 0), + THREAD_TRACE_STOP = (52 << 0), + THREAD_TRACE_FLUSH = (54 << 0), + THREAD_TRACE_FINISH = (55 << 0), +}; + // Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer. // https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h // Not sure if all of these are used. @@ -501,7 +542,7 @@ enum Type3Opcode { PM4_WAIT_FOR_IDLE = 0x26, // wait for the IDLE state of the engine PM4_WAIT_REG_MEM = 0x3c, // wait until a register or memory location is a specific value PM4_WAIT_REG_EQ = 0x52, // wait until a register location is equal to a specific value - PM4_WAT_REG_GTE = 0x53, // wait until a register location is >= a specific value + PM4_WAIT_REG_GTE = 0x53, // wait until a register location is >= a specific value PM4_WAIT_UNTIL_READ = 0x5c, // wait until a read completes PM4_WAIT_IB_PFD_COMPLETE = 0x5d, // wait until all base/size writes from an IB_PFD packet have completed diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc index e979cb62a..208473cf2 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc @@ -366,7 +366,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer auto dwords = buffer_ptr.as_array(); dwords[0] = xenos::MakePacketType3(); dwords[1] = 'SWAP'; - dwords[2] = *frontbuffer_ptr; + dwords[2] = (*frontbuffer_ptr) & 0x1FFFFFFF; // Set by VdCallGraphicsNotificationRoutines. 
dwords[3] = last_frontbuffer_width_;
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index d7507df23..5dcf5bfa8 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -376,17 +376,19 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
   return mmio_handler_->LookupRange(virtual_address);
 }

-uintptr_t Memory::AddPhysicalWriteWatch(uint32_t physical_address,
-                                        uint32_t length,
-                                        cpu::WriteWatchCallback callback,
-                                        void* callback_context,
-                                        void* callback_data) {
-  return mmio_handler_->AddPhysicalWriteWatch(
-      physical_address, length, callback, callback_context, callback_data);
+uintptr_t Memory::AddPhysicalAccessWatch(uint32_t physical_address,
+                                         uint32_t length,
+                                         cpu::MMIOHandler::WatchType type,
+                                         cpu::AccessWatchCallback callback,
+                                         void* callback_context,
+                                         void* callback_data) {
+  return mmio_handler_->AddPhysicalAccessWatch(physical_address, length, type,
+                                               callback, callback_context,
+                                               callback_data);
 }

-void Memory::CancelWriteWatch(uintptr_t watch_handle) {
-  mmio_handler_->CancelWriteWatch(watch_handle);
+void Memory::CancelAccessWatch(uintptr_t watch_handle) {
+  mmio_handler_->CancelAccessWatch(watch_handle);
 }

 uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
@@ -453,6 +455,7 @@ bool Memory::Save(ByteStream* stream) {
 }

 bool Memory::Restore(ByteStream* stream) {
+  XELOGD("Restoring memory...");
   heaps_.v00000000.Restore(stream);
   heaps_.v40000000.Restore(stream);
   heaps_.v80000000.Restore(stream);
@@ -577,6 +580,8 @@ bool BaseHeap::Save(ByteStream* stream) {
 }

 bool BaseHeap::Restore(ByteStream* stream) {
+  XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + heap_size_);
+
   for (size_t i = 0; i < page_table_.size(); i++) {
     auto& page = page_table_[i];
     page.qword = stream->Read<uint64_t>();
@@ -897,7 +902,7 @@ bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) {
   auto base_page_entry = page_table_[base_page_number];
   if (base_page_entry.base_address != base_page_number) {
     XELOGE("BaseHeap::Release failed because address is not a region start");
-    // return false;
+    return false;
   }

   if (out_region_size) {
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
index 6a0fc9c5d..e27976de2 100644
--- a/src/xenia/memory.h
+++ b/src/xenia/memory.h
@@ -303,12 +303,13 @@ class Memory {
   //
   // This has a significant performance penalty for writes in the range or
   // nearby (sharing 64KiB pages).
-  uintptr_t AddPhysicalWriteWatch(uint32_t physical_address, uint32_t length,
-                                  cpu::WriteWatchCallback callback,
-                                  void* callback_context, void* callback_data);
+  uintptr_t AddPhysicalAccessWatch(uint32_t physical_address, uint32_t length,
+                                   cpu::MMIOHandler::WatchType type,
+                                   cpu::AccessWatchCallback callback,
+                                   void* callback_context, void* callback_data);

-  // Cancels a write watch requested with AddPhysicalWriteWatch.
-  void CancelWriteWatch(uintptr_t watch_handle);
+  // Cancels an access watch requested with AddPhysicalAccessWatch.
+  void CancelAccessWatch(uintptr_t watch_handle);

   // Allocates virtual memory from the 'system' heap.
// System memory is kept separate from game memory but is still accessible diff --git a/src/xenia/ui/spirv/spirv_validator.cc b/src/xenia/ui/spirv/spirv_validator.cc new file mode 100644 index 000000000..734688eb6 --- /dev/null +++ b/src/xenia/ui/spirv/spirv_validator.cc @@ -0,0 +1,80 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/spirv/spirv_validator.h" + +#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace spirv { + +SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic) + : text_(text), diagnostic_(diagnostic) {} + +SpirvValidator::Result::~Result() { + if (text_) { + spvTextDestroy(text_); + } + if (diagnostic_) { + spvDiagnosticDestroy(diagnostic_); + } +} + +bool SpirvValidator::Result::has_error() const { return !!diagnostic_; } + +size_t SpirvValidator::Result::error_word_index() const { + return diagnostic_ ? diagnostic_->position.index : 0; +} + +const char* SpirvValidator::Result::error_string() const { + return diagnostic_ ? diagnostic_->error : ""; +} + +const char* SpirvValidator::Result::text() const { + return text_ ? text_->str : ""; +} + +std::string SpirvValidator::Result::to_string() const { + return text_ ? std::string(text_->str, text_->length) : ""; +} + +void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const { + if (text_) { + target_buffer->AppendBytes(reinterpret_cast(text_->str), + text_->length); + } +} + +SpirvValidator::SpirvValidator() : spv_context_(spvContextCreate()) {} +SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); } + +std::unique_ptr SpirvValidator::Validate( + const uint32_t* words, size_t word_count) { + spv_text text = nullptr; + spv_diagnostic diagnostic = nullptr; + spv_const_binary_t binary = {words, word_count}; + auto result_code = + spvValidate(spv_context_, &binary, SPV_VALIDATE_ALL, &diagnostic); + std::unique_ptr result(new Result(text, diagnostic)); + if (result_code) { + XELOGE("Failed to validate spv: %d", result_code); + if (result->has_error()) { + return result; + } else { + return nullptr; + } + } + return result; +} + +} // namespace spirv +} // namespace ui +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/spirv/spirv_validator.h b/src/xenia/ui/spirv/spirv_validator.h new file mode 100644 index 000000000..890843f27 --- /dev/null +++ b/src/xenia/ui/spirv/spirv_validator.h @@ -0,0 +1,66 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ +#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ + +#include +#include + +#include "xenia/base/string_buffer.h" +#include "xenia/ui/spirv/spirv_util.h" + +namespace xe { +namespace ui { +namespace spirv { + +class SpirvValidator { + public: + class Result { + public: + Result(spv_text text, spv_diagnostic diagnostic); + ~Result(); + + // True if the result has an error associated with it. + bool has_error() const; + // Index of the error in the provided binary word data. + size_t error_word_index() const; + // Human-readable description of the error. + const char* error_string() const; + + // Disassembled source text. + // Returned pointer lifetime is tied to this Result instance. + const char* text() const; + // Converts the disassembled source text to a string. + std::string to_string() const; + // Appends the disassembled source text to the given buffer. + void AppendText(StringBuffer* target_buffer) const; + + private: + spv_text text_ = nullptr; + spv_diagnostic diagnostic_ = nullptr; + }; + + SpirvValidator(); + ~SpirvValidator(); + + // Validates the given SPIRV binary. + // The return will be nullptr if validation fails due to a library error. + // The return may have an error set on it if the SPIRV binary is malformed. + std::unique_ptr Validate(const uint32_t* words, size_t word_count); + + private: + spv_context spv_context_ = nullptr; +}; + +} // namespace spirv +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc new file mode 100644 index 000000000..94d2996ce --- /dev/null +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -0,0 +1,227 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" + +#include "xenia/ui/vulkan/circular_buffer.h" + +namespace xe { +namespace ui { +namespace vulkan { + +CircularBuffer::CircularBuffer(VulkanDevice* device) : device_(device) {} +CircularBuffer::~CircularBuffer() { Shutdown(); } + +bool CircularBuffer::Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage, + VkDeviceSize alignment) { + VkResult status = VK_SUCCESS; + capacity = xe::round_up(capacity, alignment); + + // Create our internal buffer. + VkBufferCreateInfo buffer_info; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = nullptr; + buffer_info.flags = 0; + buffer_info.size = capacity; + buffer_info.usage = usage; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_info.queueFamilyIndexCount = 0; + buffer_info.pQueueFamilyIndices = nullptr; + status = vkCreateBuffer(*device_, &buffer_info, nullptr, &gpu_buffer_); + CheckResult(status, "vkCreateBuffer"); + if (status != VK_SUCCESS) { + return false; + } + + VkMemoryRequirements reqs; + vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs); + + // Allocate memory from the device to back the buffer. 
+ assert_true(reqs.size == capacity); + reqs.alignment = std::max(alignment, reqs.alignment); + gpu_memory_ = device_->AllocateMemory(reqs); + if (!gpu_memory_) { + XELOGE("CircularBuffer::Initialize - Failed to allocate memory!"); + Shutdown(); + return false; + } + + alignment_ = reqs.alignment; + capacity_ = reqs.size; + gpu_base_ = 0; + + // Bind the buffer to its backing memory. + status = vkBindBufferMemory(*device_, gpu_buffer_, gpu_memory_, gpu_base_); + CheckResult(status, "vkBindBufferMemory"); + if (status != VK_SUCCESS) { + XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); + Shutdown(); + return false; + } + + // Map the memory so we can access it. + status = vkMapMemory(*device_, gpu_memory_, gpu_base_, capacity_, 0, + reinterpret_cast(&host_base_)); + CheckResult(status, "vkMapMemory"); + if (status != VK_SUCCESS) { + XELOGE("CircularBuffer::Initialize - Failed to map memory!"); + Shutdown(); + return false; + } + + return true; +} + +void CircularBuffer::Shutdown() { + Clear(); + if (host_base_) { + vkUnmapMemory(*device_, gpu_memory_); + host_base_ = nullptr; + } + if (gpu_buffer_) { + vkDestroyBuffer(*device_, gpu_buffer_, nullptr); + gpu_buffer_ = nullptr; + } + if (gpu_memory_) { + vkFreeMemory(*device_, gpu_memory_, nullptr); + gpu_memory_ = nullptr; + } +} + +bool CircularBuffer::CanAcquire(VkDeviceSize length) { + // Make sure the length is aligned. + length = xe::round_up(length, alignment_); + if (allocations_.empty()) { + // Read head has caught up to write head (entire buffer available for write) + assert_true(read_head_ == write_head_); + return capacity_ >= length; + } else if (write_head_ < read_head_) { + // Write head wrapped around and is behind read head. + // | write |---- read ----| + return (read_head_ - write_head_) >= length; + } else if (write_head_ > read_head_) { + // Read head behind write head. + // 1. Check if there's enough room from write -> capacity + // | |---- read ----| write | + if ((capacity_ - write_head_) >= length) { + return true; + } + + // 2. Check if there's enough room from 0 -> read + // | write |---- read ----| | + if ((read_head_ - 0) >= length) { + return true; + } + } + + return false; +} + +CircularBuffer::Allocation* CircularBuffer::Acquire( + VkDeviceSize length, std::shared_ptr fence) { + VkDeviceSize aligned_length = xe::round_up(length, alignment_); + if (!CanAcquire(aligned_length)) { + return nullptr; + } + + assert_true(write_head_ % alignment_ == 0); + if (write_head_ < read_head_) { + // Write head behind read head. 
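+    // (This is the "| write |---- read ----|" case from CanAcquire above:
+    // the free region is the single span between the two heads.)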
+ assert_true(read_head_ - write_head_ >= aligned_length); + + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + write_head_; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + write_head_; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ += aligned_length; + allocations_.push_back(alloc); + + return alloc; + } else { + // Write head equal to/after read head + if (capacity_ - write_head_ >= aligned_length) { + // Free space from write -> capacity + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + write_head_; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + write_head_; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ += aligned_length; + allocations_.push_back(alloc); + + return alloc; + } else if ((read_head_ - 0) >= aligned_length) { + // Free space from begin -> read + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + 0; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + 0; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ = aligned_length; + allocations_.push_back(alloc); + + return alloc; + } + } + + return nullptr; +} + +void CircularBuffer::Flush(Allocation* allocation) { + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = nullptr; + range.memory = gpu_memory_; + range.offset = gpu_base_ + allocation->offset; + range.size = allocation->length; + vkFlushMappedMemoryRanges(*device_, 1, &range); +} + +void CircularBuffer::Clear() { + for (auto alloc : allocations_) { + delete alloc; + } + allocations_.clear(); + + write_head_ = read_head_ = 0; +} + +void CircularBuffer::Scavenge() { + for (auto it = allocations_.begin(); it != allocations_.end();) { + if ((*it)->fence->status() != VK_SUCCESS) { + // Don't bother freeing following allocations to ensure proper ordering. + break; + } + + if (capacity_ - read_head_ < (*it)->aligned_length) { + // This allocation is stored at the beginning of the buffer. + read_head_ = (*it)->aligned_length; + } else { + read_head_ += (*it)->aligned_length; + } + + delete *it; + it = allocations_.erase(it); + } +} + +} // namespace vulkan +} // namespace ui +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h new file mode 100644 index 000000000..54aa916fd --- /dev/null +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -0,0 +1,87 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ +#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ + +#include + +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// A circular buffer, intended to hold (fairly) temporary memory that will be +// released when a fence is signaled. Best used when allocations are taken +// in-order with command buffer submission. 
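+//
+// Illustrative usage (a sketch, not lifted from a call site; Fence is
+// xe::ui::vulkan::Fence from vulkan_util.h):
+//   CircularBuffer transient(device);
+//   transient.Initialize(1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+//   std::shared_ptr<Fence> fence(new Fence(*device));
+//   if (auto* alloc = transient.Acquire(size, fence)) {
+//     std::memcpy(alloc->host_ptr, data, size);
+//     transient.Flush(alloc);
+//   }
+//   // ... submit GPU work that signals the fence ...
+//   transient.Scavenge();  // reclaims the allocation once signaled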
+//
+// Allocations loop around the buffer in circles (but are not fragmented at the
+// ends of the buffer), where trailing older allocations are freed after use.
+class CircularBuffer {
+ public:
+  CircularBuffer(VulkanDevice* device);
+  ~CircularBuffer();
+
+  struct Allocation {
+    void* host_ptr;
+    VkDeviceMemory gpu_memory;
+    VkDeviceSize offset;
+    VkDeviceSize length;
+    VkDeviceSize aligned_length;
+
+    // Allocation usage fence. This allocation will be deleted when the fence
+    // becomes signaled.
+    std::shared_ptr<Fence> fence;
+  };
+
+  bool Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage,
+                  VkDeviceSize alignment = 256);
+  void Shutdown();
+
+  VkDeviceSize alignment() const { return alignment_; }
+  VkDeviceSize capacity() const { return capacity_; }
+  VkBuffer gpu_buffer() const { return gpu_buffer_; }
+  VkDeviceMemory gpu_memory() const { return gpu_memory_; }
+  uint8_t* host_base() const { return host_base_; }
+
+  bool CanAcquire(VkDeviceSize length);
+
+  // Acquires space to hold memory. This allocation is only freed when the
+  // fence reaches the signaled state.
+  Allocation* Acquire(VkDeviceSize length, std::shared_ptr<Fence> fence);
+  void Flush(Allocation* allocation);
+
+  // Clears all allocations, regardless of whether they've been consumed or
+  // not.
+  void Clear();
+
+  // Frees any allocations whose fences have been signaled.
+  void Scavenge();
+
+ private:
+  VkDeviceSize capacity_ = 0;
+  VkDeviceSize alignment_ = 0;
+  VkDeviceSize write_head_ = 0;
+  VkDeviceSize read_head_ = 0;
+
+  VulkanDevice* device_;
+  VkBuffer gpu_buffer_ = nullptr;
+  VkDeviceMemory gpu_memory_ = nullptr;
+  VkDeviceSize gpu_base_ = 0;
+  uint8_t* host_base_ = nullptr;
+
+  std::list<Allocation*> allocations_;
+};
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h
index a50f82d08..d62ad7452 100644
--- a/src/xenia/ui/vulkan/fenced_pools.h
+++ b/src/xenia/ui/vulkan/fenced_pools.h
@@ -14,6 +14,7 @@

 #include "xenia/base/assert.h"
 #include "xenia/ui/vulkan/vulkan.h"
+#include "xenia/ui/vulkan/vulkan_util.h"

 namespace xe {
 namespace ui {
 namespace vulkan {
@@ -40,13 +41,15 @@ class BaseFencedPool {

   // True if one or more batches are still pending on the GPU.
   bool has_pending() const { return pending_batch_list_head_ != nullptr; }
+  // True if a batch is open.
+  bool has_open_batch() const { return open_batch_ != nullptr; }

   // Checks all pending batches for completion and scavenges their entries.
   // This should be called as frequently as reasonable.
   void Scavenge() {
     while (pending_batch_list_head_) {
       auto batch = pending_batch_list_head_;
-      if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
+      if (vkGetFenceStatus(device_, *batch->fence) == VK_SUCCESS) {
         // Batch has completed. Reclaim.
         pending_batch_list_head_ = batch->next;
         if (batch == pending_batch_list_tail_) {
@@ -88,6 +91,24 @@ class BaseFencedPool {
     open_batch_ = batch;
   }

+  // Cancels an open batch, and releases all entries acquired within.
+  void CancelBatch() {
+    assert_not_null(open_batch_);
+
+    auto batch = open_batch_;
+    open_batch_ = nullptr;
+
+    // Relink the batch back into the free batch list.
+    batch->next = free_batch_list_head_;
+    free_batch_list_head_ = batch;
+
+    // Relink entries back into free entries list.
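+    // (Note: this splice assumes the canceled batch acquired at least one
+    // entry; entry_list_tail is null for a batch that never called
+    // AcquireEntry.)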
+ batch->entry_list_tail->next = free_entry_list_head_; + free_entry_list_head_ = batch->entry_list_head; + batch->entry_list_head = nullptr; + batch->entry_list_tail = nullptr; + } + // Attempts to acquire an entry from the pool in the current batch. // If none are available a new one will be allocated. HANDLE AcquireEntry() { @@ -114,7 +135,7 @@ class BaseFencedPool { // Ends the current batch using the given fence to indicate when the batch // has completed execution on the GPU. - void EndBatch(VkFence fence) { + void EndBatch(std::shared_ptr fence) { assert_not_null(open_batch_); // Close and see if we have anything. @@ -137,6 +158,7 @@ class BaseFencedPool { } if (pending_batch_list_tail_) { pending_batch_list_tail_->next = batch; + pending_batch_list_tail_ = batch; } else { pending_batch_list_tail_ = batch; } @@ -176,7 +198,7 @@ class BaseFencedPool { Batch* next; Entry* entry_list_head; Entry* entry_list_tail; - VkFence fence; + std::shared_ptr fence; }; Batch* free_batch_list_head_ = nullptr; diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index 42077ca82..7b1dc7f8d 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -93,8 +93,8 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { } ENABLE_AND_EXPECT(geometryShader); ENABLE_AND_EXPECT(depthClamp); - ENABLE_AND_EXPECT(alphaToOne); ENABLE_AND_EXPECT(multiViewport); + ENABLE_AND_EXPECT(independentBlend); // TODO(benvanik): add other features. if (any_features_missing) { XELOGE( diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 23dffd6c6..49b0cbc4d 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -136,6 +136,46 @@ class LightweightCircularBuffer { class VulkanImmediateTexture : public ImmediateTexture { public: + VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, + VkDescriptorSetLayout descriptor_set_layout, + VkImageView image_view, VkSampler sampler, + uint32_t width, uint32_t height) + : ImmediateTexture(width, height), + device_(*device), + descriptor_pool_(descriptor_pool), + image_view_(image_view), + sampler_(sampler) { + handle = reinterpret_cast(this); + + // Create descriptor set used just for this texture. + // It never changes, so we can reuse it and not worry with updates. + VkDescriptorSetAllocateInfo set_alloc_info; + set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_alloc_info.pNext = nullptr; + set_alloc_info.descriptorPool = descriptor_pool_; + set_alloc_info.descriptorSetCount = 1; + set_alloc_info.pSetLayouts = &descriptor_set_layout; + auto err = + vkAllocateDescriptorSets(device_, &set_alloc_info, &descriptor_set_); + CheckResult(err, "vkAllocateDescriptorSets"); + + // Initialize descriptor with our texture. 
+ VkDescriptorImageInfo texture_info; + texture_info.sampler = sampler_; + texture_info.imageView = image_view_; + texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + VkWriteDescriptorSet descriptor_write; + descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_write.pNext = nullptr; + descriptor_write.dstSet = descriptor_set_; + descriptor_write.dstBinding = 0; + descriptor_write.dstArrayElement = 0; + descriptor_write.descriptorCount = 1; + descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_write.pImageInfo = &texture_info; + vkUpdateDescriptorSets(device_, 1, &descriptor_write, 0, nullptr); + } + VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, VkDescriptorSetLayout descriptor_set_layout, VkSampler sampler, uint32_t width, uint32_t height) @@ -161,7 +201,7 @@ class VulkanImmediateTexture : public ImmediateTexture { image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; auto err = vkCreateImage(device_, &image_info, nullptr, &image_); CheckResult(err, "vkCreateImage"); @@ -221,9 +261,12 @@ class VulkanImmediateTexture : public ImmediateTexture { ~VulkanImmediateTexture() override { vkFreeDescriptorSets(device_, descriptor_pool_, 1, &descriptor_set_); - vkDestroyImageView(device_, image_view_, nullptr); - vkDestroyImage(device_, image_, nullptr); - vkFreeMemory(device_, device_memory_, nullptr); + + if (device_memory_) { + vkDestroyImageView(device_, image_view_, nullptr); + vkDestroyImage(device_, image_, nullptr); + vkFreeMemory(device_, device_memory_, nullptr); + } } void Upload(const uint8_t* src_data) { @@ -238,25 +281,49 @@ class VulkanImmediateTexture : public ImmediateTexture { vkGetImageSubresourceLayout(device_, image_, &subresource, &layout); // Map memory for upload. - void* gpu_data = nullptr; - auto err = - vkMapMemory(device_, device_memory_, 0, layout.size, 0, &gpu_data); + uint8_t* gpu_data = nullptr; + auto err = vkMapMemory(device_, device_memory_, 0, layout.size, 0, + reinterpret_cast(&gpu_data)); CheckResult(err, "vkMapMemory"); // Copy the entire texture, hoping its layout matches what we expect. - std::memcpy(gpu_data, src_data, layout.size); + std::memcpy(gpu_data + layout.offset, src_data, layout.size); vkUnmapMemory(device_, device_memory_); } + // Queues a command to transition this texture to a new layout. This assumes + // the command buffer WILL be queued and executed by the device. 
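+  // (image_layout_ is updated at record time rather than execution time, so
+  // dropping the buffer unsubmitted would leave the cached layout stale.)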
+ void TransitionLayout(VkCommandBuffer command_buffer, + VkImageLayout new_layout) { + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = 0; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = new_layout; + image_barrier.image = image_; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_layout_ = new_layout; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } + VkDescriptorSet descriptor_set() const { return descriptor_set_; } + VkImageLayout layout() const { return image_layout_; } private: VkDevice device_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr; VkSampler sampler_ = nullptr; // Not owned. VkImage image_ = nullptr; - VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_PREINITIALIZED; VkDeviceMemory device_memory_ = nullptr; VkImageView image_view_ = nullptr; VkDescriptorSet descriptor_set_ = nullptr; @@ -538,7 +605,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) pipeline_info.renderPass = context_->swap_chain()->render_pass(); pipeline_info.subpass = 0; pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = 0; + pipeline_info.basePipelineIndex = -1; err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, &triangle_pipeline_); CheckResult(err, "vkCreateGraphicsPipelines"); @@ -547,7 +614,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT; input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; pipeline_info.basePipelineHandle = triangle_pipeline_; - pipeline_info.basePipelineIndex = 0; + pipeline_info.basePipelineIndex = -1; err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, &line_pipeline_); CheckResult(err, "vkCreateGraphicsPipelines"); @@ -604,6 +671,14 @@ std::unique_ptr VulkanImmediateDrawer::CreateTexture( return std::unique_ptr(texture.release()); } +std::unique_ptr VulkanImmediateDrawer::WrapTexture( + VkImageView image_view, VkSampler sampler, uint32_t width, + uint32_t height) { + return std::make_unique( + context_->device(), descriptor_pool_, texture_set_layout_, image_view, + sampler, width, height); +} + void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, const uint8_t* data) { static_cast(texture)->Upload(data); @@ -672,9 +747,6 @@ void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { auto swap_chain = context_->swap_chain(); - if (draw.primitive_type != ImmediatePrimitiveType::kTriangles) { - return; - } switch (draw.primitive_type) { case ImmediatePrimitiveType::kLines: vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -689,6 +761,10 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { // Setup texture binding. 
auto texture = reinterpret_cast(draw.texture_handle); if (texture) { + if (texture->layout() != VK_IMAGE_LAYOUT_GENERAL) { + texture->TransitionLayout(current_cmd_buffer_, VK_IMAGE_LAYOUT_GENERAL); + } + auto texture_set = texture->descriptor_set(); vkCmdBindDescriptorSets(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index d14a6eb7c..1db47f0d8 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -32,6 +32,10 @@ class VulkanImmediateDrawer : public ImmediateDrawer { ImmediateTextureFilter filter, bool repeat, const uint8_t* data) override; + std::unique_ptr WrapTexture(VkImageView image_view, + VkSampler sampler, + uint32_t width, + uint32_t height); void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override; void Begin(int render_target_width, int render_target_height) override; diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index 15d2795fd..ad383f32f 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -187,6 +187,10 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &render_cmd_buffer_); CheckResult(err, "vkCreateCommandBuffer"); + // Create another command buffer that handles image copies. + err = vkAllocateCommandBuffers(*device_, &cmd_buffer_info, ©_cmd_buffer_); + CheckResult(err, "vkCreateCommandBuffer"); + // Create the render pass used to draw to the swap chain. // The actual framebuffer attached will depend on which image we are drawing // into. @@ -194,7 +198,7 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { color_attachment.flags = 0; color_attachment.format = surface_format_; color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // CLEAR; color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; @@ -388,6 +392,7 @@ bool VulkanSwapChain::Begin() { // Reset all command buffers. vkResetCommandBuffer(render_cmd_buffer_, 0); + vkResetCommandBuffer(copy_cmd_buffer_, 0); auto& current_buffer = buffers_[current_buffer_index_]; // Build the command buffer that will execute all queued rendering buffers. @@ -399,14 +404,18 @@ bool VulkanSwapChain::Begin() { err = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info); CheckResult(err, "vkBeginCommandBuffer"); - // Transition the image to a format we can render to. + // Start recording the copy command buffer as well. + err = vkBeginCommandBuffer(copy_cmd_buffer_, &begin_info); + CheckResult(err, "vkBeginCommandBuffer"); + + // Transition the image to a format we can copy to. 
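+  // Frame setup is now split across two command buffers (sketch of what is
+  // recorded below):
+  //   copy_cmd_buffer_:   PRESENT_SRC -> TRANSFER_DST, vkCmdClearColorImage,
+  //                       then any queued blits (e.g. the frontbuffer copy)
+  //   render_cmd_buffer_: TRANSFER_DST -> COLOR_ATTACHMENT, render pass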
VkImageMemoryBarrier pre_image_memory_barrier; pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; pre_image_memory_barrier.pNext = nullptr; pre_image_memory_barrier.srcAccessMask = 0; - pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + pre_image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.image = current_buffer.image; @@ -416,23 +425,37 @@ bool VulkanSwapChain::Begin() { pre_image_memory_barrier.subresourceRange.levelCount = 1; pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0; pre_image_memory_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(copy_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &pre_image_memory_barrier); + + // First: Issue a command to clear the render target. + VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkClearColorValue clear_color; + clear_color.float32[0] = 238 / 255.0f; + clear_color.float32[1] = 238 / 255.0f; + clear_color.float32[2] = 238 / 255.0f; + clear_color.float32[3] = 1.0f; + if (FLAGS_vulkan_random_clear_color) { + clear_color.float32[0] = + rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + clear_color.float32[1] = 1.0f; + clear_color.float32[2] = 0.0f; + } + vkCmdClearColorImage(copy_cmd_buffer_, current_buffer.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, + &clear_range); + + // Transition the image to a color attachment target for drawing. + pre_image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &pre_image_memory_barrier); // Begin render pass. 
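+  // The color attachment is loaded (VK_ATTACHMENT_LOAD_OP_LOAD, set above),
+  // so the pass carries no clear values; clearing already happened in
+  // copy_cmd_buffer_.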
- VkClearValue color_clear_value; - color_clear_value.color.float32[0] = 238 / 255.0f; - color_clear_value.color.float32[1] = 238 / 255.0f; - color_clear_value.color.float32[2] = 238 / 255.0f; - color_clear_value.color.float32[3] = 1.0f; - if (FLAGS_vulkan_random_clear_color) { - color_clear_value.color.float32[0] = - rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - color_clear_value.color.float32[1] = 1.0f; - color_clear_value.color.float32[2] = 0.0f; - } - VkClearValue clear_values[] = {color_clear_value}; VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; @@ -442,9 +465,8 @@ bool VulkanSwapChain::Begin() { render_pass_begin_info.renderArea.offset.y = 0; render_pass_begin_info.renderArea.extent.width = surface_width_; render_pass_begin_info.renderArea.extent.height = surface_height_; - render_pass_begin_info.clearValueCount = - static_cast(xe::countof(clear_values)); - render_pass_begin_info.pClearValues = clear_values; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; vkCmdBeginRenderPass(render_cmd_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); @@ -458,6 +480,7 @@ bool VulkanSwapChain::End() { vkCmdEndRenderPass(render_cmd_buffer_); // Transition the image to a format the presentation engine can source from. + // FIXME: Do we need more synchronization here between the copy buffer? VkImageMemoryBarrier post_image_memory_barrier; post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; post_image_memory_barrier.pNext = nullptr; @@ -483,14 +506,20 @@ bool VulkanSwapChain::End() { auto err = vkEndCommandBuffer(render_cmd_buffer_); CheckResult(err, "vkEndCommandBuffer"); + err = vkEndCommandBuffer(copy_cmd_buffer_); + CheckResult(err, "vkEndCommandBuffer"); + + VkCommandBuffer command_buffers[] = {copy_cmd_buffer_, render_cmd_buffer_}; + // Submit rendering. VkSubmitInfo render_submit_info; render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; render_submit_info.pNext = nullptr; render_submit_info.waitSemaphoreCount = 0; render_submit_info.pWaitSemaphores = nullptr; - render_submit_info.commandBufferCount = 1; - render_submit_info.pCommandBuffers = &render_cmd_buffer_; + render_submit_info.commandBufferCount = + static_cast(xe::countof(command_buffers)); + render_submit_info.pCommandBuffers = command_buffers; render_submit_info.signalSemaphoreCount = 0; render_submit_info.pSignalSemaphores = nullptr; { diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h index 1d1f578c3..773a52053 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.h +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.h @@ -35,11 +35,16 @@ class VulkanSwapChain { uint32_t surface_width() const { return surface_width_; } uint32_t surface_height() const { return surface_height_; } + VkImage surface_image() const { + return buffers_[current_buffer_index_].image; + } // Render pass used for compositing. VkRenderPass render_pass() const { return render_pass_; } // Render command buffer, active inside the render pass from Begin to End. VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } + // Copy commands, ran before the render command buffer. + VkCommandBuffer copy_cmd_buffer() const { return copy_cmd_buffer_; } // Initializes the swap chain with the given WSI surface. 
   bool Initialize(VkSurfaceKHR surface);
@@ -74,6 +79,7 @@ class VulkanSwapChain {
   uint32_t surface_height_ = 0;
   VkFormat surface_format_ = VK_FORMAT_UNDEFINED;
   VkCommandPool cmd_pool_ = nullptr;
+  VkCommandBuffer copy_cmd_buffer_ = nullptr;
   VkCommandBuffer render_cmd_buffer_ = nullptr;
   VkRenderPass render_pass_ = nullptr;
   VkSemaphore image_available_semaphore_ = nullptr;
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
index fcf9e4f8f..f5475edd8 100644
--- a/src/xenia/ui/vulkan/vulkan_util.h
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -25,6 +25,30 @@ namespace xe {
 namespace ui {
 namespace vulkan {

+class Fence {
+ public:
+  Fence(VkDevice device) : device_(device) {
+    VkFenceCreateInfo fence_info;
+    fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    fence_info.pNext = nullptr;
+    fence_info.flags = 0;
+    vkCreateFence(device, &fence_info, nullptr, &fence_);
+  }
+  ~Fence() {
+    vkDestroyFence(device_, fence_, nullptr);
+    fence_ = nullptr;
+  }
+
+  VkResult status() const { return vkGetFenceStatus(device_, fence_); }
+
+  VkFence fence() const { return fence_; }
+  operator VkFence() const { return fence_; }
+
+ private:
+  VkDevice device_;
+  VkFence fence_ = nullptr;
+};
+
 struct Version {
   uint32_t major;
   uint32_t minor;
diff --git a/third_party/glslang-spirv/SpvBuilder.cpp b/third_party/glslang-spirv/SpvBuilder.cpp
index 0a2fa2139..13a6c946a 100644
--- a/third_party/glslang-spirv/SpvBuilder.cpp
+++ b/third_party/glslang-spirv/SpvBuilder.cpp
@@ -1166,6 +1166,7 @@ void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemantics)
 // An opcode that has one operand, a result id, and a type
 Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand)
 {
+    assert(operand != 0);
     Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
     op->addIdOperand(operand);
     buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
@@ -1175,6 +1176,8 @@ Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand)
 Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right)
 {
+    assert(left != 0);
+    assert(right != 0);
     Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
     op->addIdOperand(left);
     op->addIdOperand(right);
@@ -1185,6 +1188,9 @@ Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right)
 Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3)
 {
+    assert(op1 != 0);
+    assert(op2 != 0);
+    assert(op3 != 0);
     Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
     op->addIdOperand(op1);
     op->addIdOperand(op2);
diff --git a/third_party/glslang-spirv/SpvBuilder.h b/third_party/glslang-spirv/SpvBuilder.h
index d6dc61218..7eae4fe91 100644
--- a/third_party/glslang-spirv/SpvBuilder.h
+++ b/third_party/glslang-spirv/SpvBuilder.h
@@ -93,6 +93,8 @@ public:
         return id;
     }

+    Module* getModule() { return &module; }
+
     // For creating new types (will return old type if the requested one was already made).
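The Fence RAII wrapper added above creates an unsignaled fence on construction and destroys it with the device handle it captured. A usage sketch; |device|, |queue|, and |submit_info| are assumed call-site names, not part of the patch.

```cpp
#include <vulkan/vulkan.h>

// Sketch of using the Fence wrapper added in vulkan_util.h above.
void SubmitWithFence(VkDevice device, VkQueue queue,
                     const VkSubmitInfo& submit_info) {
  xe::ui::vulkan::Fence fence(device);
  // operator VkFence lets the wrapper pass straight into the API call.
  vkQueueSubmit(queue, 1, &submit_info, fence);
  // status() wraps vkGetFenceStatus: VK_NOT_READY until the GPU signals.
  while (fence.status() == VK_NOT_READY) {
    // Spin (or yield / do other work) until the submission completes.
  }
}  // ~Fence destroys the VkFence here.
```

Note the wrapper exposes no reset(); reusing one fence across frames would additionally require vkResetFences, so as written each use constructs a fresh fence.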
     Id makeVoidType();
     Id makeBoolType();
@@ -517,6 +519,7 @@ public:
     void createBranch(Block* block);
     void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
     void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control);
+    void createSelectionMerge(Block* mergeBlock, unsigned int control);

 protected:
     Id makeIntConstant(Id typeId, unsigned value, bool specConstant);
@@ -527,7 +530,6 @@ public:
     void transferAccessChainSwizzle(bool dynamic);
     void simplifyAccessChainSwizzle();
     void createAndSetNoPredecessorBlock(const char*);
-    void createSelectionMerge(Block* mergeBlock, unsigned int control);
     void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;

     SourceLanguage source;
diff --git a/third_party/glslang-spirv/spvIR.h b/third_party/glslang-spirv/spvIR.h
index 98f4971b4..63e460ebb 100644
--- a/third_party/glslang-spirv/spvIR.h
+++ b/third_party/glslang-spirv/spvIR.h
@@ -180,6 +180,11 @@ public:
     void addInstruction(std::unique_ptr<Instruction> inst);
     void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this); }
     void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
+    void insertInstruction(size_t pos, std::unique_ptr<Instruction> inst);
+
+    size_t getInstructionCount() { return instructions.size(); }
+    Instruction* getInstruction(size_t i) { return instructions[i].get(); }
+    void removeInstruction(size_t i) { instructions.erase(instructions.begin() + i); }
     const std::vector<Block*>& getPredecessors() const { return predecessors; }
     const std::vector<Block*>& getSuccessors() const { return successors; }
     void setUnreachable() { unreachable = true; }
@@ -200,6 +205,10 @@ public:
     bool isTerminated() const
     {
+        if (instructions.size() == 0) {
+            return false;
+        }
+
         switch (instructions.back()->getOpCode()) {
         case OpBranch:
         case OpBranchConditional:
@@ -215,6 +224,7 @@ public:
     void dump(std::vector<unsigned int>& out) const
     {
+        // OpLabel
         instructions[0]->dump(out);
         for (int i = 0; i < (int)localVariables.size(); ++i)
             localVariables[i]->dump(out);
@@ -222,7 +232,51 @@ public:
             instructions[i]->dump(out);
     }

-protected:
+    // Moves all instructions from a target block into this block, and removes
+    // the target block from our list of successors.
+    // This function assumes this block unconditionally branches directly to
+    // the target block.
+    void merge(Block* target_block) {
+        if (isTerminated()) {
+            instructions.erase(instructions.end() - 1);
+        }
+
+        // Find and remove the target block from our successors first.
+        for (auto it = successors.begin(); it != successors.end(); ++it) {
+            if (*it == target_block) {
+                it = successors.erase(it);
+                break;
+            }
+        }
+
+        // Add the target block's successors to our successors.
+        successors.insert(successors.end(), target_block->successors.begin(),
+                          target_block->successors.end());
+
+        // For each successor, replace the target block in their predecessors
+        // with us.
+        for (auto block : successors) {
+            std::replace(block->predecessors.begin(), block->predecessors.end(),
+                         target_block, this);
+        }
+
+        // Move instructions from the target block into this block.
+        for (auto it = target_block->instructions.begin();
+             it != target_block->instructions.end();) {
+            if ((*it)->getOpCode() == spv::Op::OpLabel) {
+                ++it;
+                continue;
+            }
+
+            instructions.push_back(std::move(*it));
+            it = target_block->instructions.erase(it);
+        }
+
+        target_block->predecessors.clear();
+        target_block->successors.clear();
+    }
+
+ protected:
     Block(const Block&);
     Block& operator=(Block&);
@@ -275,6 +329,17 @@ public:
     Module& getParent() const { return parent; }
     Block* getEntryBlock() const { return blocks.front(); }
     Block* getLastBlock() const { return blocks.back(); }
+    Block* findBlockById(Id id)
+    {
+        for (auto block : blocks) {
+            if (block->getId() == id) {
+                return block;
+            }
+        }
+
+        return nullptr;
+    }
+
+    std::vector<Block*>& getBlocks() { return blocks; }
     void addLocalVariable(std::unique_ptr<Instruction> inst);
     Id getReturnType() const { return functionInstruction.getTypeId(); }
     void dump(std::vector<unsigned int>& out) const
@@ -315,6 +380,8 @@ public:
     }

     void addFunction(Function *fun) { functions.push_back(fun); }
+    const std::vector<Function*>& getFunctions() const { return functions; }
+    std::vector<Function*>& getFunctions() { return functions; }

     void mapInstruction(Instruction *instruction)
     {
@@ -398,6 +465,14 @@ __inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
     parent.getParent().mapInstruction(raw_instruction);
 }

+__inline void Block::insertInstruction(size_t pos, std::unique_ptr<Instruction> inst) {
+    Instruction* raw_instruction = inst.get();
+    instructions.insert(instructions.begin() + pos, std::move(inst));
+    raw_instruction->setBlock(this);
+    if (raw_instruction->getResultId())
+        parent.getParent().mapInstruction(raw_instruction);
+}
+
 };  // end spv namespace

 #endif  // spvIR_H
diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua
index 4218ff08e..afa3cdef5 100644
--- a/third_party/spirv-tools.lua
+++ b/third_party/spirv-tools.lua
@@ -13,9 +13,9 @@ project("spirv-tools")
     "spirv-tools/include",
   })
   files({
-    "spirv-tools/external/include/headers/GLSL.std.450.h",
-    "spirv-tools/external/include/headers/OpenCL.std.h",
-    "spirv-tools/external/include/headers/spirv.h",
+    "spirv-tools/include/spirv/GLSL.std.450.h",
+    "spirv-tools/include/spirv/OpenCL.std.h",
+    "spirv-tools/include/spirv/spirv.h",
     "spirv-tools/include/spirv-tools/libspirv.h",
     "spirv-tools/source/assembly_grammar.cpp",
     "spirv-tools/source/assembly_grammar.h",
diff --git a/xenia-build b/xenia-build
index 4587374c4..98330b6a5 100755
--- a/xenia-build
+++ b/xenia-build
@@ -642,8 +642,7 @@ class GenSpirvCommand(Command):
     print('Generating SPIR-V binaries...')
     print('')

-    # TODO(benvanik): actually find vulkan SDK. Env var? etc?
-    vulkan_sdk_path = 'C:\\VulkanSDK\\1.0.3.1'
+    vulkan_sdk_path = os.environ['VULKAN_SDK']
     vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin')
     glslang = os.path.join(vulkan_bin_path, 'glslangValidator')
     spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis')
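Taken together, the new spvIR.h surface (getFunctions, getBlocks, findBlockById, the instruction accessors, and Block::merge) gives external passes enough to rewrite straight-line control flow. A hypothetical cleanup pass sketch follows; the function name and the single-predecessor policy are illustrative, not something this diff adds.

```cpp
#include "third_party/glslang-spirv/spvIR.h"

// Hypothetical pass (not from the patch): collapse unconditional branches
// whose target block has exactly one predecessor.
void MergeStraightLineBlocks(spv::Module& module) {
  for (auto fn : module.getFunctions()) {
    for (auto block : fn->getBlocks()) {
      if (!block->isTerminated()) {
        continue;
      }
      auto terminator =
          block->getInstruction(block->getInstructionCount() - 1);
      if (terminator->getOpCode() != spv::Op::OpBranch) {
        continue;
      }
      // OpBranch's only operand is the target label id.
      auto target = fn->findBlockById(terminator->getIdOperand(0));
      if (target && target->getPredecessors().size() == 1) {
        // Safe while iterating: merge() empties the target block in place
        // rather than removing it from the function's block list, and an
        // emptied block (OpLabel only) reports isTerminated() == false.
        block->merge(target);
      }
    }
  }
}
```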