From de1e4661ff538b20da3d3f72f87c13822b794404 Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Fri, 19 Feb 2016 20:36:10 -0800
Subject: [PATCH] Not crashing (but also likely not working) EDRAM emulation.

---
 src/xenia/gpu/trace_dump.cc                   |  15 +-
 src/xenia/gpu/vulkan/pipeline_cache.cc        |   2 +-
 src/xenia/gpu/vulkan/pipeline_cache.h         |   4 +-
 src/xenia/gpu/vulkan/render_cache.cc          | 713 +++++++++++++++++-
 src/xenia/gpu/vulkan/render_cache.h           | 106 ++-
 .../gpu/vulkan/vulkan_command_processor.cc    |  36 +-
 src/xenia/gpu/vulkan/vulkan_shader.h          |   2 +-
 src/xenia/ui/vulkan/fenced_pools.h            |   3 +
 src/xenia/ui/vulkan/vulkan.cc                 |   4 +
 src/xenia/ui/vulkan/vulkan.h                  |   1 +
 src/xenia/ui/vulkan/vulkan_context.cc         |   2 +-
 src/xenia/ui/vulkan/vulkan_device.cc          |   5 +
 12 files changed, 851 insertions(+), 42 deletions(-)
diff --git a/src/xenia/gpu/trace_dump.cc b/src/xenia/gpu/trace_dump.cc
index b7804ebec..7535e0f01 100644
--- a/src/xenia/gpu/trace_dump.cc
+++ b/src/xenia/gpu/trace_dump.cc
@@ -13,6 +13,7 @@
 
 #include "third_party/stb/stb_image_write.h"
 #include "xenia/base/logging.h"
+#include "xenia/base/profiling.h"
 #include "xenia/base/string.h"
 #include "xenia/base/threading.h"
 #include "xenia/gpu/command_processor.h"
@@ -189,10 +190,16 @@ void TraceDump::Run() {
   });
 
   xe::threading::Fence capture_fence;
+  bool did_capture = false;
   loop_->PostDelayed(
       [&]() {
         // Capture.
         auto raw_image = window_->context()->Capture();
+        if (!raw_image) {
+          // Failed to capture anything.
+          capture_fence.Signal();
+          return;
+        }
 
         // Save framebuffer png.
         std::string png_path = xe::to_string(base_output_path_ + L".png");
@@ -201,6 +208,7 @@ void TraceDump::Run() {
                        raw_image->data.data(),
                        static_cast<int>(raw_image->stride));
 
+        did_capture = true;
         capture_fence.Signal();
       },
       50);
@@ -211,10 +219,13 @@ void TraceDump::Run() {
   loop_->Quit();
   loop_->AwaitQuit();
 
-  player_.reset();
-  emulator_.reset();
+  Profiler::Shutdown();
   window_.reset();
   loop_.reset();
+  player_.reset();
+  emulator_.reset();
+
+  // TODO(benvanik): die if failed to capture?
 }
 
 }  //  namespace gpu
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc
index ec6c28eac..5416aea7f 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.cc
+++ b/src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -84,7 +84,7 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
 }
 
 bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
-                                      VkRenderPass render_pass,
+                                      const RenderState* render_state,
                                       VulkanShader* vertex_shader,
                                       VulkanShader* pixel_shader,
                                       PrimitiveType primitive_type) {
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h
index 00e36ef12..74c461504 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.h
+++ b/src/xenia/gpu/vulkan/pipeline_cache.h
@@ -14,6 +14,7 @@
 
 #include "xenia/gpu/register_file.h"
 #include "xenia/gpu/spirv_shader_translator.h"
+#include "xenia/gpu/vulkan/render_cache.h"
 #include "xenia/gpu/vulkan/vulkan_shader.h"
 #include "xenia/gpu/xenos.h"
 #include "xenia/ui/spirv/spirv_disassembler.h"
@@ -42,7 +43,8 @@ class PipelineCache {
   // in the command buffer is issued at this time.
   // Returns whether the pipeline could be successfully created.
   bool ConfigurePipeline(VkCommandBuffer command_buffer,
-                         VkRenderPass render_pass, VulkanShader* vertex_shader,
+                         const RenderState* render_state,
+                         VulkanShader* vertex_shader,
                          VulkanShader* pixel_shader,
                          PrimitiveType primitive_type);
 
diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc
index 32d9349e5..b477d5633 100644
--- a/src/xenia/gpu/vulkan/render_cache.cc
+++ b/src/xenia/gpu/vulkan/render_cache.cc
@@ -9,6 +9,8 @@
 
 #include "xenia/gpu/vulkan/render_cache.h"
 
+#include <algorithm>
+
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/base/memory.h"
@@ -20,55 +22,711 @@ namespace xe {
 namespace gpu {
 namespace vulkan {
 
+using namespace xe::gpu::xenos;
 using xe::ui::vulkan::CheckResult;
 
+constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024;
+
+VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) {
+  switch (format) {
+    case ColorRenderTargetFormat::k_8_8_8_8:
+    case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
+      return VK_FORMAT_R8G8B8A8_UNORM;
+    case ColorRenderTargetFormat::k_2_10_10_10:
+    case ColorRenderTargetFormat::k_2_10_10_10_unknown:
+      return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
+    case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
+    case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
+      // WARNING: this is wrong, most likely - no float form in vulkan?
+      XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used");
+      return VK_FORMAT_A2R10G10B10_SSCALED_PACK32;
+    case ColorRenderTargetFormat::k_16_16:
+      return VK_FORMAT_R16G16_UNORM;
+    case ColorRenderTargetFormat::k_16_16_16_16:
+      return VK_FORMAT_R16G16B16A16_UNORM;
+    case ColorRenderTargetFormat::k_16_16_FLOAT:
+      return VK_FORMAT_R16G16_SFLOAT;
+    case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
+      return VK_FORMAT_R16G16B16A16_SFLOAT;
+    case ColorRenderTargetFormat::k_32_FLOAT:
+      return VK_FORMAT_R32_SFLOAT;
+    case ColorRenderTargetFormat::k_32_32_FLOAT:
+      return VK_FORMAT_R32G32_SFLOAT;
+    default:  // Format with no Vulkan mapping; reported as UNDEFINED.
+      assert_unhandled_case(format);
+      return VK_FORMAT_UNDEFINED;
+  }
+}
+
+VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) {
+  switch (format) {
+    case DepthRenderTargetFormat::kD24S8:
+      return VK_FORMAT_D24_UNORM_S8_UINT;
+    case DepthRenderTargetFormat::kD24FS8:
+      // TODO(benvanik): some way to emulate? resolve-time flag?
+      XELOGW("Unsupported EDRAM format kD24FS8 used");
+      return VK_FORMAT_D24_UNORM_S8_UINT;  // Falls back to the unorm layout.
+    default:  // Any other value has no mapping and yields UNDEFINED.
+      return VK_FORMAT_UNDEFINED;
+  }
+}
+
+// Cached view into the EDRAM memory.
+// The image is aliased to a region of the edram_memory_ based on the tile
+// parameters.
+// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
+//     formats?
+class CachedTileView {
+ public:
+  // Key identifying the view in the cache.
+  TileViewKey key;
+  // Image mapped into EDRAM.
+  VkImage image = nullptr;
+  // Simple view on the image matching the format.
+  VkImageView image_view = nullptr;
+
+  CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
+                 TileViewKey view_key);
+  ~CachedTileView();
+
+  bool IsEqual(const TileViewKey& other_key) const {
+    // Bytewise compare: the key is tightly packed with no padding, and this
+    // avoids reading a 2-byte-aligned struct through a uint64_t pointer.
+    return std::memcmp(&key, &other_key, sizeof(TileViewKey)) == 0;
+  }
+
+ private:
+  VkDevice device_ = nullptr;
+};
+
+// Cached framebuffer referencing tile attachments.
+// Each framebuffer is specific to a render pass. Ugh.
+class CachedFramebuffer {
+ public:
+  // TODO(benvanik): optimized key? tile base + format for each?
+
+  // Framebuffer with the attachments ready for use in the parent render pass.
+  VkFramebuffer handle = nullptr;
+  // Width of the framebuffer in pixels.
+  uint32_t width = 0;
+  // Height of the framebuffer in pixels.
+  uint32_t height = 0;
+  // References to color attachments; a nullptr entry marks an unused slot.
+  CachedTileView* color_attachments[4] = {nullptr};
+  // Reference to depth/stencil attachment, or nullptr if unused.
+  CachedTileView* depth_stencil_attachment = nullptr;
+
+  CachedFramebuffer(VkDevice device, VkRenderPass render_pass,
+                    uint32_t surface_width, uint32_t surface_height,
+                    CachedTileView* target_color_attachments[4],
+                    CachedTileView* target_depth_stencil_attachment);
+  ~CachedFramebuffer();
+
+  bool IsCompatible(const RenderConfiguration& desired_config) const;
+
+ private:
+  VkDevice device_ = nullptr;
+};
+
+// Cached render passes based on register states.
+// Each render pass is dependent on the format, dimensions, and use of
+// all attachments. The same render pass can be reused for multiple
+// framebuffers pointing at various tile views, though those cached
+// framebuffers are specific to the render pass.
+class CachedRenderPass {
+ public:
+  // Configuration this pass was created with.
+  RenderConfiguration config;
+  // Initialized render pass for the register state.
+  VkRenderPass handle = nullptr;
+  // Framebuffers created for this pass; deleted in the destructor.
+  std::vector<CachedFramebuffer*> cached_framebuffers;
+
+  CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config);
+  ~CachedRenderPass();
+
+  bool IsCompatible(const RenderConfiguration& desired_config) const;
+
+ private:
+  VkDevice device_ = nullptr;
+};
+
+CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
+                               TileViewKey view_key)
+    : key(std::move(view_key)), device_(device) {
+  // Map format to Vulkan.
+  VkFormat vulkan_format = VK_FORMAT_UNDEFINED;
+  uint32_t bpp = 4;
+  if (key.color_or_depth) {
+    auto edram_format = static_cast<ColorRenderTargetFormat>(key.edram_format);
+    vulkan_format = ColorRenderTargetFormatToVkFormat(edram_format);
+    switch (edram_format) {
+      case ColorRenderTargetFormat::k_16_16_16_16:
+      case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
+      case ColorRenderTargetFormat::k_32_32_FLOAT:
+        bpp = 8;
+        break;
+      default:
+        bpp = 4;
+        break;
+    }
+  } else {
+    auto edram_format = static_cast<DepthRenderTargetFormat>(key.edram_format);
+    vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format);
+  }
+  assert_true(vulkan_format != VK_FORMAT_UNDEFINED);
+  assert_true(bpp == 4);  // Formats given bpp 8 above trip this assert.
+
+  // Create the image with the desired properties.
+  VkImageCreateInfo image_info;
+  image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+  image_info.pNext = nullptr;
+  // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have
+  //     multiple views.
+  image_info.flags = 0;
+  image_info.imageType = VK_IMAGE_TYPE_2D;
+  image_info.format = vulkan_format;
+  image_info.extent.width = key.tile_width * 80;
+  image_info.extent.height = key.tile_height * 16;
+  image_info.extent.depth = 1;
+  image_info.mipLevels = 1;
+  image_info.arrayLayers = 1;
+  // TODO(benvanik): native MSAA support?
+  image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+  image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
+  image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                     VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+                     VK_IMAGE_USAGE_SAMPLED_BIT;
+  image_info.usage |= key.color_or_depth
+                          ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
+                          : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+  image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  image_info.queueFamilyIndexCount = 0;
+  image_info.pQueueFamilyIndices = nullptr;
+  image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
+  auto err = vkCreateImage(device_, &image_info, nullptr, &image);
+  CheckResult(err, "vkCreateImage");
+
+  // Verify our assumptions about memory layout are correct.
+  VkDeviceSize edram_offset = key.tile_offset * 5120;
+  VkMemoryRequirements memory_requirements;
+  vkGetImageMemoryRequirements(device_, image, &memory_requirements);
+  assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity);
+  assert_true(edram_offset % memory_requirements.alignment == 0);
+
+  // Bind to the region of EDRAM we occupy.
+  err = vkBindImageMemory(device_, image, edram_memory, edram_offset);
+  CheckResult(err, "vkBindImageMemory");
+
+  // Create the image view we'll use to attach it to a framebuffer.
+  VkImageViewCreateInfo image_view_info;
+  image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+  image_view_info.pNext = nullptr;
+  image_view_info.flags = 0;
+  image_view_info.image = image;
+  image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+  image_view_info.format = image_info.format;
+  // TODO(benvanik): manipulate? may not be able to when attached.
+  image_view_info.components = {
+      VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B,
+      VK_COMPONENT_SWIZZLE_A,
+  };
+  image_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+  if (key.color_or_depth) {
+    image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+  } else {
+    image_view_info.subresourceRange.aspectMask =
+        VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+  }
+  err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view);
+  CheckResult(err, "vkCreateImageView");
+
+  // TODO(benvanik): transition to general layout?
+}
+
+CachedTileView::~CachedTileView() {
+  vkDestroyImageView(device_, image_view, nullptr);  // View before its image.
+  vkDestroyImage(device_, image, nullptr);
+}
+
+CachedFramebuffer::CachedFramebuffer(
+    VkDevice device, VkRenderPass render_pass, uint32_t surface_width,
+    uint32_t surface_height, CachedTileView* target_color_attachments[4],
+    CachedTileView* target_depth_stencil_attachment)
+    : width(surface_width),
+      height(surface_height),
+      depth_stencil_attachment(target_depth_stencil_attachment),
+      device_(device) {
+  for (int i = 0; i < 4; ++i) {
+    color_attachments[i] = target_color_attachments[i];
+  }
+
+  // Create framebuffer from the image views of every attachment in use.
+  VkImageView image_views[5] = {nullptr};
+  int image_view_count = 0;
+  for (int i = 0; i < 4; ++i) {
+    if (color_attachments[i]) {
+      image_views[image_view_count++] = color_attachments[i]->image_view;
+    }
+  }
+  if (depth_stencil_attachment) {
+    image_views[image_view_count++] = depth_stencil_attachment->image_view;
+  }
+  VkFramebufferCreateInfo framebuffer_info = {};  // Zeroes the unset flags.
+  framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+  framebuffer_info.pNext = nullptr;
+  framebuffer_info.renderPass = render_pass;
+  framebuffer_info.attachmentCount = image_view_count;
+  framebuffer_info.pAttachments = image_views;
+  framebuffer_info.width = width;
+  framebuffer_info.height = height;
+  framebuffer_info.layers = 1;
+  auto err = vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle);
+  CheckResult(err, "vkCreateFramebuffer");
+}
+
+CachedFramebuffer::~CachedFramebuffer() {
+  vkDestroyFramebuffer(device_, handle, nullptr);  // Tile views are not owned.
+}
+
+bool CachedFramebuffer::IsCompatible(
+    const RenderConfiguration& desired_config) const {
+  // We already know all render pass things line up, so let's verify dimensions,
+  // edram offsets, etc. We need an exact match.
+  // TODO(benvanik): separate image views from images in tiles and store in fb?
+  // The framebuffer extent is fixed at creation, so a configuration with a
+  // different surface size cannot reuse it even if the tiles line up.
+  if (width != desired_config.surface_pitch_px ||
+      height != desired_config.surface_height_px) {
+    return false;
+  }
+  for (int i = 0; i < 4; ++i) {
+    // Ensure the attachment points to the same tile.
+    auto tile_view = color_attachments[i];
+    if (tile_view &&
+        (tile_view->key.tile_offset != desired_config.color[i].edram_base ||
+         tile_view->key.edram_format !=
+             static_cast<uint16_t>(desired_config.color[i].format))) {
+      return false;
+    }
+  }
+  // Ensure depth attachment is correct.
+  auto& depth_info = desired_config.depth_stencil;
+  return !depth_stencil_attachment ||
+         (depth_stencil_attachment->key.tile_offset == depth_info.edram_base &&
+          depth_stencil_attachment->key.edram_format ==
+              static_cast<uint16_t>(depth_info.format));
+}
+
+CachedRenderPass::CachedRenderPass(VkDevice device,
+                                   const RenderConfiguration& desired_config)
+    : device_(device) {
+  std::memcpy(&config, &desired_config, sizeof(config));
+
+  // Initialize all attachments to default unused.
+  // As we set layout(location=RT) in shaders we must always provide 4.
+  VkAttachmentDescription attachments[5];
+  for (int i = 0; i < 4; ++i) {
+    attachments[i].flags = 0;
+    attachments[i].format = VK_FORMAT_UNDEFINED;
+    attachments[i].samples = VK_SAMPLE_COUNT_1_BIT;
+    attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+    attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+    attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+    attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+    attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL;
+    attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL;
+  }
+  auto& depth_stencil_attachment = attachments[4];
+  depth_stencil_attachment.flags = 0;
+  depth_stencil_attachment.format = VK_FORMAT_UNDEFINED;
+  depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
+  depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+  depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+  depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+  depth_stencil_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+  depth_stencil_attachment.initialLayout = VK_IMAGE_LAYOUT_GENERAL;
+  depth_stencil_attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL;
+  VkAttachmentReference depth_stencil_attachment_ref;
+  depth_stencil_attachment_ref.attachment = VK_ATTACHMENT_UNUSED;
+  depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL;
+
+  // Configure attachments based on what's enabled.
+  VkAttachmentReference color_attachment_refs[4];
+  for (int i = 0; i < 4; ++i) {
+    auto& color_config = config.color[i];
+    // TODO(benvanik): see how loose we can be with these.
+    attachments[i].format =
+        ColorRenderTargetFormatToVkFormat(color_config.format);
+    auto& color_attachment_ref = color_attachment_refs[i];
+    color_attachment_ref.attachment = i;
+    color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL;
+  }
+  auto& depth_config = config.depth_stencil;
+  depth_stencil_attachment_ref.attachment = 4;
+  depth_stencil_attachment.format =
+      DepthRenderTargetFormatToVkFormat(depth_config.format);
+
+  // Single subpass that writes to our attachments.
+  VkSubpassDescription subpass_info;
+  subpass_info.flags = 0;
+  subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+  subpass_info.inputAttachmentCount = 0;
+  subpass_info.pInputAttachments = nullptr;
+  subpass_info.colorAttachmentCount = 4;
+  subpass_info.pColorAttachments = color_attachment_refs;
+  subpass_info.pResolveAttachments = nullptr;
+  subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref;
+  subpass_info.preserveAttachmentCount = 0;
+  subpass_info.pPreserveAttachments = nullptr;
+
+  // Create the render pass.
+  VkRenderPassCreateInfo render_pass_info = {};  // Zeroes the unset flags.
+  render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+  render_pass_info.pNext = nullptr;
+  render_pass_info.attachmentCount = 5;
+  render_pass_info.pAttachments = attachments;
+  render_pass_info.subpassCount = 1;
+  render_pass_info.pSubpasses = &subpass_info;
+  render_pass_info.dependencyCount = 0;
+  render_pass_info.pDependencies = nullptr;
+  auto err = vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle);
+  CheckResult(err, "vkCreateRenderPass");
+}
+
+CachedRenderPass::~CachedRenderPass() {
+  for (auto framebuffer : cached_framebuffers) {
+    delete framebuffer;  // Framebuffers are owned by their render pass.
+  }
+  cached_framebuffers.clear();
+
+  vkDestroyRenderPass(device_, handle, nullptr);
+}
+
+bool CachedRenderPass::IsCompatible(
+    const RenderConfiguration& desired_config) const {
+  // Depth/stencil and all four color formats must match exactly; the checks
+  // are independent so their order does not matter.
+  // TODO(benvanik): allow compatible vulkan formats.
+  bool same_formats =
+      config.depth_stencil.format == desired_config.depth_stencil.format;
+  for (int rt = 0; same_formats && rt < 4; ++rt) {
+    same_formats =
+        config.color[rt].format == desired_config.color[rt].format;
+  }
+  return same_formats;
+}
+
 RenderCache::RenderCache(RegisterFile* register_file,
                          ui::vulkan::VulkanDevice* device)
-    : register_file_(register_file), device_(*device) {}
+    : register_file_(register_file), device_(*device) {
+  // Create the buffer we'll bind to our memory.
+  // We do this first so we can get the right memory type.
+  VkBufferCreateInfo buffer_info;
+  buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_info.pNext = nullptr;
+  buffer_info.flags = 0;
+  buffer_info.size = kEdramBufferCapacity;
+  buffer_info.usage =
+      VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+  buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  buffer_info.queueFamilyIndexCount = 0;
+  buffer_info.pQueueFamilyIndices = nullptr;
+  auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_);
+  CheckResult(err, "vkCreateBuffer");
 
-RenderCache::~RenderCache() = default;
+  // Query requirements for the buffer.
+  // It should be 1:1.
+  VkMemoryRequirements buffer_requirements;
+  vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements);
+  assert_true(buffer_requirements.size == kEdramBufferCapacity);
 
-VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
-                                          VulkanShader* vertex_shader,
-                                          VulkanShader* pixel_shader) {
+  // Create a dummy image so we can see what memory bits it requires.
+  // They should overlap with the buffer requirements but are likely more
+  // strict.
+  VkImageCreateInfo test_image_info;
+  test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+  test_image_info.pNext = nullptr;
+  test_image_info.flags = 0;
+  test_image_info.imageType = VK_IMAGE_TYPE_2D;
+  test_image_info.format = VK_FORMAT_R8G8B8A8_UINT;
+  test_image_info.extent.width = 128;
+  test_image_info.extent.height = 128;
+  test_image_info.extent.depth = 1;
+  test_image_info.mipLevels = 1;
+  test_image_info.arrayLayers = 1;
+  test_image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+  test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
+  test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+  test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  test_image_info.queueFamilyIndexCount = 0;
+  test_image_info.pQueueFamilyIndices = nullptr;
+  test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL;
+  VkImage test_image = nullptr;
+  err = vkCreateImage(device_, &test_image_info, nullptr, &test_image);
+  CheckResult(err, "vkCreateImage");
+  VkMemoryRequirements image_requirements;
+  vkGetImageMemoryRequirements(device_, test_image, &image_requirements);
+  vkDestroyImage(device_, test_image, nullptr);
+  assert_true((image_requirements.memoryTypeBits &
+               buffer_requirements.memoryTypeBits) != 0);
+
+  // Allocate EDRAM memory.
+  VkMemoryRequirements memory_requirements;
+  memory_requirements.size = buffer_requirements.size;
+  memory_requirements.alignment = buffer_requirements.alignment;
+  memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits;
+  // TODO(benvanik): do we need it host visible?
+  edram_memory_ = device->AllocateMemory(memory_requirements, 0);
+
+  // Bind buffer to map our entire memory.
+  vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0);
+}
+
+RenderCache::~RenderCache() {
+  // TODO(benvanik): wait for idle.
+
+  // Dispose all render passes (and their framebuffers).
+  for (auto render_pass : cached_render_passes_) {
+    delete render_pass;
+  }
+  cached_render_passes_.clear();
+
+  // Dispose all of our cached tile views (after the framebuffers using them).
+  for (auto tile_view : cached_tile_views_) {
+    delete tile_view;
+  }
+  cached_tile_views_.clear();
+
+  // Release the EDRAM buffer, then the memory backing it and the tiles.
+  vkDestroyBuffer(device_, edram_buffer_, nullptr);
+  vkFreeMemory(device_, edram_memory_, nullptr);
+}
+
+const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
+                                                VulkanShader* vertex_shader,
+                                                VulkanShader* pixel_shader) {
   assert_null(current_command_buffer_);
   current_command_buffer_ = command_buffer;
 
   // Lookup or construct a render pass compatible with our current state.
-  VkRenderPass render_pass = nullptr;
+  auto config = &current_state_.config;
+  CachedRenderPass* render_pass = nullptr;
+  CachedFramebuffer* framebuffer = nullptr;
+  auto& regs = shadow_registers_;
+  bool dirty = false;
+  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
+  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
+  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
+  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
+  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
+  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
+  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
+  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
+                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
+  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
+                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
+  if (!dirty && current_state_.render_pass) {
+    // No registers have changed so we can reuse the previous render pass -
+    // just begin with what we had.
+    render_pass = current_state_.render_pass;
+    framebuffer = current_state_.framebuffer;
+  } else {
+    // Re-parse configuration.
+    if (!ParseConfiguration(config)) {
+      return nullptr;
+    }
 
-  // Begin render pass.
+    // Lookup or generate a new render pass and framebuffer for the new state.
+    if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) {
+      return nullptr;
+    }
+    current_state_.render_pass = render_pass;
+    current_state_.framebuffer = framebuffer;
+  }
+  if (!render_pass) {
+    return nullptr;
+  }
+
+  // Setup render pass in command buffer.
+  // This is meant to preserve previous contents as we may be called
+  // repeatedly.
   VkRenderPassBeginInfo render_pass_begin_info;
   render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
   render_pass_begin_info.pNext = nullptr;
-  render_pass_begin_info.renderPass = render_pass;
-
-  // Target framebuffer.
-  // render_pass_begin_info.framebuffer = current_buffer.framebuffer;
+  render_pass_begin_info.renderPass = render_pass->handle;
+  render_pass_begin_info.framebuffer = framebuffer->handle;
 
   // Render into the entire buffer (or at least tell the API we are doing
   // this). In theory it'd be better to clip this to the scissor region, but
   // the docs warn anything but the full framebuffer may be slow.
   render_pass_begin_info.renderArea.offset.x = 0;
   render_pass_begin_info.renderArea.offset.y = 0;
-  // render_pass_begin_info.renderArea.extent.width = surface_width_;
-  // render_pass_begin_info.renderArea.extent.height = surface_height_;
+  render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px;
+  render_pass_begin_info.renderArea.extent.height = config->surface_height_px;
 
   // Configure clear color, if clearing.
-  VkClearValue color_clear_value;
-  color_clear_value.color.float32[0] = 238 / 255.0f;
-  color_clear_value.color.float32[1] = 238 / 255.0f;
-  color_clear_value.color.float32[2] = 238 / 255.0f;
-  color_clear_value.color.float32[3] = 1.0f;
-  VkClearValue clear_values[] = {color_clear_value};
-  render_pass_begin_info.clearValueCount =
-      static_cast<uint32_t>(xe::countof(clear_values));
-  render_pass_begin_info.pClearValues = clear_values;
+  // TODO(benvanik): enable clearing here during resolve?
+  render_pass_begin_info.clearValueCount = 0;
+  render_pass_begin_info.pClearValues = nullptr;
 
+  // Begin the render pass.
   vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
                        VK_SUBPASS_CONTENTS_INLINE);
 
-  return render_pass;
+  return &current_state_;
+}
+
+bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
+  auto& regs = shadow_registers_;
+
+  // RB_MODECONTROL
+  // Rough mode control (color, color+depth, etc).
+  config->mode_control = static_cast<ModeControl>(regs.rb_modecontrol & 0x7);
+
+  // RB_SURFACE_INFO
+  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
+  config->surface_pitch_px = regs.rb_surface_info & 0x3FFF;
+  config->surface_msaa =
+      static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
+
+  // TODO(benvanik): verify min/max so we don't go out of bounds.
+  // TODO(benvanik): has to be a good way to get height.
+  // Guess the height from the scissor height.
+  // It's wildly inaccurate, but I've never seen it be bigger than the
+  // EDRAM tiling.
+  uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
+  uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
+  config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16));
+
+  // Color attachment configuration.
+  if (config->mode_control == ModeControl::kColorDepth) {
+    uint32_t color_info[4] = {
+        regs.rb_color_info, regs.rb_color1_info, regs.rb_color2_info,
+        regs.rb_color3_info,
+    };
+    for (int i = 0; i < 4; ++i) {
+      config->color[i].edram_base = color_info[i] & 0xFFF;
+      config->color[i].format =
+          static_cast<ColorRenderTargetFormat>((color_info[i] >> 16) & 0xF);
+      // We don't support GAMMA formats, so switch them to what we do support.
+      // Only this one format is remapped; everything else passes through.
+      if (config->color[i].format ==
+          ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
+        config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
+      }
+    }
+  } else {
+    for (int i = 0; i < 4; ++i) {
+      config->color[i].edram_base = 0;
+      config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
+    }
+  }
+
+  // Depth/stencil attachment configuration.
+  if (config->mode_control == ModeControl::kColorDepth ||
+      config->mode_control == ModeControl::kDepth) {
+    config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF;
+    config->depth_stencil.format =
+        static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1);
+  } else {
+    config->depth_stencil.edram_base = 0;
+    config->depth_stencil.format = DepthRenderTargetFormat::kD24S8;
+  }
+
+  return true;
+}
+
+bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
+                                      CachedRenderPass** out_render_pass,
+                                      CachedFramebuffer** out_framebuffer) {
+  *out_render_pass = nullptr;
+  *out_framebuffer = nullptr;
+
+  // TODO(benvanik): better lookup.
+  // Attempt to find the render pass in our cache.
+  CachedRenderPass* render_pass = nullptr;
+  for (auto cached_render_pass : cached_render_passes_) {
+    if (cached_render_pass->IsCompatible(*config)) {
+      // Found a match.
+      render_pass = cached_render_pass;
+      break;
+    }
+  }
+
+  // If no render pass was found in the cache create a new one.
+  if (!render_pass) {
+    render_pass = new CachedRenderPass(device_, *config);
+    cached_render_passes_.push_back(render_pass);
+  }
+
+  // TODO(benvanik): better lookup.
+  // Attempt to find the framebuffer in the render pass cache.
+  CachedFramebuffer* framebuffer = nullptr;
+  for (auto cached_framebuffer : render_pass->cached_framebuffers) {
+    if (cached_framebuffer->IsCompatible(*config)) {
+      // Found a match.
+      framebuffer = cached_framebuffer;
+      break;
+    }
+  }
+
+  // If no framebuffer was found in the cache create a new one.
+  if (!framebuffer) {
+    CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr,
+                                                   nullptr};
+    for (int i = 0; i < 4; ++i) {
+      TileViewKey color_key;
+      color_key.tile_offset = config->color[i].edram_base;
+      color_key.tile_width = config->surface_pitch_px / 80;
+      color_key.tile_height = config->surface_height_px / 16;
+      color_key.color_or_depth = 1;
+      color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
+      target_color_attachments[i] = GetTileView(color_key);
+      if (!target_color_attachments[i]) {  // Check the element, not the array.
+        XELOGE("Failed to get tile view for color attachment");
+        return false;
+      }
+    }
+
+    TileViewKey depth_stencil_key;
+    depth_stencil_key.tile_offset = config->depth_stencil.edram_base;
+    depth_stencil_key.tile_width = config->surface_pitch_px / 80;
+    depth_stencil_key.tile_height = config->surface_height_px / 16;
+    depth_stencil_key.color_or_depth = 0;
+    depth_stencil_key.edram_format =
+        static_cast<uint16_t>(config->depth_stencil.format);
+    auto target_depth_stencil_attachment = GetTileView(depth_stencil_key);
+    if (!target_depth_stencil_attachment) {
+      XELOGE("Failed to get tile view for depth/stencil attachment");
+      return false;
+    }
+
+    framebuffer = new CachedFramebuffer(
+        device_, render_pass->handle, config->surface_pitch_px,
+        config->surface_height_px, target_color_attachments,
+        target_depth_stencil_attachment);
+    render_pass->cached_framebuffers.push_back(framebuffer);
+  }
+
+  *out_render_pass = render_pass;
+  *out_framebuffer = framebuffer;
+  return true;
+}
+
+CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) {
+  // Check the cache for a view created with an identical key.
+  // TODO(benvanik): better lookup.
+  for (auto tile_view : cached_tile_views_) {
+    if (tile_view->IsEqual(view_key)) {
+      return tile_view;
+    }
+  }
+
+  // Create a new tile and add to the cache; the cache owns the view.
+  auto tile_view = new CachedTileView(device_, edram_memory_, view_key);
+  cached_tile_views_.push_back(tile_view);
+  return tile_view;
 }
 
 void RenderCache::EndRenderPass() {
@@ -84,6 +742,15 @@ void RenderCache::ClearCache() {
   // TODO(benvanik): caching.
 }
 
+bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
+  // Refreshes the shadow copy at |dest| from the live register value and
+  // reports whether the stored value actually changed.
+  const uint32_t latest = register_file_->values[register_name].u32;
+  const bool changed = (*dest != latest);
+  if (changed) *dest = latest;
+  return changed;
+}
+
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h
index aeaa07264..865b34cfd 100644
--- a/src/xenia/gpu/vulkan/render_cache.h
+++ b/src/xenia/gpu/vulkan/render_cache.h
@@ -21,6 +21,58 @@ namespace xe {
 namespace gpu {
 namespace vulkan {
 
+// Forward-declared cache entry types. TODO(benvanik): make public API?
+class CachedTileView;
+class CachedFramebuffer;
+class CachedRenderPass;
+
+// Key identifying one tile view over EDRAM (offset, extent, and format).
+struct TileViewKey {
+  // Offset into EDRAM in 5120b tiles.
+  uint16_t tile_offset;
+  // Width of the view in base 80x16 tiles (surface pitch in pixels / 80).
+  uint16_t tile_width;
+  // Height of the view in base 80x16 tiles (surface height in pixels / 16).
+  uint16_t tile_height;
+  // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
+  uint16_t color_or_depth : 1;
+  // Format enum value; which enum it is depends on color_or_depth.
+  uint16_t edram_format : 15;
+};
+static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
+
+// Parsed render configuration from the current render state.
+struct RenderConfiguration {
+  // Render mode (color+depth, depth-only, etc).
+  xenos::ModeControl mode_control;
+  // Target surface pitch, in pixels (divided by 80 for tile view width).
+  uint32_t surface_pitch_px;
+  // ESTIMATED target surface height, in pixels (/ 16 for tile view height).
+  uint32_t surface_height_px;
+  // Surface MSAA setting.
+  MsaaSamples surface_msaa;
+  // Color attachments for the 4 render targets.
+  struct {
+    uint32_t edram_base;  // Copied into TileViewKey::tile_offset.
+    ColorRenderTargetFormat format;  // Stored in TileViewKey::edram_format.
+  } color[4];
+  // Depth/stencil attachment.
+  struct {
+    uint32_t edram_base;  // Copied into TileViewKey::tile_offset.
+    DepthRenderTargetFormat format;  // Stored in TileViewKey::edram_format.
+  } depth_stencil;
+};
+
+// Render state for the register-specified config; BeginRenderPass returns it.
+struct RenderState {
+  // Parsed configuration.
+  RenderConfiguration config;
+  // Render pass (to be used with pipelines/etc).
+  CachedRenderPass* render_pass = nullptr;
+  // Target framebuffer bound to the render pass.
+  CachedFramebuffer* framebuffer = nullptr;
+};
+
 // Manages the virtualized EDRAM and the render target cache.
 //
 // On the 360 the render target is an opaque block of memory in EDRAM that's
@@ -165,9 +217,9 @@ class RenderCache {
 
   // Begins a render pass targeting the state-specified framebuffer formats.
   // The command buffer will be transitioned into the render pass phase.
-  VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer,
-                               VulkanShader* vertex_shader,
-                               VulkanShader* pixel_shader);
+  const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
+                                     VulkanShader* vertex_shader,
+                                     VulkanShader* pixel_shader);
 
   // Ends the current render pass.
   // The command buffer will be transitioned out of the render pass phase.
@@ -177,9 +229,57 @@ class RenderCache {
   void ClearCache();
 
  private:
+  // Parses the current state into |config|. TODO confirm: false == failure?
+  bool ParseConfiguration(RenderConfiguration* config);
+
+  // Gets or creates a render pass and framebuffer for the given configuration,
+  // reusing cached objects where possible. On success returns true and stores
+  // the results in |out_render_pass| and |out_framebuffer|.
+  bool ConfigureRenderPass(RenderConfiguration* config,
+                           CachedRenderPass** out_render_pass,
+                           CachedFramebuffer** out_framebuffer);
+
+  // Returns the cached tile view matching |view_key|, creating it if needed.
+  CachedTileView* GetTileView(const TileViewKey& view_key);
+
   RegisterFile* register_file_ = nullptr;
   VkDevice device_ = nullptr;
 
+  // Entire 10MiB of EDRAM, aliased to hell by various VkImages.
+  VkDeviceMemory edram_memory_ = nullptr;
+  // Buffer overlaid 1:1 with edram_memory_ to allow raw access.
+  VkBuffer edram_buffer_ = nullptr;
+
+  // Cache of VkImages and VkImageViews for all of our EDRAM tilings.
+  // TODO(benvanik): non-linear lookup? Should only be a small number of these.
+  std::vector<CachedTileView*> cached_tile_views_;
+
+  // Cache of render passes based on formats.
+  std::vector<CachedRenderPass*> cached_render_passes_;
+
+  // Shadow copies of the registers that affect which render pass we choose;
+  // if none change between passes the previous one can be reused quickly.
+  // SetShadowRegister refreshes one copy and returns true if it changed.
+  struct ShadowRegisters {
+    uint32_t rb_modecontrol;
+    uint32_t rb_surface_info;
+    uint32_t rb_color_info;
+    uint32_t rb_color1_info;
+    uint32_t rb_color2_info;
+    uint32_t rb_color3_info;
+    uint32_t rb_depth_info;
+    uint32_t pa_sc_window_scissor_tl;
+    uint32_t pa_sc_window_scissor_br;
+
+    ShadowRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } shadow_registers_;
+  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
+
+  // Configuration used for the current/previous Begin/End, representing the
+  // current shadow register state.
+  RenderState current_state_;
+
   // Only valid during a BeginRenderPass/EndRenderPass block.
   VkCommandBuffer current_command_buffer_ = nullptr;
 };
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index 8047bd202..e19e89c29 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -179,9 +179,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
   // We need the to do just about anything so validate here.
   auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
   auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
-  if (!vertex_shader || !vertex_shader->is_valid() || !pixel_shader ||
-      !pixel_shader->is_valid()) {
-    // Skipped because we can't understand the shader.
+  if (!vertex_shader || !vertex_shader->is_valid()) {
+    // Always need a vertex shader.
+    return true;
+  }
+  // Depth-only mode doesn't need a pixel shader (we'll use a fake one).
+  if (enable_mode == ModeControl::kDepth) {
+    // Use a dummy pixel shader when required.
+    // TODO(benvanik): dummy pixel shader.
+    assert_not_null(pixel_shader);
+  } else if (!pixel_shader || !pixel_shader->is_valid()) {
+    // Need a pixel shader in normal color mode.
     return true;
   }
 
@@ -198,16 +206,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
 
   // Begin the render pass.
   // This will setup our framebuffer and begin the pass in the command buffer.
-  VkRenderPass render_pass = render_cache_->BeginRenderPass(
+  auto render_state = render_cache_->BeginRenderPass(
       command_buffer, vertex_shader, pixel_shader);
-  if (!render_pass) {
+  if (!render_state) {
     return false;
   }
 
   // Configure the pipeline for drawing.
   // This encodes all render state (blend, depth, etc), our shader stages,
   // and our vertex input layout.
-  if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
+  if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state,
                                           vertex_shader, pixel_shader,
                                           primitive_type)) {
     render_cache_->EndRenderPass();
@@ -215,12 +223,14 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
   }
 
   // Upload the constants the shaders require.
-  auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
+  // These are optional, and if none are defined 0 will be returned.
+  VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
       vertex_shader->constant_register_map());
-  auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
+  VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
       pixel_shader->constant_register_map());
   if (vertex_constant_offset == VK_WHOLE_SIZE ||
       pixel_constant_offset == VK_WHOLE_SIZE) {
+    // Shader wants constants but we couldn't upload them.
     render_cache_->EndRenderPass();
     return false;
   }
@@ -307,11 +317,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
   if (queue_mutex_) {
     queue_mutex_->lock();
   }
-  vkQueueWaitIdle(queue_);
+  err = vkQueueWaitIdle(queue_);
+  CheckResult(err, "vkQueueWaitIdle");
+  err = vkDeviceWaitIdle(*device_);
+  CheckResult(err, "vkDeviceWaitIdle");
   if (queue_mutex_) {
     queue_mutex_->unlock();
   }
-  command_buffer_pool_->Scavenge();
+  while (command_buffer_pool_->has_pending()) {
+    command_buffer_pool_->Scavenge();
+    xe::threading::MaybeYield();
+  }
   vkDestroyFence(*device_, fence, nullptr);
 
   return true;
diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h
index 97dbd5822..1dc55d8b0 100644
--- a/src/xenia/gpu/vulkan/vulkan_shader.h
+++ b/src/xenia/gpu/vulkan/vulkan_shader.h
@@ -31,7 +31,7 @@ class VulkanShader : public Shader {
   bool Prepare();
 
  private:
-   VkDevice device_ = nullptr;
+  VkDevice device_ = nullptr;
   VkShaderModule shader_module_ = nullptr;
 };
 
diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h
index 52274a9d9..3bc7e30f6 100644
--- a/src/xenia/ui/vulkan/fenced_pools.h
+++ b/src/xenia/ui/vulkan/fenced_pools.h
@@ -47,6 +47,9 @@ class BaseFencedPool {
     }
   }
 
+  // True if the pending batch list is non-empty (batches awaiting Scavenge).
+  bool has_pending() const { return pending_batch_list_head_ != nullptr; }
+
   // Checks all pending batches for completion and scavenges their entries.
   // This should be called as frequently as reasonable.
   void Scavenge() {
diff --git a/src/xenia/ui/vulkan/vulkan.cc b/src/xenia/ui/vulkan/vulkan.cc
index ba889e109..77c8fb548 100644
--- a/src/xenia/ui/vulkan/vulkan.cc
+++ b/src/xenia/ui/vulkan/vulkan.cc
@@ -10,3 +10,7 @@
 #include "xenia/ui/vulkan/vulkan.h"
 
 DEFINE_bool(vulkan_validation, false, "Enable Vulkan validation layers.");
+
+DEFINE_bool(vulkan_primary_queue_only, false,
+            "Force the use of the primary queue, ignoring any additional that "
+            "may be present.");
diff --git a/src/xenia/ui/vulkan/vulkan.h b/src/xenia/ui/vulkan/vulkan.h
index 697c9fa57..7a7e64f10 100644
--- a/src/xenia/ui/vulkan/vulkan.h
+++ b/src/xenia/ui/vulkan/vulkan.h
@@ -30,5 +30,6 @@
 #define XELOGVK XELOGI
 
 DECLARE_bool(vulkan_validation);
+DECLARE_bool(vulkan_primary_queue_only);
 
 #endif  // XENIA_UI_VULKAN_VULKAN_H_
diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc
index a2c5998f4..381fb0ab7 100644
--- a/src/xenia/ui/vulkan/vulkan_context.cc
+++ b/src/xenia/ui/vulkan/vulkan_context.cc
@@ -142,7 +142,7 @@ void VulkanContext::EndSwap() {
 }
 
 std::unique_ptr<RawImage> VulkanContext::Capture() {
-  assert_always();
+  // TODO(benvanik): read back swap chain front buffer.
   return nullptr;
 }
 
diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc
index c7ca1d974..ded29212d 100644
--- a/src/xenia/ui/vulkan/vulkan_device.cc
+++ b/src/xenia/ui/vulkan/vulkan_device.cc
@@ -129,6 +129,11 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) {
     return false;
   }
 
+  // Some tools *cough* renderdoc *cough* can't handle multiple queues.
+  if (FLAGS_vulkan_primary_queue_only) {
+    queue_count = 1;
+  }
+
   VkDeviceQueueCreateInfo queue_info;
   queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
   queue_info.pNext = nullptr;