WIP rough sketch of vulkan backend structure.

parent 35e08d9428
commit 4c4a641096
src/xenia/base/memory.cc
@@ -18,6 +18,10 @@ namespace xe {

// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h

void copy_128_aligned(void* dest, const void* src, size_t count) {
  std::memcpy(dest, src, count * 16);
}

void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
                              size_t count) {
  return copy_and_swap_16_unaligned(dest, src, count);
}
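// A minimal sketch (not part of this commit) of a dedicated SIMD path for
// copy_and_swap_16_aligned, in the spirit of the volk kernels linked above.
// Assumes x86 with SSSE3: the shuffle mask swaps the two bytes of every
// 16-bit lane, eight lanes per iteration, with a scalar tail.
#include <tmmintrin.h>  // SSSE3

void copy_and_swap_16_aligned_sketch(uint16_t* dest, const uint16_t* src,
                                     size_t count) {
  const __m128i swap_mask = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4,
                                         5, 2, 3, 0, 1);
  size_t i = 0;
  for (; i + 8 <= count; i += 8) {
    __m128i value = _mm_load_si128(reinterpret_cast<const __m128i*>(src + i));
    _mm_store_si128(reinterpret_cast<__m128i*>(dest + i),
                    _mm_shuffle_epi8(value, swap_mask));
  }
  for (; i < count; ++i) {
    dest[i] = static_cast<uint16_t>((src[i] << 8) | (src[i] >> 8));
  }
}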
src/xenia/base/memory.h
@@ -121,6 +121,8 @@ inline void* low_address(void* address) {
  return reinterpret_cast<void*>(uint64_t(address) & 0xFFFFFFFF);
}

void copy_128_aligned(void* dest, const void* src, size_t count);

void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
                              size_t count);
void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,
src/xenia/gpu/vulkan/buffer_cache.cc
@@ -0,0 +1,330 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/buffer_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

BufferCache::BufferCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device, size_t capacity)
    : register_file_(register_file),
      device_(*device),
      transient_capacity_(capacity) {
  // Uniform buffer.
  VkBufferCreateInfo uniform_buffer_info;
  uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  uniform_buffer_info.pNext = nullptr;
  uniform_buffer_info.flags = 0;
  uniform_buffer_info.size = transient_capacity_;
  uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
  uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  uniform_buffer_info.queueFamilyIndexCount = 0;
  uniform_buffer_info.pQueueFamilyIndices = nullptr;
  auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr,
                            &transient_uniform_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Index buffer.
  VkBufferCreateInfo index_buffer_info;
  index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  index_buffer_info.pNext = nullptr;
  index_buffer_info.flags = 0;
  index_buffer_info.size = transient_capacity_;
  index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
  index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  index_buffer_info.queueFamilyIndexCount = 0;
  index_buffer_info.pQueueFamilyIndices = nullptr;
  err = vkCreateBuffer(device_, &index_buffer_info, nullptr,
                       &transient_index_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Vertex buffer.
  VkBufferCreateInfo vertex_buffer_info;
  vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  vertex_buffer_info.pNext = nullptr;
  vertex_buffer_info.flags = 0;
  vertex_buffer_info.size = transient_capacity_;
  vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
  vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  vertex_buffer_info.queueFamilyIndexCount = 0;
  vertex_buffer_info.pQueueFamilyIndices = nullptr;
  err = vkCreateBuffer(device_, &vertex_buffer_info, nullptr,
                       &transient_vertex_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Allocate the underlying buffer we use for all storage.
  // We query all types and take the max alignment.
  VkMemoryRequirements uniform_buffer_requirements;
  VkMemoryRequirements index_buffer_requirements;
  VkMemoryRequirements vertex_buffer_requirements;
  vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
                                &uniform_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
                                &index_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
                                &vertex_buffer_requirements);
  uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
  index_buffer_alignment_ = index_buffer_requirements.alignment;
  vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
  VkMemoryRequirements buffer_requirements;
  buffer_requirements.size = transient_capacity_;
  buffer_requirements.alignment =
      std::max(uniform_buffer_requirements.alignment,
               std::max(index_buffer_requirements.alignment,
                        vertex_buffer_requirements.alignment));
  buffer_requirements.memoryTypeBits =
      uniform_buffer_requirements.memoryTypeBits |
      index_buffer_requirements.memoryTypeBits |
      vertex_buffer_requirements.memoryTypeBits;
  transient_buffer_memory_ = device->AllocateMemory(
      buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

  // Alias all buffers to our memory.
  vkBindBufferMemory(device_, transient_uniform_buffer_,
                     transient_buffer_memory_, 0);
  vkBindBufferMemory(device_, transient_index_buffer_,
                     transient_buffer_memory_, 0);
  vkBindBufferMemory(device_, transient_vertex_buffer_,
                     transient_buffer_memory_, 0);

  // Map memory and keep it mapped while we use it.
  err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
                    &transient_buffer_data_);
  CheckResult(err, "vkMapMemory");

  // Descriptor pool used for all of our cached descriptors.
  // In the steady state we don't allocate anything, so these are all manually
  // managed.
  VkDescriptorPoolCreateInfo descriptor_pool_info;
  descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  descriptor_pool_info.pNext = nullptr;
  descriptor_pool_info.flags =
      VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
  descriptor_pool_info.maxSets = 1;
  VkDescriptorPoolSize pool_sizes[1];
  pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  pool_sizes[0].descriptorCount = 2;
  descriptor_pool_info.poolSizeCount = 1;
  descriptor_pool_info.pPoolSizes = pool_sizes;
  err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
                               &descriptor_pool_);
  CheckResult(err, "vkCreateDescriptorPool");

  // Create the descriptor set layout used for our uniform buffer.
  // As it is a static binding that uses dynamic offsets during draws we can
  // create this once and reuse it forever.
  VkDescriptorSetLayoutBinding vertex_uniform_binding;
  vertex_uniform_binding.binding = 0;
  vertex_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  vertex_uniform_binding.descriptorCount = 1;
  vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
  vertex_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutBinding fragment_uniform_binding;
  fragment_uniform_binding.binding = 1;
  fragment_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  fragment_uniform_binding.descriptorCount = 1;
  fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
  fragment_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info;
  descriptor_set_layout_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  descriptor_set_layout_info.pNext = nullptr;
  descriptor_set_layout_info.flags = 0;
  VkDescriptorSetLayoutBinding uniform_bindings[] = {
      vertex_uniform_binding, fragment_uniform_binding,
  };
  descriptor_set_layout_info.bindingCount =
      static_cast<uint32_t>(xe::countof(uniform_bindings));
  descriptor_set_layout_info.pBindings = uniform_bindings;
  err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info,
                                    nullptr, &descriptor_set_layout_);
  CheckResult(err, "vkCreateDescriptorSetLayout");

  // Create the descriptor we'll use for the uniform buffer.
  // This is what we hand out to everyone (who then also needs to use our
  // offsets).
  VkDescriptorSetAllocateInfo set_alloc_info;
  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  set_alloc_info.pNext = nullptr;
  set_alloc_info.descriptorPool = descriptor_pool_;
  set_alloc_info.descriptorSetCount = 1;
  set_alloc_info.pSetLayouts = &descriptor_set_layout_;
  err = vkAllocateDescriptorSets(device_, &set_alloc_info,
                                 &transient_descriptor_set_);
  CheckResult(err, "vkAllocateDescriptorSets");
}

BufferCache::~BufferCache() {
  vkFreeDescriptorSets(device_, descriptor_pool_, 1,
                       &transient_descriptor_set_);
  vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
  vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
  vkUnmapMemory(device_, transient_buffer_memory_);
  vkFreeMemory(device_, transient_buffer_memory_, nullptr);
  vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
  vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
  vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
}

VkDeviceSize BufferCache::UploadConstantRegisters(
    const Shader::ConstantRegisterMap& constant_register_map) {
  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(
      uniform_buffer_alignment_, constant_register_map.packed_byte_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return VK_WHOLE_SIZE;
  }

  // Run through registers and copy them into the buffer.
  // TODO(benvanik): optimize this - it's hit twice every call.
  const auto& values = register_file_->values;
  uint8_t* dest_ptr =
      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
  for (int i = 0; i < 4; ++i) {
    auto piece = constant_register_map.float_bitmap[i];
    if (!piece) {
      continue;
    }
    for (int j = 0; j < 64; ++j) {
      if (piece & (1ull << j)) {
        xe::copy_128_aligned(
            dest_ptr,
            &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
        dest_ptr += 16;
      }
    }
  }
  for (int i = 0; i < 32; ++i) {
    if (constant_register_map.int_bitmap & (1 << i)) {
      xe::store<uint32_t>(dest_ptr,
                          values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
      dest_ptr += 4;
    }
  }
  for (int i = 0; i < 8; ++i) {
    if (constant_register_map.bool_bitmap[i]) {
      xe::store<uint32_t>(
          dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
      dest_ptr += 4;
    }
  }

  return offset;
}

std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
    const void* source_ptr, size_t source_length, IndexFormat format) {
  // TODO(benvanik): check cache.

  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {nullptr, VK_WHOLE_SIZE};
  }

  // Copy data into the buffer; note that offset is in bytes.
  // TODO(benvanik): get min/max indices and pass back?
  // TODO(benvanik): memcpy then use compute shaders to swap?
  if (format == IndexFormat::kInt16) {
    // Endian::k8in16, swap half-words.
    xe::copy_and_swap_16_aligned(
        reinterpret_cast<uint16_t*>(
            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
        reinterpret_cast<const uint16_t*>(source_ptr), source_length / 2);
  } else if (format == IndexFormat::kInt32) {
    // Endian::k8in32, swap words.
    xe::copy_and_swap_32_aligned(
        reinterpret_cast<uint32_t*>(
            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
        reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
  }

  return {transient_index_buffer_, offset};
}

std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
    const void* source_ptr, size_t source_length) {
  // TODO(benvanik): check cache.

  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {nullptr, VK_WHOLE_SIZE};
  }

  // Copy data into the buffer; note that offset is in bytes.
  // TODO(benvanik): memcpy then use compute shaders to swap?
  // Endian::k8in32, swap words.
  xe::copy_and_swap_32_aligned(
      reinterpret_cast<uint32_t*>(
          reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
      reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);

  return {transient_vertex_buffer_, offset};
}

VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
                                                size_t length) {
  // Try to add to the end, wrapping if required.

  // Check to ensure there is space.
  if (false) {
    // Consume all fences.
  }

  // Slice off our bit.

  return VK_WHOLE_SIZE;
}
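// A minimal sketch (not in this commit) of the ring-buffer scheme the stub
// above describes: bump-allocate from a tail cursor, wrap at capacity, and
// report OOM when the slice would overlap data the GPU may still be reading.
// The head/tail cursors are hypothetical; fence consumption would advance
// *head. Assumes power-of-two alignments, as Vulkan reports in practice.
static VkDeviceSize AllocateFromRing(VkDeviceSize* head, VkDeviceSize* tail,
                                     VkDeviceSize capacity, size_t alignment,
                                     size_t length) {
  VkDeviceSize offset =
      (*tail + alignment - 1) & ~static_cast<VkDeviceSize>(alignment - 1);
  if (offset + length > capacity) {
    offset = 0;  // Wrap to the start of the ring.
  }
  if (offset < *head && offset + length > *head) {
    return VK_WHOLE_SIZE;  // Would clobber in-flight data; wait on fences.
  }
  *tail = offset + length;
  return offset;
}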

void BufferCache::Flush(VkCommandBuffer command_buffer) {
  // If we are flushing a big enough chunk, queue up an event.
  // We don't want to do this for everything, but often enough so that we
  // won't run out of space.
  if (true) {
    // VkEvent finish_event;
    // vkCmdSetEvent(cmd_buffer, finish_event,
    //               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
  }

  // Flush memory.
  // TODO(benvanik): subrange.
  VkMappedMemoryRange dirty_range;
  dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  dirty_range.pNext = nullptr;
  dirty_range.memory = transient_buffer_memory_;
  dirty_range.offset = 0;
  dirty_range.size = transient_capacity_;
  vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
}
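// A minimal sketch (not in this commit) of the subrange flush the TODO above
// refers to. Flushed mapped ranges must be aligned to the device's
// nonCoherentAtomSize (from VkPhysicalDeviceLimits), so a hypothetical dirty
// window [dirty_start, dirty_end) is expanded to that granularity; assumes
// the atom size is a power of two, as it is in practice.
static VkMappedMemoryRange MakeDirtyRange(VkDeviceMemory memory,
                                          VkDeviceSize dirty_start,
                                          VkDeviceSize dirty_end,
                                          VkDeviceSize atom_size) {
  VkMappedMemoryRange range;
  range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  range.pNext = nullptr;
  range.memory = memory;
  range.offset = dirty_start & ~(atom_size - 1);  // Round down to atom.
  range.size =
      ((dirty_end + atom_size - 1) & ~(atom_size - 1)) - range.offset;
  return range;
}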

void BufferCache::InvalidateCache() {
  // TODO(benvanik): caching.
}

void BufferCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/buffer_cache.h
@@ -0,0 +1,111 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_

#include <utility>

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Efficiently manages buffers of various kinds.
// Used primarily for uploading index and vertex data from guest memory and
// transient data like shader constants.
class BufferCache {
 public:
  BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
              size_t capacity);
  ~BufferCache();

  // Descriptor set containing the dynamic uniform buffer used for constant
  // uploads. Used in conjunction with a dynamic offset returned by
  // UploadConstantRegisters.
  // The set contains two bindings:
  //   binding = 0: for use in vertex shaders
  //   binding = 1: for use in fragment shaders
  VkDescriptorSet constant_descriptor_set() const {
    return transient_descriptor_set_;
  }

  // Uploads the constants specified in the register maps to the transient
  // uniform storage buffer.
  // The registers are tightly packed in order as [floats, ints, bools].
  // Returns an offset that can be used with the constant_descriptor_set, or
  // VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
  VkDeviceSize UploadConstantRegisters(
      const Shader::ConstantRegisterMap& constant_register_map);

  // Uploads index buffer data from guest memory, possibly eliding with
  // recently uploaded data or cached copies.
  // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
  // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
  std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
                                                      size_t source_length,
                                                      IndexFormat format);

  // Uploads vertex buffer data from guest memory, possibly eliding with
  // recently uploaded data or cached copies.
  // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
  // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
  std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
                                                       size_t source_length);

  // Flushes all pending data to the GPU.
  // Until this is called the GPU is not guaranteed to see any data.
  // The given command buffer will be used to queue up events so that the
  // cache can determine when data has been consumed.
  void Flush(VkCommandBuffer command_buffer);

  // Marks the cache as potentially invalid.
  // This is not as strong as ClearCache and is a hint that any and all data
  // should be verified before being reused.
  void InvalidateCache();

  // Clears all cached content and prevents future elision with pending data.
  void ClearCache();

 private:
  // Allocates a block of memory in the transient buffer.
  // Returns VK_WHOLE_SIZE if the requested amount of memory is not available.
  VkDeviceSize AllocateTransientData(size_t alignment, size_t length);

  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;

  // Staging ringbuffer we cycle through fast. Used for data we don't
  // plan on keeping past the current frame.
  size_t transient_capacity_ = 0;
  VkBuffer transient_uniform_buffer_ = nullptr;
  VkBuffer transient_index_buffer_ = nullptr;
  VkBuffer transient_vertex_buffer_ = nullptr;
  VkDeviceMemory transient_buffer_memory_ = nullptr;
  void* transient_buffer_data_ = nullptr;

  // Required alignments for our various types.
  // All allocations must start at the appropriate alignment.
  VkDeviceSize uniform_buffer_alignment_ = 0;
  VkDeviceSize index_buffer_alignment_ = 0;
  VkDeviceSize vertex_buffer_alignment_ = 0;

  VkDescriptorPool descriptor_pool_ = nullptr;
  VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
  VkDescriptorSet transient_descriptor_set_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -0,0 +1,335 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/pipeline_cache.h"

#include "third_party/xxhash/xxhash.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

PipelineCache::PipelineCache(RegisterFile* register_file,
                             ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

PipelineCache::~PipelineCache() {
  // Destroy all shaders.
  for (auto it : shader_map_) {
    delete it.second;
  }
  shader_map_.clear();
}

VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
                                        uint32_t guest_address,
                                        const uint32_t* host_address,
                                        uint32_t dword_count) {
  // Hash the input memory and look up the shader.
  uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  auto it = shader_map_.find(data_hash);
  if (it != shader_map_.end()) {
    // Shader has been previously loaded.
    return it->second;
  }

  // Always create the shader and stash it away.
  // We need to track it even if it fails translation so we know not to try
  // again.
  VulkanShader* shader =
      new VulkanShader(shader_type, data_hash, host_address, dword_count);
  shader_map_.insert({data_hash, shader});

  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  if (!shader_translator_.Translate(shader)) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return shader;
  }

  // Prepare the shader for use (creates our VkShaderModule).
  // It could still fail at this point.
  if (!shader->Prepare()) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return shader;
  }

  if (shader->is_valid()) {
    XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s",
             shader_type == ShaderType::kVertex ? "vertex" : "pixel",
             guest_address, dword_count * 4,
             shader->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  if (!FLAGS_dump_shaders.empty()) {
    shader->Dump(FLAGS_dump_shaders, "vk");
  }

  return shader;
}

bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
                                      VkRenderPass render_pass,
                                      PrimitiveType primitive_type) {
  // TODO(benvanik): create or look up a cached pipeline and bind it here.
  return false;
}
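// A minimal sketch (not in this commit) of the state-keyed lookup that
// ConfigurePipeline could grow into: pack the shader hashes and the shadowed
// registers that affect fixed-function state into a key, hash it, and reuse
// a cached VkPipeline on a hit. PipelineKey, the cache map, and the miss
// handling are all hypothetical.
struct PipelineKey {
  uint64_t vertex_shader_hash;
  uint64_t pixel_shader_hash;
  uint32_t prim_type;
  uint32_t rb_depthcontrol;
  uint32_t rb_blendcontrol[4];
  // ... any other registers that feed pipeline state ...
};

static VkPipeline LookupPipeline(
    const std::unordered_map<uint64_t, VkPipeline>& pipelines,
    const PipelineKey& key) {
  // Key must be zero-initialized before filling so padding hashes stably.
  uint64_t hash = XXH64(&key, sizeof(key), 0);
  auto it = pipelines.find(hash);
  return it != pipelines.end() ? it->second : nullptr;  // nullptr = build new.
}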

void PipelineCache::ClearCache() {
  // TODO(benvanik): caching.
}

bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
  uint32_t value = register_file_->values[register_name].u32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
  float value = register_file_->values[register_name].f32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

PipelineCache::UpdateStatus PipelineCache::UpdateShaders(
    PrimitiveType prim_type) {
  auto& regs = update_shaders_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
  // dirty |= regs.vertex_shader != active_vertex_shader_;
  // dirty |= regs.pixel_shader != active_pixel_shader_;
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }
  // regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
  // regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() {
  auto& regs = update_render_targets_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateState(
    PrimitiveType prim_type) {
  bool mismatch = false;

#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  {                                                          \
    if (status == UpdateStatus::kError) {                    \
      XELOGE(error_message);                                 \
      return status;                                         \
    } else if (status == UpdateStatus::kMismatch) {          \
      mismatch = true;                                       \
    }                                                        \
  }

  UpdateStatus status;
  status = UpdateViewportState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
  status = UpdateRasterizerState(prim_type);
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
  status = UpdateBlendState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
  status = UpdateDepthStencilState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}

PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() {
  auto& regs = update_viewport_state_regs_;

  bool dirty = false;
  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
  //                            XE_GPU_REG_PA_CL_CLIP_CNTL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);

  // Much of this state machine is extracted from:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
  //            = false: multiply the X, Y coordinates by 1/W0.
  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //           = false: multiply the Z coordinate by 1/W0.
  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
  //                    get 1/W0.
  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);

  // Done in VS, no need to flush state.
  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
  // } else {
  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
  // }

  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
    PrimitiveType prim_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() {
  auto& reg_file = *register_file_;
  auto& regs = update_blend_state_regs_;

  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
  // Deprecated in GL, implemented in shader.
  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  // ALPHATESTENABLE
  //                              color_control & 0x7,         // ALPHAFUNC
  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);

  bool dirty = false;
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/pipeline_cache.h
@@ -0,0 +1,167 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_

#include <cstring>
#include <unordered_map>

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Configures and caches pipelines based on render state.
// This is responsible for properly setting all state required for a draw
// including shaders, various blend/etc options, and input configuration.
class PipelineCache {
 public:
  PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~PipelineCache();

  // Loads a shader from the cache, possibly translating it.
  VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address,
                           const uint32_t* host_address, uint32_t dword_count);

  // Configures a pipeline using the current render state and the given render
  // pass. If a matching pipeline is already available it will be used,
  // otherwise a new one may be created. Any state that can be set dynamically
  // in the command buffer is issued at this time.
  // Returns whether the pipeline could be successfully created.
  bool ConfigurePipeline(VkCommandBuffer command_buffer,
                         VkRenderPass render_pass,
                         PrimitiveType primitive_type);

  // Currently configured pipeline layout, if any.
  VkPipelineLayout current_pipeline_layout() const { return nullptr; }
  // Currently configured vertex shader, if any.
  VulkanShader* current_vertex_shader() const { return nullptr; }
  // Currently configured pixel shader, if any.
  VulkanShader* current_pixel_shader() const { return nullptr; }

  // Clears all cached content.
  void ClearCache();

 private:
  // TODO(benvanik): geometry shader cache.
  // TODO(benvanik): translated shader cache.
  // TODO(benvanik): pipeline layouts.
  // TODO(benvanik): pipeline cache.

  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;

  SpirvShaderTranslator shader_translator_;
  xe::ui::spirv::SpirvDisassembler disassembler_;
  // All loaded shaders mapped by their data hash key.
  std::unordered_map<uint64_t, VulkanShader*> shader_map_;

 private:
  enum class UpdateStatus {
    kCompatible,
    kMismatch,
    kError,
  };

  UpdateStatus UpdateShaders(PrimitiveType prim_type);
  UpdateStatus UpdateRenderTargets();
  UpdateStatus UpdateState(PrimitiveType prim_type);
  UpdateStatus UpdateViewportState();
  UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
  UpdateStatus UpdateBlendState();
  UpdateStatus UpdateDepthStencilState();

  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
  bool SetShadowRegister(float* dest, uint32_t register_name);

  struct UpdateRenderTargetsRegisters {
    uint32_t rb_modecontrol;
    uint32_t rb_surface_info;
    uint32_t rb_color_info;
    uint32_t rb_color1_info;
    uint32_t rb_color2_info;
    uint32_t rb_color3_info;
    uint32_t rb_color_mask;
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;
    uint32_t rb_depth_info;

    UpdateRenderTargetsRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_render_targets_regs_;
  struct UpdateViewportStateRegisters {
    // uint32_t pa_cl_clip_cntl;
    uint32_t rb_surface_info;
    uint32_t pa_cl_vte_cntl;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_window_offset;
    uint32_t pa_sc_window_scissor_tl;
    uint32_t pa_sc_window_scissor_br;
    float pa_cl_vport_xoffset;
    float pa_cl_vport_yoffset;
    float pa_cl_vport_zoffset;
    float pa_cl_vport_xscale;
    float pa_cl_vport_yscale;
    float pa_cl_vport_zscale;

    UpdateViewportStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_viewport_state_regs_;
  struct UpdateRasterizerStateRegisters {
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_screen_scissor_tl;
    uint32_t pa_sc_screen_scissor_br;
    uint32_t multi_prim_ib_reset_index;
    PrimitiveType prim_type;

    UpdateRasterizerStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_rasterizer_state_regs_;
  struct UpdateBlendStateRegisters {
    uint32_t rb_blendcontrol[4];
    float rb_blend_rgba[4];

    UpdateBlendStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_blend_state_regs_;
  struct UpdateDepthStencilStateRegisters {
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;

    UpdateDepthStencilStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_depth_stencil_state_regs_;
  struct UpdateShadersRegisters {
    PrimitiveType prim_type;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t sq_program_cntl;
    uint32_t sq_context_misc;
    VulkanShader* vertex_shader;
    VulkanShader* pixel_shader;

    UpdateShadersRegisters() { Reset(); }
    void Reset() {
      sq_program_cntl = 0;
      vertex_shader = pixel_shader = nullptr;
    }
  } update_shaders_regs_;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
src/xenia/gpu/vulkan/render_cache.cc
@@ -0,0 +1,45 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/render_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

RenderCache::RenderCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

RenderCache::~RenderCache() = default;

VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) {
  // TODO(benvanik): create a render pass/framebuffer and begin it here.
  return nullptr;
}
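// A minimal sketch (not in this commit) of what BeginRenderPass will
// eventually record: beginning a render pass on the command buffer with a
// framebuffer matching the current surface configuration. The render_pass
// and framebuffer handles and the render area extent are hypothetical here.
static void BeginRenderPassSketch(VkCommandBuffer command_buffer,
                                  VkRenderPass render_pass,
                                  VkFramebuffer framebuffer,
                                  VkExtent2D surface_extent) {
  VkRenderPassBeginInfo begin_info;
  begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
  begin_info.pNext = nullptr;
  begin_info.renderPass = render_pass;
  begin_info.framebuffer = framebuffer;
  begin_info.renderArea.offset = {0, 0};
  begin_info.renderArea.extent = surface_extent;
  begin_info.clearValueCount = 0;  // Attachments are loaded, not cleared.
  begin_info.pClearValues = nullptr;
  vkCmdBeginRenderPass(command_buffer, &begin_info,
                       VK_SUBPASS_CONTENTS_INLINE);
}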

void RenderCache::EndRenderPass() {
  //
}

void RenderCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/render_cache.h
@@ -0,0 +1,46 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Configures and caches render passes and framebuffers based on the current
// render target state, and begins/ends the passes used for each draw.
class RenderCache {
 public:
  RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~RenderCache();

  VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer);
  void EndRenderPass();

  // Clears all cached content.
  void ClearCache();

 private:
  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_RENDER_CACHE_H_
src/xenia/gpu/vulkan/texture_cache.cc
@@ -0,0 +1,37 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/texture_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

TextureCache::TextureCache(RegisterFile* register_file,
                           ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

TextureCache::~TextureCache() = default;

void TextureCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/texture_cache.h
@@ -0,0 +1,47 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Uploads and caches textures from guest memory.
class TextureCache {
 public:
  TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~TextureCache();

  // TODO(benvanik): UploadTexture.
  // TODO(benvanik): Resolve.
  // TODO(benvanik): ReadTexture.

  // Clears all cached content.
  void ClearCache();

 private:
  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -20,12 +20,16 @@
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace gpu {
namespace vulkan {

using namespace xe::gpu::xenos;
using xe::ui::vulkan::CheckResult;

constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024;

VulkanCommandProcessor::VulkanCommandProcessor(
    VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
@@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor(

VulkanCommandProcessor::~VulkanCommandProcessor() = default;

void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); }
void VulkanCommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();

  buffer_cache_->ClearCache();
  pipeline_cache_->ClearCache();
  render_cache_->ClearCache();
  texture_cache_->ClearCache();
}

bool VulkanCommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
@@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() {
    return false;
  }

  // Acquire our device and queue.
  auto context = static_cast<xe::ui::vulkan::VulkanContext*>(context_.get());
  device_ = context->device();
  queue_ = device_->AcquireQueue();
  if (!queue_) {
    // Need to reuse primary queue (with locks).
    queue_ = device_->primary_queue();
    queue_mutex_ = &device_->primary_queue_mutex();
  }

  // Setup fenced pools used for all our per-frame/per-draw resources.
  command_buffer_pool_ = std::make_unique<ui::vulkan::CommandBufferPool>(
      *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  // Initialize the state machine caches.
  buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
                                                kDefaultBufferCacheCapacity);
  pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, device_);
  render_cache_ = std::make_unique<RenderCache>(register_file_, device_);
  texture_cache_ = std::make_unique<TextureCache>(register_file_, device_);

  return true;
}

void VulkanCommandProcessor::ShutdownContext() {
  // TODO(benvanik): wait until idle.

  buffer_cache_.reset();
  pipeline_cache_.reset();
  render_cache_.reset();
  texture_cache_.reset();

  // Free all pools. This must come after all of our caches clean up.
  command_buffer_pool_.reset();

  // Release the queue, if we were using an acquired one.
  if (!queue_mutex_) {
    device_->ReleaseQueue(queue_);
    queue_ = nullptr;
  }

  CommandProcessor::ShutdownContext();
}
@@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() {
  CommandProcessor::MakeCoherent();

  if (status_host & 0x80000000ul) {
    // scratch_buffer_.ClearCache();
    // TODO(benvanik): less-fine-grained clearing.
    buffer_cache_->InvalidateCache();
  }
}
@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type,
|
|||
uint32_t guest_address,
|
||||
const uint32_t* host_address,
|
||||
uint32_t dword_count) {
|
||||
// return shader_cache_.LookupOrInsertShader(shader_type, host_address,
|
||||
// dword_count);
|
||||
return nullptr;
|
||||
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
|
||||
dword_count);
|
||||
}
|
||||
|
||||
bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type,
|
||||
bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||
uint32_t index_count,
|
||||
IndexBufferInfo* index_buffer_info) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
// Skip all drawing for now - what did you expect? :)
|
||||
return true;
|
||||
|
||||
bool draw_valid = false;
|
||||
// if (index_buffer_info) {
|
||||
// draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count,
|
||||
// index_buffer_info->format);
|
||||
//} else {
|
||||
// draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count);
|
||||
//}
|
||||
if (!draw_valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto& regs = *register_file_;
|
||||
|
||||
auto enable_mode =
|
||||
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||
if (enable_mode == ModeControl::kIgnore) {
|
||||
// Ignored.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
return true;
|
||||
} else if (enable_mode == ModeControl::kCopy) {
|
||||
// Special copy handling.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
return IssueCopy();
|
||||
}
|
||||
|
||||
#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \
|
||||
{ \
|
||||
if (status == UpdateStatus::kError) { \
|
||||
XELOGE(error_message); \
|
||||
/*draw_batcher_.DiscardDraw(); */ \
|
||||
return false; \
|
||||
} else if (status == UpdateStatus::kMismatch) { \
|
||||
mismatch = true; \
|
||||
} \
|
||||
}
|
||||
// TODO(benvanik): bigger batches.
|
||||
command_buffer_pool_->BeginBatch();
|
||||
VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry();
|
||||
VkCommandBufferBeginInfo command_buffer_begin_info;
|
||||
command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
command_buffer_begin_info.pNext = nullptr;
|
||||
command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
command_buffer_begin_info.pInheritanceInfo = nullptr;
|
||||
auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
|
||||
CheckResult(err, "vkBeginCommandBuffer");
|
||||
|
||||
UpdateStatus status;
|
||||
bool mismatch = false;
|
||||
status = UpdateShaders(prim_type);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders");
|
||||
status = UpdateRenderTargets();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets");
|
||||
// if (!active_framebuffer_) {
|
||||
// // No framebuffer, so nothing we do will actually have an effect.
|
||||
// // Treat it as a no-op.
|
||||
// // TODO(benvanik): if we have a vs export, still allow it to go.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
// return true;
|
||||
//}
|
||||
|
||||
status = UpdateState(prim_type);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state");
|
||||
status = PopulateSamplers();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch,
|
||||
"Unable to prepare draw samplers");
|
||||
|
||||
status = PopulateIndexBuffer(index_buffer_info);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer");
|
||||
status = PopulateVertexBuffers();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers");
|
||||
|
||||
// if (!draw_batcher_.CommitDraw()) {
|
||||
// return false;
|
||||
//}
|
||||
|
||||
// draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent);
|
||||
if (context_->WasLost()) {
|
||||
// This draw lost us the context. This typically isn't hit.
|
||||
assert_always();
|
||||
// Begin the render pass.
|
||||
// This will setup our framebuffer and begin the pass in the command buffer.
|
||||
VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer);
|
||||
if (!render_pass) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Configure the pipeline for drawing.
|
||||
// This encodes all render state (blend, depth, etc), our shader stages,
|
||||
// and our vertex input layout.
|
||||
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
|
||||
primitive_type)) {
|
||||
render_cache_->EndRenderPass();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Upload the constants the shaders require.
|
||||
auto vertex_shader = pipeline_cache_->current_vertex_shader();
|
||||
auto pixel_shader = pipeline_cache_->current_pixel_shader();
|
||||
auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||
vertex_shader->constant_register_map());
|
||||
auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||
pixel_shader->constant_register_map());
|
||||
if (vertex_constant_offset == VK_WHOLE_SIZE ||
|
||||
pixel_constant_offset == VK_WHOLE_SIZE) {
|
||||
render_cache_->EndRenderPass();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Configure constant uniform access to point at our offsets.
|
||||
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set();
|
||||
auto pipeline_layout = pipeline_cache_->current_pipeline_layout();
|
||||
uint32_t constant_offsets[2] = {static_cast<uint32_t>(vertex_constant_offset),
|
||||
static_cast<uint32_t>(pixel_constant_offset)};
|
||||
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline_layout, 0, 1, &constant_descriptor_set,
|
||||
static_cast<uint32_t>(xe::countof(constant_offsets)),
|
||||
constant_offsets);

  // Upload and bind index buffer data (if we have any).
  if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
    render_cache_->EndRenderPass();
    return false;
  }

  // Upload and bind all vertex buffer data.
  if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
    render_cache_->EndRenderPass();
    return false;
  }

  // Upload and set descriptors for all textures.
  if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) {
    render_cache_->EndRenderPass();
    return false;
  }

#if 0
  // Actually issue the draw.
  if (!index_buffer_info) {
    // Auto-indexed draw.
    uint32_t instance_count = 1;
    uint32_t first_vertex = 0;
    uint32_t first_instance = 0;
    vkCmdDraw(command_buffer, index_count, instance_count, first_vertex,
              first_instance);
  } else {
    // Index buffer draw.
    uint32_t instance_count = 1;
    uint32_t first_index =
        register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
    uint32_t vertex_offset = 0;
    uint32_t first_instance = 0;
    vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index,
                     vertex_offset, first_instance);
  }
#endif

  // End the rendering pass.
  render_cache_->EndRenderPass();

  // TODO(benvanik): bigger batches.
  err = vkEndCommandBuffer(command_buffer);
  CheckResult(err, "vkEndCommandBuffer");
  VkFence fence;
  VkFenceCreateInfo fence_info;
  fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
  fence_info.pNext = nullptr;
  fence_info.flags = 0;
  vkCreateFence(*device_, &fence_info, nullptr, &fence);
  command_buffer_pool_->EndBatch(fence);
  VkSubmitInfo submit_info;
  submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit_info.pNext = nullptr;
  submit_info.waitSemaphoreCount = 0;
  submit_info.pWaitSemaphores = nullptr;
  submit_info.commandBufferCount = 1;
  submit_info.pCommandBuffers = &command_buffer;
  submit_info.signalSemaphoreCount = 0;
  submit_info.pSignalSemaphores = nullptr;
  if (queue_mutex_) {
    queue_mutex_->lock();
  }
  err = vkQueueSubmit(queue_, 1, &submit_info, fence);
  if (queue_mutex_) {
    queue_mutex_->unlock();
  }
  CheckResult(err, "vkQueueSubmit");
  if (queue_mutex_) {
    queue_mutex_->lock();
  }
  vkQueueWaitIdle(queue_);
  if (queue_mutex_) {
    queue_mutex_->unlock();
  }
  command_buffer_pool_->Scavenge();
  vkDestroyFence(*device_, fence, nullptr);

  return true;
}

bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest,
                                               uint32_t register_name) {
  uint32_t value = register_file_->values[register_name].u32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

bool VulkanCommandProcessor::SetShadowRegister(float* dest,
                                               uint32_t register_name) {
  float value = register_file_->values[register_name].f32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders(
    PrimitiveType prim_type) {
  auto& regs = update_shaders_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
  dirty |= regs.vertex_shader != active_vertex_shader_;
  dirty |= regs.pixel_shader != active_pixel_shader_;
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }
  regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
  regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRenderTargets() {
  auto& regs = update_render_targets_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState(
    PrimitiveType prim_type) {
  bool mismatch = false;

#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  {                                                          \
    if (status == UpdateStatus::kError) {                    \
      XELOGE(error_message);                                 \
      return status;                                         \
    } else if (status == UpdateStatus::kMismatch) {          \
      mismatch = true;                                       \
    }                                                        \
  }

  UpdateStatus status;
  status = UpdateViewportState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
  status = UpdateRasterizerState(prim_type);
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
  status = UpdateBlendState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
  status = UpdateDepthStencilState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateViewportState() {
  auto& regs = update_viewport_state_regs_;

  bool dirty = false;
  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
  //                            XE_GPU_REG_PA_CL_CLIP_CNTL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);

  // Much of this state machine is extracted from:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
  //            = false: multiply the X, Y coordinates by 1/W0.
  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //           = false: multiply the Z coordinate by 1/W0.
  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
  //              get 1/W0.
  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);

  // Done in VS, no need to flush state.
  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
  //} else {
  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
  //}
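
  // A rough sketch (assumptions, not wired up yet) of how the shadowed
  // viewport registers could map to a Vulkan viewport, using the usual
  // NDC-to-window derivation (left = xoffset - xscale, width = 2 * xscale);
  // window offset and half-pixel handling are omitted:
  // VkViewport viewport;
  // viewport.x = regs.pa_cl_vport_xoffset - regs.pa_cl_vport_xscale;
  // viewport.y = regs.pa_cl_vport_yoffset - regs.pa_cl_vport_yscale;
  // viewport.width = regs.pa_cl_vport_xscale * 2.0f;
  // viewport.height = regs.pa_cl_vport_yscale * 2.0f;
  // viewport.minDepth = regs.pa_cl_vport_zoffset;
  // viewport.maxDepth = regs.pa_cl_vport_zoffset + regs.pa_cl_vport_zscale;
  // vkCmdSetViewport(command_buffer, 0, 1, &viewport);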

  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateBlendState() {
  auto& reg_file = *register_file_;
  auto& regs = update_blend_state_regs_;

  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
  // Deprecated in GL, implemented in shader.
  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  // ALPHATESTENABLE
  //                              color_control & 0x7,         // ALPHAFUNC
  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);

  bool dirty = false;
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateIndexBuffer(
    IndexBufferInfo* index_buffer_info) {
bool VulkanCommandProcessor::PopulateIndexBuffer(
    VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) {
  auto& regs = *register_file_;
  if (!index_buffer_info || !index_buffer_info->guest_base) {
    // No index buffer or auto draw.
    return UpdateStatus::kCompatible;
    return true;
  }
  auto& info = *index_buffer_info;

@@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer(

  trace_writer_.WriteMemoryRead(info.guest_base, info.length);

  return UpdateStatus::kCompatible;
  // Upload (or get a cached copy of) the buffer.
  const void* source_ptr =
      memory_->TranslatePhysical<const void*>(info.guest_base);
  size_t source_length =
      info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
                                                       : sizeof(uint16_t));
  auto buffer_ref =
      buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format);
  if (buffer_ref.second == VK_WHOLE_SIZE) {
    // Failed to upload buffer.
    return false;
  }

  // Bind the buffer.
  VkIndexType index_type = info.format == IndexFormat::kInt32
                               ? VK_INDEX_TYPE_UINT32
                               : VK_INDEX_TYPE_UINT16;
  vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second,
                       index_type);

  return true;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateVertexBuffers() {
bool VulkanCommandProcessor::PopulateVertexBuffers(
    VkCommandBuffer command_buffer, VulkanShader* vertex_shader) {
  auto& regs = *register_file_;

#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  auto& regs = *register_file_;
  assert_not_null(active_vertex_shader_);
  auto& vertex_bindings = vertex_shader->vertex_bindings();
  assert_true(vertex_bindings.size() <= 32);
  VkBuffer all_buffers[32];
  VkDeviceSize all_buffer_offsets[32];
  uint32_t buffer_index = 0;

  for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) {
  for (const auto& vertex_binding : vertex_bindings) {
    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
            (vertex_binding.fetch_constant / 3) * 6;
    const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);

@@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() {
    }
    assert_true(fetch->endian == 2);

    // TODO(benvanik): compute based on indices or vertex count.
    // THIS CAN BE MASSIVELY INCORRECT (too large).
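    // (fetch->size is expressed in dwords, hence the * 4 below to get a byte
    // length.)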
    size_t valid_range = size_t(fetch->size * 4);

    trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);

    // Upload (or get a cached copy of) the buffer.
    const void* source_ptr =
        memory_->TranslatePhysical<const void*>(fetch->address << 2);
    size_t source_length = valid_range;
    auto buffer_ref =
        buffer_cache_->UploadVertexBuffer(source_ptr, source_length);
    if (buffer_ref.second == VK_WHOLE_SIZE) {
      // Failed to upload buffer.
      return false;
    }

    // Stash the buffer reference for our bulk bind at the end.
    all_buffers[buffer_index] = buffer_ref.first;
    all_buffer_offsets[buffer_index] = buffer_ref.second;
    ++buffer_index;
  }

  return UpdateStatus::kCompatible;
  // Bind buffers.
  vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers,
                         all_buffer_offsets);

  return true;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateSamplers() {
bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
                                              VulkanShader* vertex_shader,
                                              VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  bool mismatch = false;
  bool any_failed = false;

  // VS and PS samplers are shared, but may be used exclusively.
  // We walk each and setup lazily.
  bool has_setup_sampler[32] = {false};

  // Vertex texture samplers.
  for (auto& texture_binding : active_vertex_shader_->texture_bindings()) {
  for (auto& texture_binding : vertex_shader->texture_bindings()) {
    if (has_setup_sampler[texture_binding.fetch_constant]) {
      continue;
    }
    has_setup_sampler[texture_binding.fetch_constant] = true;
    auto status = PopulateSampler(texture_binding);
    if (status == UpdateStatus::kError) {
      return status;
    } else if (status == UpdateStatus::kMismatch) {
      mismatch = true;
    }
    any_failed =
        !PopulateSampler(command_buffer, texture_binding) || any_failed;
  }

  // Pixel shader texture sampler.
  for (auto& texture_binding : active_pixel_shader_->texture_bindings()) {
  for (auto& texture_binding : pixel_shader->texture_bindings()) {
    if (has_setup_sampler[texture_binding.fetch_constant]) {
      continue;
    }
    has_setup_sampler[texture_binding.fetch_constant] = true;
    auto status = PopulateSampler(texture_binding);
    if (status == UpdateStatus::kError) {
      return UpdateStatus::kError;
    } else if (status == UpdateStatus::kMismatch) {
      mismatch = true;
    }
    any_failed =
        !PopulateSampler(command_buffer, texture_binding) || any_failed;
  }

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
  return !any_failed;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
bool VulkanCommandProcessor::PopulateSampler(
    VkCommandBuffer command_buffer,
    const Shader::TextureBinding& texture_binding) {
  auto& regs = *register_file_;
  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +

@@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(

  // ?
  if (!fetch.type) {
    return UpdateStatus::kCompatible;
    return true;
  }
  assert_true(fetch.type == 0x2);

  TextureInfo texture_info;
  if (!TextureInfo::Prepare(fetch, &texture_info)) {
    XELOGE("Unable to parse texture fetcher info");
    return UpdateStatus::kCompatible;  // invalid texture used
    return true;  // invalid texture used
  }
  SamplerInfo sampler_info;
  if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
                            &sampler_info)) {
    XELOGE("Unable to parse sampler info");
    return UpdateStatus::kCompatible;  // invalid texture used
    return true;  // invalid texture used
  }

  trace_writer_.WriteMemoryRead(texture_info.guest_address,
                                texture_info.input_length);

  return UpdateStatus::kCompatible;
  // TODO(benvanik): texture cache lookup.
  // TODO(benvanik): bind or return so PopulateSamplers can batch.

  return true;
}

bool VulkanCommandProcessor::IssueCopy() {
  SCOPE_profile_cpu_f("gpu");
  // TODO(benvanik): resolve.
  return true;
}
@@ -23,12 +23,17 @@
#include "xenia/base/threading.h"
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/gpu/vulkan/pipeline_cache.h"
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/gpu/vulkan/texture_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/xthread.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {

@@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor {
  void ClearCaches() override;

 private:
  enum class UpdateStatus {
    kCompatible,
    kMismatch,
    kError,
  };

  bool SetupContext() override;
  void ShutdownContext() override;

@@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor {
                         const uint32_t* host_address,
                         uint32_t dword_count) override;

  bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
  bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
                 IndexBufferInfo* index_buffer_info) override;
  UpdateStatus UpdateShaders(PrimitiveType prim_type);
  UpdateStatus UpdateRenderTargets();
  UpdateStatus UpdateState(PrimitiveType prim_type);
  UpdateStatus UpdateViewportState();
  UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
  UpdateStatus UpdateBlendState();
  UpdateStatus UpdateDepthStencilState();
  UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
  UpdateStatus PopulateVertexBuffers();
  UpdateStatus PopulateSamplers();
  UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding);
  bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
                           IndexBufferInfo* index_buffer_info);
  bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
                             VulkanShader* vertex_shader);
  bool PopulateSamplers(VkCommandBuffer command_buffer,
                        VulkanShader* vertex_shader,
                        VulkanShader* pixel_shader);
  bool PopulateSampler(VkCommandBuffer command_buffer,
                       const Shader::TextureBinding& texture_binding);
  bool IssueCopy() override;

  SpirvShaderTranslator shader_translator_;
  xe::ui::vulkan::VulkanDevice* device_ = nullptr;

 private:
  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
  bool SetShadowRegister(float* dest, uint32_t register_name);
  struct UpdateRenderTargetsRegisters {
    uint32_t rb_modecontrol;
    uint32_t rb_surface_info;
    uint32_t rb_color_info;
    uint32_t rb_color1_info;
    uint32_t rb_color2_info;
    uint32_t rb_color3_info;
    uint32_t rb_color_mask;
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;
    uint32_t rb_depth_info;
  // TODO(benvanik): abstract behind context?
  // Queue used to submit work. This may be a dedicated queue for the command
  // processor and no locking will be required for use. If a dedicated queue
  // was not available this will be the device primary_queue and the
  // queue_mutex must be used to synchronize access to it.
  VkQueue queue_ = nullptr;
  std::mutex* queue_mutex_ = nullptr;

    UpdateRenderTargetsRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_render_targets_regs_;
  struct UpdateViewportStateRegisters {
    // uint32_t pa_cl_clip_cntl;
    uint32_t rb_surface_info;
    uint32_t pa_cl_vte_cntl;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_window_offset;
    uint32_t pa_sc_window_scissor_tl;
    uint32_t pa_sc_window_scissor_br;
    float pa_cl_vport_xoffset;
    float pa_cl_vport_yoffset;
    float pa_cl_vport_zoffset;
    float pa_cl_vport_xscale;
    float pa_cl_vport_yscale;
    float pa_cl_vport_zscale;
  std::unique_ptr<BufferCache> buffer_cache_;
  std::unique_ptr<PipelineCache> pipeline_cache_;
  std::unique_ptr<RenderCache> render_cache_;
  std::unique_ptr<TextureCache> texture_cache_;

    UpdateViewportStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_viewport_state_regs_;
  struct UpdateRasterizerStateRegisters {
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_screen_scissor_tl;
    uint32_t pa_sc_screen_scissor_br;
    uint32_t multi_prim_ib_reset_index;
    PrimitiveType prim_type;

    UpdateRasterizerStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_rasterizer_state_regs_;
  struct UpdateBlendStateRegisters {
    uint32_t rb_blendcontrol[4];
    float rb_blend_rgba[4];

    UpdateBlendStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_blend_state_regs_;
  struct UpdateDepthStencilStateRegisters {
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;

    UpdateDepthStencilStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_depth_stencil_state_regs_;
  struct UpdateShadersRegisters {
    PrimitiveType prim_type;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t sq_program_cntl;
    uint32_t sq_context_misc;
    VulkanShader* vertex_shader;
    VulkanShader* pixel_shader;

    UpdateShadersRegisters() { Reset(); }
    void Reset() {
      sq_program_cntl = 0;
      vertex_shader = pixel_shader = nullptr;
    }
  } update_shaders_regs_;
  std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
};

}  // namespace vulkan
@@ -0,0 +1,81 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/ui/vulkan/fenced_pools.h"

#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace ui {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

CommandBufferPool::CommandBufferPool(VkDevice device,
                                     uint32_t queue_family_index,
                                     VkCommandBufferLevel level)
    : BaseFencedPool(device), level_(level) {
  // Create the pool used for allocating buffers.
  // They are marked as transient (short-lived) and cycled frequently.
  VkCommandPoolCreateInfo cmd_pool_info;
  cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
  cmd_pool_info.pNext = nullptr;
  cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
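  // (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT also permits the implicit
  // reset that happens when a recycled buffer is re-begun with
  // vkBeginCommandBuffer.)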
  cmd_pool_info.queueFamilyIndex = queue_family_index;
  auto err =
      vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_);
  CheckResult(err, "vkCreateCommandPool");

  // Allocate a bunch of command buffers to start.
  constexpr uint32_t kDefaultCount = 32;
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level;
  command_buffer_info.commandBufferCount = kDefaultCount;
  VkCommandBuffer command_buffers[kDefaultCount];
  err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers);
  CheckResult(err, "vkAllocateCommandBuffers");
  for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
    PushEntry(command_buffers[i]);
  }
}

CommandBufferPool::~CommandBufferPool() {
  vkDestroyCommandPool(device_, command_pool_, nullptr);
  command_pool_ = nullptr;
}

VkCommandBuffer CommandBufferPool::AllocateEntry() {
  // TODO(benvanik): allocate a bunch at once?
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level_;
  command_buffer_info.commandBufferCount = 1;
  VkCommandBuffer command_buffer;
  auto err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer);
  CheckResult(err, "vkAllocateCommandBuffers");
  return command_buffer;
}

void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
  vkFreeCommandBuffers(device_, command_pool_, 1, &handle);
}

}  // namespace vulkan
}  // namespace ui
}  // namespace xe
@@ -0,0 +1,200 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
#define XENIA_UI_VULKAN_FENCED_POOLS_H_

#include <memory>

#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan.h"

namespace xe {
namespace ui {
namespace vulkan {

// Simple pool for Vulkan homogeneous objects that cannot be reused while
// in-flight.
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
// indicate their availability. If no objects are free when one is requested
// the caller is expected to create them.
template <typename T, typename HANDLE>
class BaseFencedPool {
 public:
  BaseFencedPool(VkDevice device) : device_(device) {}

  virtual ~BaseFencedPool() {
    // TODO(benvanik): wait on fence until done.
    assert_null(pending_batch_list_head_);

    // Run down free lists.
    while (free_batch_list_head_) {
      auto batch = free_batch_list_head_;
      free_batch_list_head_ = batch->next;
      delete batch;
    }
    while (free_entry_list_head_) {
      auto entry = free_entry_list_head_;
      free_entry_list_head_ = entry->next;
      static_cast<T*>(this)->FreeEntry(entry->handle);
      delete entry;
    }
  }

  // Checks all pending batches for completion and scavenges their entries.
  // This should be called as frequently as reasonable.
  void Scavenge() {
    while (pending_batch_list_head_) {
      auto batch = pending_batch_list_head_;
      if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
        // Batch has completed. Reclaim.
        pending_batch_list_head_ = batch->next;
        if (batch == pending_batch_list_tail_) {
          pending_batch_list_tail_ = nullptr;
        }
        batch->next = free_batch_list_head_;
        free_batch_list_head_ = batch;
        batch->entry_list_tail->next = free_entry_list_head_;
        free_entry_list_head_ = batch->entry_list_head;
        batch->entry_list_head = nullptr;
        batch->entry_list_tail = nullptr;
      } else {
        // Batch is still in-flight. Since batches are executed in order we
        // know no others after it could have completed, so early-exit.
        return;
      }
    }
  }

  // Begins a new batch.
  // All entries acquired within this batch will be marked as in-use until
  // the fence specified in EndBatch is signalled.
  void BeginBatch() {
    assert_null(open_batch_);
    Batch* batch = nullptr;
    if (free_batch_list_head_) {
      // Reuse a batch.
      batch = free_batch_list_head_;
      free_batch_list_head_ = batch->next;
      batch->next = nullptr;
    } else {
      // Allocate new batch.
      batch = new Batch();
      batch->next = nullptr;
    }
    batch->entry_list_head = nullptr;
    batch->entry_list_tail = nullptr;
    batch->fence = nullptr;
    open_batch_ = batch;
  }

  // Attempts to acquire an entry from the pool in the current batch.
  // If none are available a new one will be allocated.
  HANDLE AcquireEntry() {
    Entry* entry = nullptr;
    if (free_entry_list_head_) {
      // Slice off an entry from the free list.
      entry = free_entry_list_head_;
      free_entry_list_head_ = entry->next;
    } else {
      // No entry available; allocate new.
      entry = new Entry();
      entry->handle = static_cast<T*>(this)->AllocateEntry();
    }
    entry->next = nullptr;
    if (!open_batch_->entry_list_head) {
      open_batch_->entry_list_head = entry;
    }
    if (open_batch_->entry_list_tail) {
      open_batch_->entry_list_tail->next = entry;
    }
    open_batch_->entry_list_tail = entry;
    return entry->handle;
  }

  // Ends the current batch using the given fence to indicate when the batch
  // has completed execution on the GPU.
  void EndBatch(VkFence fence) {
    assert_not_null(open_batch_);

    // Close and see if we have anything.
    auto batch = open_batch_;
    open_batch_ = nullptr;
    if (!batch->entry_list_head) {
      // Nothing to do.
      batch->next = free_batch_list_head_;
      free_batch_list_head_ = batch;
      return;
    }

    // Track the fence.
    batch->fence = fence;

    // Append to the end of the batch list.
    batch->next = nullptr;
    if (!pending_batch_list_head_) {
      pending_batch_list_head_ = batch;
    }
    if (pending_batch_list_tail_) {
      pending_batch_list_tail_->next = batch;
      pending_batch_list_tail_ = batch;
    } else {
      pending_batch_list_tail_ = batch;
    }
  }

 protected:
  void PushEntry(HANDLE handle) {
    auto entry = new Entry();
    entry->next = free_entry_list_head_;
    entry->handle = handle;
    free_entry_list_head_ = entry;
  }

  VkDevice device_ = nullptr;

 private:
  struct Entry {
    Entry* next;
    HANDLE handle;
  };
  struct Batch {
    Batch* next;
    Entry* entry_list_head;
    Entry* entry_list_tail;
    VkFence fence;
  };

  Batch* free_batch_list_head_ = nullptr;
  Entry* free_entry_list_head_ = nullptr;
  Batch* pending_batch_list_head_ = nullptr;
  Batch* pending_batch_list_tail_ = nullptr;
  Batch* open_batch_ = nullptr;
};

class CommandBufferPool
    : public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
 public:
  CommandBufferPool(VkDevice device, uint32_t queue_family_index,
                    VkCommandBufferLevel level);
  ~CommandBufferPool() override;

 protected:
  friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
  VkCommandBuffer AllocateEntry();
  void FreeEntry(VkCommandBuffer handle);

  VkCommandPool command_pool_ = nullptr;
  VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
};
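
// A rough usage sketch (fence creation and queue submission are illustrative
// here, not part of this header):
//   CommandBufferPool pool(device, queue_family_index,
//                          VK_COMMAND_BUFFER_LEVEL_PRIMARY);
//   pool.BeginBatch();
//   VkCommandBuffer command_buffer = pool.AcquireEntry();
//   // ... record command_buffer and vkQueueSubmit it, signalling `fence` ...
//   pool.EndBatch(fence);
//   pool.Scavenge();  // Call periodically; recycles entries once `fence`
//                     // signals.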

}  // namespace vulkan
}  // namespace ui
}  // namespace xe

#endif  // XENIA_UI_VULKAN_FENCED_POOLS_H_