diff --git a/src/xenia/gpu/buffer.cc b/src/xenia/gpu/buffer.cc
deleted file mode 100644
index 499cb43a6..000000000
--- a/src/xenia/gpu/buffer.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/buffer.h>
-
-#include <xenia/gpu/xenos/ucode_disassembler.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-Buffer::Buffer(
-    const uint8_t* src_ptr, size_t length) :
-    src_(src_ptr), length_(length) {
-}
-
-Buffer::~Buffer() {
-}
-
-IndexBuffer::IndexBuffer(const IndexBufferInfo& info,
-                         const uint8_t* src_ptr, size_t length)
-    : Buffer(src_ptr, length),
-      info_(info) {
-}
-
-IndexBuffer::~IndexBuffer() {}
-
-VertexBuffer::VertexBuffer(const VertexBufferInfo& info,
-                           const uint8_t* src_ptr, size_t length)
-    : Buffer(src_ptr, length),
-      info_(info) {
-}
-
-VertexBuffer::~VertexBuffer() {}
diff --git a/src/xenia/gpu/buffer.h b/src/xenia/gpu/buffer.h
deleted file mode 100644
index 9c8e3c654..000000000
--- a/src/xenia/gpu/buffer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_BUFFER_H_
-#define XENIA_GPU_BUFFER_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/xenos/ucode.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-class Buffer {
-public:
-  Buffer(const uint8_t* src_ptr, size_t length);
-  virtual ~Buffer();
-
-  const uint8_t* src() const { return src_; }
-  size_t length() const { return length_; }
-  uint64_t hash() const { return hash_; }
-
-  virtual bool FetchNew(uint64_t hash) = 0;
-  virtual bool FetchDirty(uint64_t hash) = 0;
-
-protected:
-  const uint8_t* src_;
-  size_t      length_;
-  uint64_t    hash_;
-};
-
-
-struct IndexBufferInfo {
-  bool index_32bit;
-  uint32_t index_count;
-  uint32_t index_size;
-  uint32_t endianness;
-};
-
-
-class IndexBuffer : public Buffer {
-public:
-  IndexBuffer(const IndexBufferInfo& info,
-              const uint8_t* src_ptr, size_t length);
-  virtual ~IndexBuffer();
-
-protected:
-  IndexBufferInfo info_;
-};
-
-
-struct VertexBufferLayout {
-  uint32_t stride_words;
-  uint32_t element_count;
-  struct {
-    uint32_t format;
-    uint32_t offset_words;
-    uint32_t size_words;
-  } elements[16];
-};
-
-struct VertexBufferInfo {
-  VertexBufferLayout layout;
-};
-
-
-class VertexBuffer : public Buffer {
-public:
-  VertexBuffer(const VertexBufferInfo& info,
-               const uint8_t* src_ptr, size_t length);
-  virtual ~VertexBuffer();
-
-protected:
-  VertexBufferInfo info_;
-};
-
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_BUFFER_H_
diff --git a/src/xenia/gpu/buffer_cache.cc b/src/xenia/gpu/buffer_cache.cc
deleted file mode 100644
index cc963d817..000000000
--- a/src/xenia/gpu/buffer_cache.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/buffer_cache.h>
-
-#include <xenia/gpu/buffer.h>
-
-
-using namespace std;
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-BufferCache::BufferCache() {
-}
-
-BufferCache::~BufferCache() {
-  Clear();
-}
-
-IndexBuffer* BufferCache::FetchIndexBuffer(
-    const IndexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length) {
-  size_t key = hash_combine(info.endianness, info.index_32bit, info.index_count, info.index_size);
-  size_t hash = xe_hash64(src_ptr, length);
-  auto it = index_buffer_map_.find(key);
-  if (it != index_buffer_map_.end()) {
-    if (hash == it->second->hash()) {
-      return it->second;
-    } else {
-      return it->second->FetchDirty(hash) ? it->second : nullptr;
-    }
-  } else {
-    auto buffer = CreateIndexBuffer(info, src_ptr, length);
-    index_buffer_map_.insert({ key, buffer });
-    if (!buffer->FetchNew(hash)) {
-      return nullptr;
-    }
-    return buffer;
-  }
-}
-
-VertexBuffer* BufferCache::FetchVertexBuffer(
-    const VertexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length) {
-  size_t key = reinterpret_cast<size_t>(src_ptr);
-  size_t hash = xe_hash64(src_ptr, length);
-  auto it = vertex_buffer_map_.find(key);
-  if (it != vertex_buffer_map_.end()) {
-    if (hash == it->second->hash()) {
-      return it->second;
-    } else {
-      return it->second->FetchDirty(hash) ? it->second : nullptr;
-    }
-  } else {
-    auto buffer = CreateVertexBuffer(info, src_ptr, length);
-    vertex_buffer_map_.insert({ key, buffer });
-    if (!buffer->FetchNew(hash)) {
-      return nullptr;
-    }
-    return buffer;
-  }
-}
-
-void BufferCache::Clear() {
-  for (auto it = index_buffer_map_.begin();
-       it != index_buffer_map_.end(); ++it) {
-    auto buffer = it->second;
-    delete buffer;
-  }
-  index_buffer_map_.clear();
-}
diff --git a/src/xenia/gpu/buffer_cache.h b/src/xenia/gpu/buffer_cache.h
deleted file mode 100644
index bcba6f9de..000000000
--- a/src/xenia/gpu/buffer_cache.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_BUFFER_CACHE_H_
-#define XENIA_GPU_BUFFER_CACHE_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/buffer.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-class BufferCache {
-public:
-  BufferCache();
-  virtual ~BufferCache();
-
-  IndexBuffer* FetchIndexBuffer(
-      const IndexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length);
-
-  VertexBuffer* FetchVertexBuffer(
-      const VertexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length);
-
-  void Clear();
-
-protected:
-  virtual IndexBuffer* CreateIndexBuffer(
-      const IndexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length) = 0;
-  virtual VertexBuffer* CreateVertexBuffer(
-      const VertexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length) = 0;
-
-private:
-  std::unordered_map<uint64_t, IndexBuffer*> index_buffer_map_;
-  std::unordered_map<uint64_t, VertexBuffer*> vertex_buffer_map_;
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_BUFFER_CACHE_H_
diff --git a/src/xenia/gpu/buffer_resource.cc b/src/xenia/gpu/buffer_resource.cc
index d6019d95f..9f9accb9b 100644
--- a/src/xenia/gpu/buffer_resource.cc
+++ b/src/xenia/gpu/buffer_resource.cc
@@ -15,3 +15,42 @@ using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+
+// Hands the memory range to the PagedResource base; no other state is set.
+BufferResource::BufferResource(const MemoryRange& memory_range)
+    : PagedResource(memory_range) {}
+
+BufferResource::~BufferResource() = default;
+
+// Creates the handle on first use, then invalidates the range if dirtied.
+int BufferResource::Prepare() {
+  if (!handle() && CreateHandle()) {
+    XELOGE("Unable to create buffer handle");
+    return 1;
+  }
+  if (!dirtied_) {
+    return 0;
+  }
+  // Clear before the call, but restore the flag on a nonzero result so the
+  // next Prepare() retries instead of treating stale contents as clean.
+  dirtied_ = false;
+  const int result = InvalidateRegion(memory_range_);  // pass dirty regions?
+  if (result) dirtied_ = true;
+  return result;
+}
+
+// Keeps a copy of info alongside the range passed to BufferResource.
+IndexBufferResource::IndexBufferResource(const MemoryRange& memory_range,
+                                         const Info& info)
+    : BufferResource(memory_range), info_(info) {}
+
+// Defaulted: nothing to release explicitly at this level.
+IndexBufferResource::~IndexBufferResource() = default;
+
+// Keeps a copy of info alongside the range passed to BufferResource.
+VertexBufferResource::VertexBufferResource(const MemoryRange& memory_range,
+                                           const Info& info)
+    : BufferResource(memory_range), info_(info) {}
+
+// Defaulted: nothing to release explicitly at this level.
+VertexBufferResource::~VertexBufferResource() = default;
diff --git a/src/xenia/gpu/buffer_resource.h b/src/xenia/gpu/buffer_resource.h
index 385a5049a..a88d1ae06 100644
--- a/src/xenia/gpu/buffer_resource.h
+++ b/src/xenia/gpu/buffer_resource.h
@@ -10,7 +10,8 @@
 #ifndef XENIA_GPU_BUFFER_RESOURCE_H_
 #define XENIA_GPU_BUFFER_RESOURCE_H_
 
-#include <xenia/core.h>
+#include <xenia/gpu/resource.h>
+#include <xenia/gpu/xenos/ucode.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 
@@ -18,8 +19,76 @@ namespace xe {
 namespace gpu {
 
 
-class BufferResource : public Resource {
+class BufferResource : public PagedResource {
 public:
+  BufferResource(const MemoryRange& memory_range);
+  ~BufferResource() override;
+
+  virtual int Prepare();  // 0 when clean; 1 if CreateHandle() fails.
+
+protected:
+  virtual int CreateHandle() = 0;  // Prepare() treats nonzero as failure.
+  virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
+};
+
+
+enum IndexFormat {
+  INDEX_FORMAT_16BIT = 0,  // The draw path sizes these indices at 2 bytes.
+  INDEX_FORMAT_32BIT = 1,  // The draw path sizes these indices at 4 bytes.
+};
+
+class IndexBufferResource : public BufferResource {
+public:
+  struct Info {  // Index width and byte order; compared bytewise by Equals().
+    IndexFormat format;
+    xenos::XE_GPU_ENDIAN endianness;
+  };
+
+  IndexBufferResource(const MemoryRange& memory_range,
+                      const Info& info);
+  ~IndexBufferResource() override;
+
+  const Info& info() const { return info_; }
+
+  bool Equals(const void* info_ptr, size_t info_length) override {
+    return info_length == sizeof(Info) &&  // Reject blobs of another size.
+           memcmp(info_ptr, &info_, info_length) == 0;
+  }
+
+protected:
+  Info info_;  // Copy of the info passed to the constructor.
+};
+
+
+class VertexBufferResource : public BufferResource {
+public:
+  struct DeclElement {  // One entry of Info::elements.
+    xenos::instr_fetch_vtx_t vtx_fetch;
+    uint32_t format;
+    uint32_t offset_words;
+    uint32_t size_words;
+    bool is_signed;
+    bool is_normalized;
+  };
+  struct Info {
+    uint32_t stride_words;
+    uint32_t element_count;  // presumably the number of used entries - confirm
+    DeclElement elements[16];  // Fixed capacity of 16 entries.
+  };
+
+  VertexBufferResource(const MemoryRange& memory_range,
+                       const Info& info);
+  ~VertexBufferResource() override;
+
+  const Info& info() const { return info_; }
+  // NOTE(review): memcmp also compares padding bytes; confirm Info is zeroed.
+  bool Equals(const void* info_ptr, size_t info_length) override {
+    return info_length == sizeof(Info) &&  // Reject blobs of another size.
+           memcmp(info_ptr, &info_, info_length) == 0;
+  }
+
+protected:
+  Info info_;  // Copy of the info passed to the constructor.
 };
 
 
diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc
index bb7a02bc7..c7a6a166b 100644
--- a/src/xenia/gpu/command_processor.cc
+++ b/src/xenia/gpu/command_processor.cc
@@ -9,9 +9,782 @@
 
 #include <xenia/gpu/command_processor.h>
 
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/graphics_driver.h>
+#include <xenia/gpu/graphics_system.h>
+#include <xenia/gpu/xenos/packets.h>
+
 
-using namespace std;
 using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+// Ring buffer trace logging, emitted only when FLAGS_trace_ring_buffer is set.
+#define XETRACECP(fmt, ...) do { if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__); } while (0)
+
+
+CommandProcessor::CommandProcessor(
+    GraphicsSystem* graphics_system, Memory* memory) :
+    graphics_system_(graphics_system), memory_(memory), driver_(nullptr) {
+  write_ptr_index_event_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+  // Ring buffer state starts zeroed; Initialize() fills in the buffer fields.
+  primary_buffer_ptr_     = 0;
+  primary_buffer_size_    = 0;
+  read_ptr_index_         = 0;
+  read_ptr_update_freq_   = 0;
+  read_ptr_writeback_ptr_ = 0;
+  write_ptr_index_        = 0;
+  write_ptr_max_index_    = 0;
+  // Baseline that QueryTime() subtracts from later counter readings.
+  LARGE_INTEGER perf_counter;
+  QueryPerformanceCounter(&perf_counter);
+  time_base_ = perf_counter.QuadPart;
+  counter_ = 0;
+}
+
+CommandProcessor::~CommandProcessor() {
+  SetEvent(write_ptr_index_event_);  // presumably wakes a waiting Pump()
+  CloseHandle(write_ptr_index_event_);
+}
+
+uint64_t CommandProcessor::QueryTime() {
+  LARGE_INTEGER perf_counter;
+  QueryPerformanceCounter(&perf_counter);
+  return perf_counter.QuadPart - time_base_;  // Ticks since construction.
+}
+
+// Attaches the driver and ring buffer, deriving its size from page_count.
+void CommandProcessor::Initialize(GraphicsDriver* driver,
+                                  uint32_t ptr, uint32_t page_count) {
+  driver_               = driver;
+  primary_buffer_ptr_   = ptr;
+  // Unverified guess at turning page_count back into the byte size that the
+  // physical allocation used.
+  primary_buffer_size_  = 1 << (0x1C - page_count - 1);
+  read_ptr_index_       = 0;
+
+  // The driver translates addresses using the buffer's upper address bits.
+  driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
+}
+
+void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
+                                                  uint32_t block_size) {
+  // CP_RB_RPTR_ADDR (0x70C): ptr = RB_RPTR_ADDR, where the read pointer is
+  // written back, offset from the ring buffer's upper address bits.
+  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;
+  // CP_RB_CNTL (0x704): block_size = RB_BLKSZ, used as a power-of-two exponent
+  // for the read pointer update interval; integer shift instead of pow().
+  XEASSERT(block_size < 32);
+  read_ptr_update_freq_ = (1u << block_size) / 4;
+}
+
+void CommandProcessor::UpdateWritePointer(uint32_t value) {
+  write_ptr_max_index_  = MAX(write_ptr_max_index_, value);  // Running max.
+  write_ptr_index_      = value;
+  SetEvent(write_ptr_index_event_);  // Signals the event Pump() waits on.
+}
+
+// Waits briefly for the write index to move, executes the newly written
+// ring buffer range, then stores the read index for write-back if enabled.
+void CommandProcessor::Pump() {
+  while (write_ptr_index_ == 0xBAADF00D ||
+         read_ptr_index_ == write_ptr_index_) {
+    // Check if the pointer has moved.
+    // We wait a short bit here to yield time. Since we are also running the
+    // main window display we don't want to pause too long, though.
+    // YieldProcessor();
+    const int wait_time_ms = 1;
+    if (WaitForSingleObject(write_ptr_index_event_,
+                            wait_time_ms) == WAIT_TIMEOUT) {
+      return;
+    }
+  }
+
+  // Bring local so we don't have to worry about it changing out from under
+  // us.
+  const uint32_t write_ptr_index = write_ptr_index_;
+  if (read_ptr_index_ == write_ptr_index) {
+    return;
+  }
+
+  // Process the new commands.
+  XETRACECP("Command processor thread work");
+
+  // Execute. Note that we handle wraparound transparently.
+  ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
+  read_ptr_index_ = write_ptr_index;
+
+  // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
+  //     that many indices.
+  if (read_ptr_writeback_ptr_) {
+    uint8_t* p = memory_->membase();
+    XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
+  }
+}
+
+// Executes ring buffer packets from word index start_index up to end_index;
+// wraparound is handled by the pointer mask applied as args.ptr advances.
+void CommandProcessor::ExecutePrimaryBuffer(
+    uint32_t start_index, uint32_t end_index) {
+  SCOPE_profile_cpu_f("gpu");
+
+  const uint32_t high_bits = primary_buffer_ptr_ & ~0x1FFFFFFF;
+  const uint32_t first_ptr =
+      high_bits | ((primary_buffer_ptr_ + start_index * 4) & 0x1FFFFFFF);
+  const uint32_t end_ptr =
+      high_bits | ((primary_buffer_ptr_ + end_index * 4) & 0x1FFFFFFF);
+  XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
+            first_ptr, start_index, end_index);
+
+  // Execute commands!
+  PacketArgs args;
+  args.ptr          = first_ptr;
+  args.base_ptr     = primary_buffer_ptr_;
+  args.max_address  = primary_buffer_ptr_ + primary_buffer_size_;
+  args.ptr_mask     = (primary_buffer_size_ / 4) - 1;
+  uint32_t done = 0;
+  while (args.ptr != end_ptr) {
+    done += ExecutePacket(args);
+    XEASSERT(args.ptr < args.max_address);
+  }
+  XEASSERT(end_index <= start_index || done == (end_index - start_index));
+
+  XETRACECP("           ExecutePrimaryBuffer End");
+}
+
+// Runs length words of packets starting at ptr, without any wraparound.
+void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
+  XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
+
+  PacketArgs args;
+  args.ptr          = ptr;
+  args.base_ptr     = ptr;
+  args.max_address  = ptr + length * 4;
+  args.ptr_mask     = 0;  // Zero mask: AdvancePtr() skips its wrap step.
+  uint32_t words_done = 0;
+  while (words_done < length) {
+    words_done += ExecutePacket(args);
+    XEASSERT(words_done <= length);
+  }
+  XETRACECP("           ExecuteIndirectBuffer End");
+}
+
+#define LOG_DATA(count) /* traces `count` payload words after the header */ \
+  for (uint32_t log_i_ = 0; log_i_ < (count); log_i_++) { \
+    XETRACECP("[%.8X]   %.8X", \
+              packet_ptr + (1 + log_i_) * 4, \
+              XEGETUINT32BE(packet_base + 1 * 4 + log_i_ * 4)); \
+  }
+
+// Moves args.ptr forward n words; a nonzero ptr_mask wraps it from base_ptr.
+void CommandProcessor::AdvancePtr(PacketArgs& args, uint32_t n) {
+  args.ptr += n * 4;
+  if (!args.ptr_mask) return;  // No mask set: leave the pointer linear.
+  const auto word_index = (args.ptr - args.base_ptr) / 4;
+  args.ptr = args.base_ptr + (word_index & args.ptr_mask) * 4;
+}
+#define ADVANCE_PTR(n) AdvancePtr(args, n)  // Needs a local `args` in scope.
+#define PEEK_PTR() /* reads the word at args.ptr; needs a local `p` */ \
+    XEGETUINT32BE(p + args.ptr)
+#define READ_PTR() /* two statements: only use as `x = READ_PTR();` */ \
+    XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
+
+uint32_t CommandProcessor::ExecutePacket(PacketArgs& args) {
+  uint8_t* p = memory_->membase();
+  RegisterFile* regs = driver_->register_file();
+
+  uint32_t packet_ptr = args.ptr;
+  const uint8_t* packet_base = p + packet_ptr;
+  const uint32_t packet = PEEK_PTR();
+  ADVANCE_PTR(1);
+  const uint32_t packet_type = packet >> 30;
+  if (packet == 0) {
+    XETRACECP("[%.8X] Packet(%.8X): 0?",
+              packet_ptr, packet);
+    return 1;
+  }
+
+  switch (packet_type) {
+  case 0x00:
+    {
+      // Type-0 packet.
+      // Write count registers in sequence to the registers starting at
+      // (base_index << 2).
+      XETRACECP("[%.8X] Packet(%.8X): set registers:",
+                packet_ptr, packet);
+      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
+      uint32_t base_index = (packet & 0x7FFF);
+      uint32_t write_one_reg = (packet >> 15) & 0x1;
+      for (uint32_t m = 0; m < count; m++) {
+        uint32_t reg_data = PEEK_PTR();
+        uint32_t target_index = write_one_reg ? base_index : base_index + m;
+        const char* reg_name = regs->GetRegisterName(target_index);
+        XETRACECP("[%.8X]   %.8X -> %.4X %s",
+                  args.ptr,
+                  reg_data, target_index, reg_name ? reg_name : "");
+        ADVANCE_PTR(1);
+        WriteRegister(packet_ptr, target_index, reg_data);
+      }
+      return 1 + count;
+    }
+    break;
+  case 0x01:
+    {
+      // Type-1 packet.
+      // Contains two registers of data. Type-0 should be more common.
+      XETRACECP("[%.8X] Packet(%.8X): set registers:",
+                packet_ptr, packet);
+      uint32_t reg_index_1 = packet & 0x7FF;
+      uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
+      uint32_t reg_ptr_1 = args.ptr;
+      uint32_t reg_data_1 = READ_PTR();
+      uint32_t reg_ptr_2 = args.ptr;
+      uint32_t reg_data_2 = READ_PTR();
+      const char* reg_name_1 = regs->GetRegisterName(reg_index_1);
+      const char* reg_name_2 = regs->GetRegisterName(reg_index_2);
+      XETRACECP("[%.8X]   %.8X -> %.4X %s",
+                reg_ptr_1,
+                reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
+      XETRACECP("[%.8X]   %.8X -> %.4X %s",
+                reg_ptr_2,
+                reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
+      WriteRegister(packet_ptr, reg_index_1, reg_data_1);
+      WriteRegister(packet_ptr, reg_index_2, reg_data_2);
+      return 1 + 2;
+    }
+    break;
+  case 0x02:
+    // Type-2 packet.
+    // No-op. Do nothing.
+    XETRACECP("[%.8X] Packet(%.8X): padding",
+              packet_ptr, packet);
+    return 1;
+  case 0x03:
+    {
+      // Type-3 packet.
+      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
+      uint32_t opcode = (packet >> 8) & 0x7F;
+      // & 1 == predicate, maybe?
+
+      switch (opcode) {
+      case PM4_ME_INIT:
+        // initialize CP's micro-engine
+        XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT",
+                  packet_ptr, packet);
+        LOG_DATA(count);
+        ADVANCE_PTR(count);
+        break;
+
+      case PM4_NOP:
+        // skip N 32-bit words to get to the next packet
+        // No-op, ignore some data.
+        XETRACECP("[%.8X] Packet(%.8X): PM4_NOP",
+                  packet_ptr, packet);
+        LOG_DATA(count);
+        ADVANCE_PTR(count);
+        break;
+
+      case PM4_INTERRUPT:
+        // generate interrupt from the command stream
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t cpu_mask = READ_PTR();
+          for (int n = 0; n < 6; n++) {
+            if (cpu_mask & (1 << n)) {
+              graphics_system_->DispatchInterruptCallback(1, n);
+            }
+          }
+        }
+        break;
+
+      case PM4_INDIRECT_BUFFER:
+        // indirect buffer dispatch
+        {
+          uint32_t list_ptr = READ_PTR();
+          uint32_t list_length = READ_PTR();
+          XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
+                    packet_ptr, packet, list_ptr, list_length);
+          ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
+        }
+        break;
+
+      case PM4_WAIT_REG_MEM:
+        // wait until a register or memory location is a specific value
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t wait_info = READ_PTR();
+          uint32_t poll_reg_addr = READ_PTR();
+          uint32_t ref = READ_PTR();
+          uint32_t mask = READ_PTR();
+          uint32_t wait = READ_PTR();
+          bool matched = false;
+          do {
+            uint32_t value;
+            if (wait_info & 0x10) {
+              // Memory.
+              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
+              poll_reg_addr &= ~0x3;
+              value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
+              value = GpuSwap(value, endianness);
+            } else {
+              // Register.
+              XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
+
+              if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
+                // Waiting for coherency. We should have all the info we need
+                // now (base+size+mode), so kick it off.
+                MakeCoherent();
+              }
+
+              value = regs->values[poll_reg_addr].u32;
+            }
+            switch (wait_info & 0x7) {
+            case 0x0: // Never.
+              matched = false;
+              break;
+            case 0x1: // Less than reference.
+              matched = (value & mask) < ref;
+              break;
+            case 0x2: // Less than or equal to reference.
+              matched = (value & mask) <= ref;
+              break;
+            case 0x3: // Equal to reference.
+              matched = (value & mask) == ref;
+              break;
+            case 0x4: // Not equal to reference.
+              matched = (value & mask) != ref;
+              break;
+            case 0x5: // Greater than or equal to reference.
+              matched = (value & mask) >= ref;
+              break;
+            case 0x6: // Greater than reference.
+              matched = (value & mask) > ref;
+              break;
+            case 0x7: // Always
+              matched = true;
+              break;
+            }
+            if (!matched) {
+              // Wait.
+              if (wait >= 0x100) {
+                Sleep(wait / 0x100);
+              } else {
+                SwitchToThread();
+              }
+            }
+          } while (!matched);
+        }
+        break;
+
+      case PM4_REG_RMW:
+        // register read/modify/write
+        // ? (used during shader upload and edram setup)
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t rmw_info = READ_PTR();
+          uint32_t and_mask = READ_PTR();
+          uint32_t or_mask = READ_PTR();
+          uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
+          if ((rmw_info >> 30) & 0x1) {
+            // | reg
+            value |= regs->values[or_mask & 0x1FFF].u32;
+          } else {
+            // | imm
+            value |= or_mask;
+          }
+          if ((rmw_info >> 31) & 0x1) {
+            // & reg
+            value &= regs->values[and_mask & 0x1FFF].u32;
+          } else {
+            // & imm
+            value &= and_mask;
+          }
+          WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
+        }
+        break;
+
+      case PM4_COND_WRITE:
+        // conditional write to memory or register
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t wait_info = READ_PTR();
+          uint32_t poll_reg_addr = READ_PTR();
+          uint32_t ref = READ_PTR();
+          uint32_t mask = READ_PTR();
+          uint32_t write_reg_addr = READ_PTR();
+          uint32_t write_data = READ_PTR();
+          uint32_t value;
+          if (wait_info & 0x10) {
+            // Memory.
+            XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
+            poll_reg_addr &= ~0x3;
+            value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
+            value = GpuSwap(value, endianness);
+          } else {
+            // Register.
+            XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
+            value = regs->values[poll_reg_addr].u32;
+          }
+          bool matched = false;
+          switch (wait_info & 0x7) {
+          case 0x0: // Never.
+            matched = false;
+            break;
+          case 0x1: // Less than reference.
+            matched = (value & mask) < ref;
+            break;
+          case 0x2: // Less than or equal to reference.
+            matched = (value & mask) <= ref;
+            break;
+          case 0x3: // Equal to reference.
+            matched = (value & mask) == ref;
+            break;
+          case 0x4: // Not equal to reference.
+            matched = (value & mask) != ref;
+            break;
+          case 0x5: // Greater than or equal to reference.
+            matched = (value & mask) >= ref;
+            break;
+          case 0x6: // Greater than reference.
+            matched = (value & mask) > ref;
+            break;
+          case 0x7: // Always
+            matched = true;
+            break;
+          }
+          if (matched) {
+            // Write.
+            if (wait_info & 0x100) {
+              // Memory.
+              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
+              write_reg_addr &= ~0x3;
+              write_data = GpuSwap(write_data, endianness);
+              XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
+                            write_data);
+            } else {
+              // Register.
+              WriteRegister(packet_ptr, write_reg_addr, write_data);
+            }
+          }
+        }
+        break;
+
+      case PM4_EVENT_WRITE:
+        // generate an event that creates a write to memory when completed
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t initiator = READ_PTR();
+          if (count == 1) {
+            // Just an event flag? Where does this write?
+          } else {
+            // Write to an address.
+            XEASSERTALWAYS();
+            ADVANCE_PTR(count - 1);
+          }
+        }
+        break;
+      case PM4_EVENT_WRITE_SHD:
+        // generate a VS|PS_done event
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t initiator = READ_PTR();
+          uint32_t address = READ_PTR();
+          uint32_t value = READ_PTR();
+          // Writeback initiator.
+          WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
+                        initiator & 0x1F);
+          uint32_t data_value;
+          if ((initiator >> 31) & 0x1) {
+            // Write counter (GPU vblank counter?).
+            data_value = counter_;
+          } else {
+            // Write value.
+            data_value = value;
+          }
+          XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
+          address &= ~0x3;
+          data_value = GpuSwap(data_value, endianness);
+          XESETUINT32LE(p + GpuToCpu(address), data_value);
+        }
+        break;
+
+      case PM4_DRAW_INDX:
+        // initiate fetch of index buffer and draw
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          // d0 = viz query info
+          uint32_t d0 = READ_PTR();
+          uint32_t d1 = READ_PTR();
+          uint32_t index_count = d1 >> 16;
+          uint32_t prim_type = d1 & 0x3F;
+          uint32_t src_sel = (d1 >> 6) & 0x3;
+          if (!driver_->PrepareDraw(draw_command_)) {
+            draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
+            draw_command_.start_index = 0;
+            draw_command_.index_count = index_count;
+            draw_command_.base_vertex = 0;
+            if (src_sel == 0x0) {
+              // Indexed draw.
+              // TODO(benvanik): detect subregions of larger index buffers!
+              uint32_t index_base = READ_PTR();
+              uint32_t index_size = READ_PTR();
+              uint32_t endianness = index_size >> 29;
+              index_size &= 0x00FFFFFF;
+              bool index_32bit = (d1 >> 11) & 0x1;
+              index_size *= index_32bit ? 4 : 2;
+              driver_->PrepareDrawIndexBuffer(
+                  draw_command_,
+                  index_base, index_size,
+                  (XE_GPU_ENDIAN)endianness,
+                  index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT);
+            } else if (src_sel == 0x2) {
+              // Auto draw.
+              draw_command_.index_buffer = nullptr;
+            } else {
+              // Unknown source select.
+              XEASSERTALWAYS();
+            }
+            driver_->Draw(draw_command_);
+          } else {
+            if (src_sel == 0x0) {
+              ADVANCE_PTR(2);  // skip
+            }
+          }
+        }
+        break;
+      case PM4_DRAW_INDX_2:
+        // draw using supplied indices in packet
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t d0 = READ_PTR();
+          uint32_t index_count = d0 >> 16;
+          uint32_t prim_type = d0 & 0x3F;
+          uint32_t src_sel = (d0 >> 6) & 0x3;
+          XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
+          if (!driver_->PrepareDraw(draw_command_)) {
+            draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
+            draw_command_.start_index = 0;
+            draw_command_.index_count = index_count;
+            draw_command_.base_vertex = 0;
+            draw_command_.index_buffer = nullptr;
+            driver_->Draw(draw_command_);
+          }
+        }
+        break;
+
+      case PM4_SET_CONSTANT:
+        // load constant into chip and to memory
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
+                    packet_ptr, packet);
+          // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
+          //                                     reg - 0x2000
+          uint32_t offset_type = READ_PTR();
+          uint32_t index = offset_type & 0x7FF;
+          uint32_t type = (offset_type >> 16) & 0xFF;
+          switch (type) {
+          case 0x4: // REGISTER
+            index += 0x2000; // registers
+            for (uint32_t n = 0; n < count - 1; n++, index++) {
+              uint32_t data = READ_PTR();
+              const char* reg_name = regs->GetRegisterName(index);
+              XETRACECP("[%.8X]   %.8X -> %.4X %s",
+                        packet_ptr + (1 + n) * 4,
+                        data, index, reg_name ? reg_name : "");
+              WriteRegister(packet_ptr, index, data);
+            }
+            break;
+          default:
+            XEASSERTALWAYS();
+            break;
+          }
+        }
+        break;
+      case PM4_LOAD_ALU_CONSTANT:
+        // load constants from memory
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
+                    packet_ptr, packet);
+          uint32_t address = READ_PTR();
+          address &= 0x3FFFFFFF;
+          uint32_t offset_type = READ_PTR();
+          uint32_t index = offset_type & 0x7FF;
+          uint32_t size = READ_PTR();
+          size &= 0xFFF;
+          index += 0x4000; // alu constants
+          for (uint32_t n = 0; n < size; n++, index++) {
+            uint32_t data = XEGETUINT32BE(
+                p + GpuToCpu(packet_ptr, address + n * 4));
+            const char* reg_name = regs->GetRegisterName(index);
+            XETRACECP("[%.8X]   %.8X -> %.4X %s",
+                      packet_ptr,
+                      data, index, reg_name ? reg_name : "");
+            WriteRegister(packet_ptr, index, data);
+          }
+        }
+        break;
+
+      case PM4_IM_LOAD:
+        // load sequencer instruction memory (pointer-based)
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t addr_type = READ_PTR();
+          uint32_t type = addr_type & 0x3;
+          uint32_t addr = addr_type & ~0x3;
+          uint32_t start_size = READ_PTR();
+          uint32_t start = start_size >> 16;
+          uint32_t size = start_size & 0xFFFF; // dwords
+          XEASSERT(start == 0);
+          driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
+                              GpuToCpu(packet_ptr, addr), size * 4, start);
+        }
+        break;
+      case PM4_IM_LOAD_IMMEDIATE:
+        // load sequencer instruction memory (code embedded in packet)
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t type = READ_PTR();
+          uint32_t start_size = READ_PTR();
+          uint32_t start = start_size >> 16;
+          uint32_t size = start_size & 0xFFFF; // dwords
+          XEASSERT(start == 0);
+          // TODO(benvanik): figure out if this could wrap.
+          XEASSERT(args.ptr + size * 4 < args.max_address);
+          driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
+                              args.ptr, size * 4, start);
+          ADVANCE_PTR(size);
+        }
+        break;
+
+      case PM4_INVALIDATE_STATE:
+        // selective invalidation of state pointers
+        {
+          XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
+                    packet_ptr, packet);
+          LOG_DATA(count);
+          uint32_t mask = READ_PTR();
+          //driver_->InvalidateState(mask);
+        }
+        break;
+
+      case PM4_SET_BIN_MASK_LO:
+        {
+          uint32_t value = READ_PTR();
+          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
+                    packet_ptr, packet, value);
+        }
+        break;
+      case PM4_SET_BIN_MASK_HI:
+        {
+          uint32_t value = READ_PTR();
+          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
+                    packet_ptr, packet, value);
+        }
+        break;
+      case PM4_SET_BIN_SELECT_LO:
+        {
+          uint32_t value = READ_PTR();
+          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
+                    packet_ptr, packet, value);
+        }
+        break;
+      case PM4_SET_BIN_SELECT_HI:
+        {
+          uint32_t value = READ_PTR();
+          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
+                    packet_ptr, packet, value);
+        }
+        break;
+
+      // Ignored packets - useful if breaking on the default handler below.
+      case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
+        XETRACECP("[%.8X] Packet(%.8X): unknown!",
+                  packet_ptr, packet);
+        LOG_DATA(count);
+        ADVANCE_PTR(count);
+        break;
+
+      default:
+        XETRACECP("[%.8X] Packet(%.8X): unknown!",
+                  packet_ptr, packet);
+        LOG_DATA(count);
+        ADVANCE_PTR(count);
+        break;
+      }
+
+      return 1 + count;
+    }
+    break;
+  }
+
+  return 0;
+}
+
+void CommandProcessor::WriteRegister(
+    uint32_t packet_ptr, uint32_t index, uint32_t value) {
+  // Stores value in the register file, then applies per-register side effects.
+  RegisterFile* reg_file = driver_->register_file();
+  XEASSERT(index < RegisterFile::kRegisterCount);
+  reg_file->values[index].u32 = value;
+
+  // A COHER status write is kept with bit 31 forced on as a dirty flag; the
+  // intent is to stall the next WAIT_MEM_REG until memory is synchronized.
+  if (index == XE_GPU_REG_COHER_STATUS_HOST) {
+    reg_file->values[index].u32 |= 0x80000000ul;
+  }
+
+  // Scratch register writeback: only when the register's UMSK bit is set.
+  if (index < XE_GPU_REG_SCRATCH_REG0 || index > XE_GPU_REG_SCRATCH_REG7) {
+    return;
+  }
+  const uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
+  if (reg_file->values[XE_GPU_REG_SCRATCH_UMSK].u32 & (1 << scratch_reg)) {
+    uint8_t* membase = memory_->membase();
+    uint32_t mem_addr = reg_file->values[XE_GPU_REG_SCRATCH_ADDR].u32;
+    mem_addr += scratch_reg * 4;  // one 32-bit slot per scratch register
+    XESETUINT32BE(membase + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
+  }
+}
+
+void CommandProcessor::MakeCoherent() {
+  // Syncs the COHER_*_HOST range, then clears the dirty bit (bit 31).
+  RegisterFile* reg_file = driver_->register_file();
+  auto range_base = reg_file->values[XE_GPU_REG_COHER_BASE_HOST].u32;
+  auto range_size = reg_file->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
+  auto status = reg_file->values[XE_GPU_REG_COHER_STATUS_HOST].u32;
+
+  // Observed status values are often 0x01000000 or 0x03000000 - likely a
+  // VC (vertex cache) / TC (texture cache) selector, maybe with a direction
+  // too. Destination coherency is probably what the COHER_DEST_BASE_*
+  // registers are for; none of that is decoded here yet.
+
+  // TODO(benvanik): notify resource cache of base->size and type.
+  XETRACECP("Make %.8X -> %.8X (%db) coherent",
+            range_base, range_base + range_size, range_size);
+  driver_->resource_cache()->SyncRange(range_base, range_size);
+
+  // Write back the status sampled before the sync, minus the dirty bit.
+  status &= ~0x80000000ul;
+  reg_file->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status;
+}
diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h
index 65d5dfc71..ba081aefb 100644
--- a/src/xenia/gpu/command_processor.h
+++ b/src/xenia/gpu/command_processor.h
@@ -11,15 +11,70 @@
 #define XENIA_GPU_COMMAND_PROCESSOR_H_
 
 #include <xenia/core.h>
+#include <xenia/gpu/draw_command.h>
+#include <xenia/gpu/register_file.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 
 namespace xe {
 namespace gpu {
 
+class GraphicsDriver;
+class GraphicsSystem;
+
 
 class CommandProcessor {
 public:
+  CommandProcessor(GraphicsSystem* graphics_system, Memory* memory);
+  virtual ~CommandProcessor();
+
+  Memory* memory() const { return memory_; }
+
+  uint64_t QueryTime();
+  uint32_t counter() const { return counter_; }
+  void increment_counter() { counter_++; }
+
+  void Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count);
+  void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
+
+  void UpdateWritePointer(uint32_t value);
+
+  void Pump();
+
+private:
+  typedef struct {  // Packet parsing state passed by reference to ExecutePacket()/AdvancePtr().
+    uint32_t ptr;
+    uint32_t base_ptr;
+    uint32_t max_address;  // ExecutePacket asserts embedded shader data ends below this.
+    uint32_t ptr_mask;
+  } PacketArgs;
+
+  void AdvancePtr(PacketArgs& args, uint32_t n);
+  void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
+  void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
+  uint32_t ExecutePacket(PacketArgs& args);  // Presumably returns dwords consumed (0 if none) - TODO confirm.
+  void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);  // Stores value; handles COHER dirty bit and scratch writeback.
+  void MakeCoherent();  // Syncs the COHER_BASE/SIZE_HOST range via the resource cache, clears status bit 31.
+
+  Memory*           memory_;
+  GraphicsSystem*   graphics_system_;
+  GraphicsDriver*   driver_;  // Supplies register_file()/resource_cache(); receives Draw()/LoadShader() calls.
+
+  uint64_t          time_base_;
+  uint32_t          counter_;
+
+  uint32_t          primary_buffer_ptr_;  // Passed to GpuToCpu() when resolving scratch writeback addresses.
+  uint32_t          primary_buffer_size_;
+
+  uint32_t          read_ptr_index_;
+  uint32_t          read_ptr_update_freq_;
+  uint32_t          read_ptr_writeback_ptr_;
+
+  HANDLE            write_ptr_index_event_;
+  volatile uint32_t write_ptr_index_;  // NOTE(review): volatile alone does not synchronize threads - confirm how this is shared.
+  volatile uint32_t write_ptr_max_index_;  // NOTE(review): same concern as write_ptr_index_.
+
+  DrawCommand       draw_command_;  // Reused per draw: filled in by ExecutePacket, then handed to driver_->Draw().
 };
 
 
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.cc b/src/xenia/gpu/d3d11/d3d11_buffer.cc
deleted file mode 100644
index 84c0d901e..000000000
--- a/src/xenia/gpu/d3d11/d3d11_buffer.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/d3d11/d3d11_buffer.h>
-
-#include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::d3d11;
-using namespace xe::gpu::xenos;
-
-
-D3D11IndexBuffer::D3D11IndexBuffer(
-    D3D11BufferCache* buffer_cache,
-    const IndexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length)
-    : IndexBuffer(info, src_ptr, length),
-      buffer_cache_(buffer_cache),
-      handle_(nullptr) {
-}
-
-D3D11IndexBuffer::~D3D11IndexBuffer() {
-  XESAFERELEASE(handle_);
-}
-
-bool D3D11IndexBuffer::FetchNew(uint64_t hash) {
-  hash_ = hash;
-
-  D3D11_BUFFER_DESC buffer_desc;
-  xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
-  buffer_desc.ByteWidth       = info_.index_size;
-  buffer_desc.Usage           = D3D11_USAGE_DYNAMIC;
-  buffer_desc.BindFlags       = D3D11_BIND_INDEX_BUFFER;
-  buffer_desc.CPUAccessFlags  = D3D11_CPU_ACCESS_WRITE;
-  HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_);
-  if (FAILED(hr)) {
-    XELOGW("D3D11: failed to create index buffer");
-    return false;
-  }
-
-  return FetchDirty(hash);
-}
-
-bool D3D11IndexBuffer::FetchDirty(uint64_t hash) {
-  hash_ = hash;
-  
-  // All that's done so far:
-  XEASSERT(info_.endianness == 0x2);
-
-  D3D11_MAPPED_SUBRESOURCE res;
-  HRESULT hr = buffer_cache_->context()->Map(
-      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: unable to map index buffer");
-    return false;
-  }
-
-  if (info_.index_32bit) {
-    const uint32_t* src = reinterpret_cast<const uint32_t*>(src_);
-    uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
-    for (uint32_t n = 0; n < info_.index_count; n++) {
-      uint32_t d = { XESWAP32(src[n]) };
-      dest[n] = d;
-    }
-  } else {
-    const uint16_t* src = reinterpret_cast<const uint16_t*>(src_);
-    uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
-    for (uint32_t n = 0; n < info_.index_count; n++) {
-      uint16_t d = XESWAP16(src[n]);
-      dest[n] = d;
-    }
-  }
-  buffer_cache_->context()->Unmap(handle_, 0);
-
-  return true;
-}
-
-
-D3D11VertexBuffer::D3D11VertexBuffer(
-    D3D11BufferCache* buffer_cache,
-    const VertexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length)
-    : VertexBuffer(info, src_ptr, length),
-      buffer_cache_(buffer_cache),
-      handle_(nullptr) {
-}
-
-D3D11VertexBuffer::~D3D11VertexBuffer() {
-  XESAFERELEASE(handle_);
-}
-
-bool D3D11VertexBuffer::FetchNew(uint64_t hash) {
-  hash_ = hash;
-
-  D3D11_BUFFER_DESC buffer_desc;
-  xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
-  buffer_desc.ByteWidth       = static_cast<UINT>(length_);
-  buffer_desc.Usage           = D3D11_USAGE_DYNAMIC;
-  buffer_desc.BindFlags       = D3D11_BIND_VERTEX_BUFFER;
-  buffer_desc.CPUAccessFlags  = D3D11_CPU_ACCESS_WRITE;
-  HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_);
-  if (FAILED(hr)) {
-    XELOGW("D3D11: failed to create index buffer");
-    return false;
-  }
-
-  return FetchDirty(hash);
-}
-
-bool D3D11VertexBuffer::FetchDirty(uint64_t hash) {
-  hash_ = hash;
-
-  D3D11_MAPPED_SUBRESOURCE res;
-  HRESULT hr = buffer_cache_->context()->Map(
-      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: unable to map vertex buffer");
-    return false;
-  }
-  uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);
-
-  // TODO(benvanik): rewrite to be faster/special case common/etc
-  uint32_t stride = info_.layout.stride_words;
-  size_t count = (length_ / 4) / stride;
-  for (size_t n = 0; n < info_.layout.element_count; n++) {
-    const auto& el = info_.layout.elements[n];
-    const uint32_t* src_ptr = (const uint32_t*)(src_ + el.offset_words * 4);
-    uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4);
-    uint32_t o = 0;
-    for (uint32_t i = 0; i < count; i++) {
-      for (uint32_t j = 0; j < el.size_words; j++) {
-        dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);
-      }
-      o += stride;
-    }
-  }
-
-
-  buffer_cache_->context()->Unmap(handle_, 0);
-  return true;
-}
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.h b/src/xenia/gpu/d3d11/d3d11_buffer.h
deleted file mode 100644
index 924fb3da4..000000000
--- a/src/xenia/gpu/d3d11/d3d11_buffer.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_H_
-#define XENIA_GPU_D3D11_D3D11_BUFFER_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/buffer.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-class D3D11BufferCache;
-
-
-class D3D11IndexBuffer : public IndexBuffer {
-public:
-  D3D11IndexBuffer(D3D11BufferCache* buffer_cache,
-                   const IndexBufferInfo& info,
-                   const uint8_t* src_ptr, size_t length);
-  virtual ~D3D11IndexBuffer();
-
-  ID3D11Buffer* handle() const { return handle_; }
-
-  bool FetchNew(uint64_t hash) override;
-  bool FetchDirty(uint64_t hash) override;
-
-private:
-  D3D11BufferCache* buffer_cache_;
-  ID3D11Buffer* handle_;
-};
-
-
-class D3D11VertexBuffer : public VertexBuffer {
-public:
-  D3D11VertexBuffer(D3D11BufferCache* buffer_cache,
-                    const VertexBufferInfo& info,
-                    const uint8_t* src_ptr, size_t length);
-  virtual ~D3D11VertexBuffer();
-
-  ID3D11Buffer* handle() const { return handle_; }
-
-  bool FetchNew(uint64_t hash) override;
-  bool FetchDirty(uint64_t hash) override;
-
-private:
-  D3D11BufferCache* buffer_cache_;
-  ID3D11Buffer* handle_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_BUFFER_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc b/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc
deleted file mode 100644
index 48eb8fbf8..000000000
--- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
-
-#include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_buffer.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::d3d11;
-using namespace xe::gpu::xenos;
-
-
-D3D11BufferCache::D3D11BufferCache(ID3D11DeviceContext* context,
-                                   ID3D11Device* device)
-    : context_(context), device_(device) {
-  context->AddRef();
-  device_->AddRef();
-}
-
-D3D11BufferCache::~D3D11BufferCache() {
-  XESAFERELEASE(device_);
-  XESAFERELEASE(context_);
-}
-
-IndexBuffer* D3D11BufferCache::CreateIndexBuffer(
-    const IndexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length) {
-  return new D3D11IndexBuffer(this, info, src_ptr, length);
-}
-
-VertexBuffer* D3D11BufferCache::CreateVertexBuffer(
-    const VertexBufferInfo& info,
-    const uint8_t* src_ptr, size_t length) {
-  return new D3D11VertexBuffer(this, info, src_ptr, length);
-}
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h b/src/xenia/gpu/d3d11/d3d11_buffer_cache.h
deleted file mode 100644
index 284536ab7..000000000
--- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
-#define XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/buffer_cache.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-
-class D3D11BufferCache : public BufferCache {
-public:
-  D3D11BufferCache(ID3D11DeviceContext* context, ID3D11Device* device);
-  virtual ~D3D11BufferCache();
-
-  ID3D11DeviceContext* context() const { return context_; }
-  ID3D11Device* device() const { return device_; }
-
-protected:
-  IndexBuffer* CreateIndexBuffer(
-      const IndexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length) override;
-  VertexBuffer* CreateVertexBuffer(
-      const VertexBufferInfo& info,
-      const uint8_t* src_ptr, size_t length) override;
-
-protected:
-  ID3D11DeviceContext* context_;
-  ID3D11Device* device_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc b/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc
new file mode 100644
index 000000000..8f03cfe58
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc
@@ -0,0 +1,149 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
+
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
+
+
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::d3d11;
+using namespace xe::gpu::xenos;
+
+
+D3D11IndexBufferResource::D3D11IndexBufferResource(  // Records the cache; no D3D object is created here.
+    D3D11ResourceCache* resource_cache,
+    const MemoryRange& memory_range,
+    const Info& info)
+    : IndexBufferResource(memory_range, info),
+      resource_cache_(resource_cache),  // used for device()/context(); not released by this class
+      handle_(nullptr) {  // stays null until CreateHandle() succeeds
+}
+
+D3D11IndexBufferResource::~D3D11IndexBufferResource() {
+  XESAFERELEASE(handle_);  // drops the ID3D11Buffer (macro presumably tolerates nullptr - confirm)
+}
+
+int D3D11IndexBufferResource::CreateHandle() {
+  // Dynamic, CPU-writable index buffer sized to the backing memory range.
+  D3D11_BUFFER_DESC desc;
+  xe_zero_struct(&desc, sizeof(desc));
+  desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
+  desc.Usage = D3D11_USAGE_DYNAMIC;
+  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+  desc.ByteWidth = static_cast<UINT>(memory_range_.length);
+  auto device = resource_cache_->device();
+  if (SUCCEEDED(device->CreateBuffer(&desc, nullptr, &handle_))) {
+    return 0;  // handle_ now holds the new buffer
+  }
+  XELOGW("D3D11: failed to create index buffer");
+  return 1;
+}
+
+int D3D11IndexBufferResource::InvalidateRegion(
+    const MemoryRange& memory_range) {  // NOTE: argument is not read; all of memory_range_ is re-uploaded.
+  SCOPE_profile_cpu_f("gpu");
+
+  // Only endianness 0x2 is handled so far; the loops below always byte swap.
+  XEASSERT(info_.endianness == 0x2);
+
+  D3D11_MAPPED_SUBRESOURCE res;
+  HRESULT hr = resource_cache_->context()->Map(
+      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);  // old contents discarded; fully rewritten below
+  if (FAILED(hr)) {
+    XELOGE("D3D11: unable to map index buffer");
+    return 1;  // nonzero = failure; nothing was mapped, so no Unmap
+  }
+
+  if (info_.format == INDEX_FORMAT_32BIT) {  // 4-byte indices
+    uint32_t index_count = memory_range_.length / 4;
+    const uint32_t* src = reinterpret_cast<const uint32_t*>(
+        memory_range_.host_base);
+    uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
+    for (uint32_t n = 0; n < index_count; n++) {
+      dest[n] = XESWAP32(src[n]);
+    }
+  } else {  // every other format value is treated as 2-byte indices
+    uint32_t index_count = memory_range_.length / 2;
+    const uint16_t* src = reinterpret_cast<const uint16_t*>(
+        memory_range_.host_base);
+    uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
+    for (uint32_t n = 0; n < index_count; n++) {
+      dest[n] = XESWAP16(src[n]);
+    }
+  }
+  resource_cache_->context()->Unmap(handle_, 0);
+
+  return 0;  // success
+}
+
+D3D11VertexBufferResource::D3D11VertexBufferResource(  // Records the cache; no D3D object is created here.
+    D3D11ResourceCache* resource_cache,
+    const MemoryRange& memory_range,
+    const Info& info)
+    : VertexBufferResource(memory_range, info),
+      resource_cache_(resource_cache),  // used for device()/context(); not released by this class
+      handle_(nullptr) {  // stays null until CreateHandle() succeeds
+}
+
+D3D11VertexBufferResource::~D3D11VertexBufferResource() {
+  XESAFERELEASE(handle_);  // drops the ID3D11Buffer (macro presumably tolerates nullptr - confirm)
+}
+
+int D3D11VertexBufferResource::CreateHandle() {
+  // Dynamic, CPU-writable vertex buffer sized to the backing memory range.
+  D3D11_BUFFER_DESC desc;
+  xe_zero_struct(&desc, sizeof(desc));
+  desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
+  desc.Usage = D3D11_USAGE_DYNAMIC;
+  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+  desc.ByteWidth = static_cast<UINT>(memory_range_.length);
+  auto device = resource_cache_->device();
+  if (SUCCEEDED(device->CreateBuffer(&desc, nullptr, &handle_))) {
+    return 0;  // handle_ now holds the new buffer
+  }
+  XELOGW("D3D11: failed to create vertex buffer");
+  return 1;
+}
+
+int D3D11VertexBufferResource::InvalidateRegion(
+    const MemoryRange& memory_range) {  // NOTE: argument is not read; all of memory_range_ is re-uploaded.
+  SCOPE_profile_cpu_f("gpu");
+
+  D3D11_MAPPED_SUBRESOURCE res;
+  HRESULT hr = resource_cache_->context()->Map(
+      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);  // previous buffer contents are discarded
+  if (FAILED(hr)) {
+    XELOGE("D3D11: unable to map vertex buffer");
+    return 1;  // nonzero = failure; nothing was mapped, so no Unmap
+  }
+  uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);
+
+  // TODO(benvanik): rewrite to be faster/special case common/etc
+  uint32_t stride = info_.stride_words;  // NOTE(review): 0 here divides by zero below - confirm it cannot happen.
+  size_t count = (memory_range_.length / 4) / stride;  // whole stride-sized records in the range
+  for (size_t n = 0; n < info_.element_count; n++) {  // one pass over all records per element
+    const auto& el = info_.elements[n];
+    const uint32_t* src_ptr = (const uint32_t*)(
+        memory_range_.host_base + el.offset_words * 4);
+    uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4);
+    uint32_t o = 0;  // word offset of the current record
+    for (uint32_t i = 0; i < count; i++) {
+      for (uint32_t j = 0; j < el.size_words; j++) {
+        dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);  // byte swap one 32-bit word
+      }
+      o += stride;
+    }
+  }  // NOTE(review): words not covered by any element are never written after the discard.
+
+  resource_cache_->context()->Unmap(handle_, 0);
+  return 0;  // success
+}
diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_resource.h b/src/xenia/gpu/d3d11/d3d11_buffer_resource.h
new file mode 100644
index 000000000..2e8071ae1
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_buffer_resource.h
@@ -0,0 +1,69 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
+#define XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
+
+#include <xenia/gpu/buffer_resource.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+class D3D11ResourceCache;
+
+
+class D3D11IndexBufferResource : public IndexBufferResource {  // Index buffer backed by an ID3D11Buffer.
+public:
+  D3D11IndexBufferResource(D3D11ResourceCache* resource_cache,
+                           const MemoryRange& memory_range,
+                           const Info& info);
+  ~D3D11IndexBufferResource() override;  // releases handle_
+
+  void* handle() const override { return handle_; }  // the ID3D11Buffer* as void*; null before CreateHandle()
+
+protected:
+  int CreateHandle() override;  // creates the D3D11 buffer; 0 on success, 1 on failure
+  int InvalidateRegion(const MemoryRange& memory_range) override;  // re-uploads byte-swapped indices; 0 on success, 1 on failure
+
+private:
+  D3D11ResourceCache* resource_cache_;  // provides device() and context()
+  ID3D11Buffer* handle_;
+};
+
+
+class D3D11VertexBufferResource : public VertexBufferResource {  // Vertex buffer backed by an ID3D11Buffer.
+public:
+  D3D11VertexBufferResource(D3D11ResourceCache* resource_cache,
+                            const MemoryRange& memory_range,
+                            const Info& info);
+  ~D3D11VertexBufferResource() override;  // releases handle_
+
+  void* handle() const override { return handle_; }  // the ID3D11Buffer* as void*; null before CreateHandle()
+
+protected:
+  int CreateHandle() override;  // creates the D3D11 buffer; 0 on success, 1 on failure
+  int InvalidateRegion(const MemoryRange& memory_range) override;  // re-uploads byte-swapped element words; 0 on success, 1 on failure
+
+private:
+  D3D11ResourceCache* resource_cache_;  // provides device() and context()
+  ID3D11Buffer* handle_;
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc
index ba677f7a0..d8660cbfe 100644
--- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc
+++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc
@@ -10,7 +10,8 @@
 #include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
 
 #include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_shader.h>
+#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
+#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
 #include <xenia/gpu/xenos/ucode.h>
 
 #include <d3dcompiler.h>
@@ -22,8 +23,8 @@ using namespace xe::gpu::d3d11;
 using namespace xe::gpu::xenos;
 
 
-D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device, uint64_t hash) :
-    hash_(hash), handle_(NULL) {
+D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device)
+    : handle_(nullptr) {
   device_ = device;
   device_->AddRef();
 }
@@ -33,7 +34,7 @@ D3D11GeometryShader::~D3D11GeometryShader() {
   XESAFERELEASE(device_);
 }
 
-int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
+int D3D11GeometryShader::Prepare(D3D11VertexShaderResource* vertex_shader) {
   SCOPE_profile_cpu_f("gpu");
 
   if (handle_) {
@@ -94,11 +95,12 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
   if (FLAGS_dump_shaders.size()) {
     base_path = FLAGS_dump_shaders.c_str();
   }
+  uint64_t hash = xe_hash64(shader_source, xestrlena(shader_source)); // ?
   char file_name[XE_MAX_PATH];
   xesnprintfa(file_name, XECOUNT(file_name),
       "%s/gen_%.16llX.gs",
       base_path,
-      hash_);
+      hash);
 
   if (FLAGS_dump_shaders.size()) {
     FILE* f = fopen(file_name, "w");
@@ -128,7 +130,7 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
   return shader_blob;
 }
 
-int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
+int D3D11GeometryShader::Generate(D3D11VertexShaderResource* vertex_shader,
                                   alloy::StringBuffer* output) {
   output->Append(
     "struct VERTEX {\n"
@@ -138,7 +140,7 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
     // TODO(benvanik): only add used ones?
     output->Append(
       "  float4 o[%d] : XE_O;\n",
-      D3D11Shader::MAX_INTERPOLATORS);
+      D3D11ShaderTranslator::kMaxInterpolators);
   }
   if (alloc_counts.point_size) {
     output->Append(
@@ -156,15 +158,15 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
 
 
 D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader(
-    ID3D11Device* device, uint64_t hash) :
-    D3D11GeometryShader(device, hash) {
+    ID3D11Device* device) : D3D11GeometryShader(device) {
 }
 
 D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
 }
 
-int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
-                                             alloy::StringBuffer* output) {
+int D3D11PointSpriteGeometryShader::Generate(
+    D3D11VertexShaderResource* vertex_shader,
+    alloy::StringBuffer* output) {
   SCOPE_profile_cpu_f("gpu");
   if (D3D11GeometryShader::Generate(vertex_shader, output)) {
     return 1;
@@ -211,15 +213,15 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
 
 
 D3D11RectListGeometryShader::D3D11RectListGeometryShader(
-    ID3D11Device* device, uint64_t hash) :
-    D3D11GeometryShader(device, hash) {
+    ID3D11Device* device) : D3D11GeometryShader(device) {
 }
 
 D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
 }
 
-int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
-                                          alloy::StringBuffer* output) {
+int D3D11RectListGeometryShader::Generate(
+    D3D11VertexShaderResource* vertex_shader,
+    alloy::StringBuffer* output) {
   SCOPE_profile_cpu_f("gpu");
   if (D3D11GeometryShader::Generate(vertex_shader, output)) {
     return 1;
@@ -256,15 +258,15 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
 
 
 D3D11QuadListGeometryShader::D3D11QuadListGeometryShader(
-    ID3D11Device* device, uint64_t hash) :
-    D3D11GeometryShader(device, hash) {
+    ID3D11Device* device) : D3D11GeometryShader(device) {
 }
 
 D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
 }
 
-int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
-                                          alloy::StringBuffer* output) {
+int D3D11QuadListGeometryShader::Generate(
+    D3D11VertexShaderResource* vertex_shader,
+    alloy::StringBuffer* output) {
   SCOPE_profile_cpu_f("gpu");
   if (D3D11GeometryShader::Generate(vertex_shader, output)) {
     return 1;
diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h
index cdfebad5f..89529b2a4 100644
--- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h
+++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h
@@ -21,7 +21,7 @@ namespace xe {
 namespace gpu {
 namespace d3d11 {
 
-class D3D11VertexShader;
+class D3D11VertexShaderResource;
 
 
 class D3D11GeometryShader {
@@ -30,53 +30,52 @@ public:
 
   ID3D11GeometryShader* handle() const { return handle_; }
 
-  int Prepare(D3D11VertexShader* vertex_shader);
+  int Prepare(D3D11VertexShaderResource* vertex_shader);
 
 protected:
-  D3D11GeometryShader(ID3D11Device* device, uint64_t hash);
+  D3D11GeometryShader(ID3D11Device* device);
 
   ID3D10Blob* Compile(const char* shader_source);
 
-  virtual int Generate(D3D11VertexShader* vertex_shader,
+  virtual int Generate(D3D11VertexShaderResource* vertex_shader,
                        alloy::StringBuffer* output);
 
 protected:
   ID3D11Device* device_;
-  uint64_t      hash_;
   ID3D11GeometryShader* handle_;
 };
 
 
 class D3D11PointSpriteGeometryShader : public D3D11GeometryShader {
 public:
-  D3D11PointSpriteGeometryShader(ID3D11Device* device, uint64_t hash);
-  virtual ~D3D11PointSpriteGeometryShader();
+  D3D11PointSpriteGeometryShader(ID3D11Device* device);
+  ~D3D11PointSpriteGeometryShader() override;
 
 protected:
-  virtual int Generate(D3D11VertexShader* vertex_shader,
-                       alloy::StringBuffer* output);
+  int Generate(D3D11VertexShaderResource* vertex_shader,
+               alloy::StringBuffer* output) override;
 };
 
 
 class D3D11RectListGeometryShader : public D3D11GeometryShader {
 public:
-  D3D11RectListGeometryShader(ID3D11Device* device, uint64_t hash);
-  virtual ~D3D11RectListGeometryShader();
+  D3D11RectListGeometryShader(ID3D11Device* device);
+  ~D3D11RectListGeometryShader() override;
 
 protected:
-  virtual int Generate(D3D11VertexShader* vertex_shader,
-                       alloy::StringBuffer* output);
+  int Generate(D3D11VertexShaderResource* vertex_shader,
+               alloy::StringBuffer* output) override;
 };
 
 
 class D3D11QuadListGeometryShader : public D3D11GeometryShader {
 public:
-  D3D11QuadListGeometryShader(ID3D11Device* device, uint64_t hash);
-  virtual ~D3D11QuadListGeometryShader();
+  D3D11QuadListGeometryShader(ID3D11Device* device);
+  ~D3D11QuadListGeometryShader() override;
 
 protected:
-  virtual int Generate(D3D11VertexShader* vertex_shader,
-                       alloy::StringBuffer* output);
+  int Generate(D3D11VertexShaderResource* vertex_shader,
+               alloy::StringBuffer* output) override;
 };
 
 
diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
index 886643e32..a671b4626 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
@@ -10,13 +10,12 @@
 #include <xenia/gpu/d3d11/d3d11_graphics_driver.h>
 
 #include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_buffer.h>
-#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
+#include <xenia/gpu/buffer_resource.h>
+#include <xenia/gpu/shader_resource.h>
+#include <xenia/gpu/texture_resource.h>
 #include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
-#include <xenia/gpu/d3d11/d3d11_shader.h>
-#include <xenia/gpu/d3d11/d3d11_shader_cache.h>
-#include <xenia/gpu/d3d11/d3d11_texture.h>
-#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
+#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
+
 
 using namespace xe;
 using namespace xe::gpu;
@@ -35,9 +34,8 @@ D3D11GraphicsDriver::D3D11GraphicsDriver(
   device_ = device;
   device_->AddRef();
   device_->GetImmediateContext(&context_);
-  buffer_cache_ = new D3D11BufferCache(context_, device_);
-  shader_cache_ = new D3D11ShaderCache(device_);
-  texture_cache_ = new D3D11TextureCache(memory_, context_, device_);
+
+  resource_cache_ = new D3D11ResourceCache(memory, device_, context_);
 
   xe_zero_struct(&state_, sizeof(state_));
 
@@ -64,7 +62,29 @@ D3D11GraphicsDriver::D3D11GraphicsDriver(
   buffer_desc.ByteWidth       = (32) * sizeof(int);
   hr = device_->CreateBuffer(
       &buffer_desc, NULL, &state_.constant_buffers.gs_consts);
+}
 
+D3D11GraphicsDriver::~D3D11GraphicsDriver() {
+  RebuildRenderTargets(0, 0);
+  XESAFERELEASE(state_.constant_buffers.float_constants);
+  XESAFERELEASE(state_.constant_buffers.bool_constants);
+  XESAFERELEASE(state_.constant_buffers.loop_constants);
+  XESAFERELEASE(state_.constant_buffers.vs_consts);
+  XESAFERELEASE(state_.constant_buffers.gs_consts);
+  XESAFERELEASE(invalid_texture_view_);
+  XESAFERELEASE(invalid_texture_sampler_state_);
+  delete resource_cache_;
+  XESAFERELEASE(context_);
+  XESAFERELEASE(device_);
+  XESAFERELEASE(swap_chain_);
+}
+
+int D3D11GraphicsDriver::Initialize() {
+  InitializeInvalidTexture();
+  return 0;
+}
+
+void D3D11GraphicsDriver::InitializeInvalidTexture() {
   // TODO(benvanik): pattern?
   D3D11_TEXTURE2D_DESC texture_desc;
   xe_zero_struct(&texture_desc, sizeof(texture_desc));
@@ -90,7 +110,7 @@ D3D11GraphicsDriver::D3D11GraphicsDriver(
   initial_data.SysMemSlicePitch = 0;
   initial_data.pSysMem = texture_data;
   ID3D11Texture2D* texture = NULL;
-  hr = device_->CreateTexture2D(
+  HRESULT hr = device_->CreateTexture2D(
       &texture_desc, &initial_data, (ID3D11Texture2D**)&texture);
   if (FAILED(hr)) {
     XEFATAL("D3D11: unable to create invalid texture");
@@ -130,315 +150,53 @@ D3D11GraphicsDriver::D3D11GraphicsDriver(
   }
 }
 
-D3D11GraphicsDriver::~D3D11GraphicsDriver() {
-  RebuildRenderTargets(0, 0);
-  XESAFERELEASE(state_.constant_buffers.float_constants);
-  XESAFERELEASE(state_.constant_buffers.bool_constants);
-  XESAFERELEASE(state_.constant_buffers.loop_constants);
-  XESAFERELEASE(state_.constant_buffers.vs_consts);
-  XESAFERELEASE(state_.constant_buffers.gs_consts);
-  XESAFERELEASE(invalid_texture_view_);
-  XESAFERELEASE(invalid_texture_sampler_state_);
-  delete buffer_cache_;
-  delete texture_cache_;
-  delete shader_cache_;
-  XESAFERELEASE(context_);
-  XESAFERELEASE(device_);
-  XESAFERELEASE(swap_chain_);
-}
-
-void D3D11GraphicsDriver::Initialize() {
-}
-
-void D3D11GraphicsDriver::InvalidateState(
-    uint32_t mask) {
-  if (mask == XE_GPU_INVALIDATE_MASK_ALL) {
-    XETRACED3D("D3D11: (invalidate all)");
-  }
-  if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) {
-    XETRACED3D("D3D11: invalidate vertex shader");
-  }
-  if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) {
-    XETRACED3D("D3D11: invalidate pixel shader");
-  }
-}
-
-void D3D11GraphicsDriver::SetShader(
-    XE_GPU_SHADER_TYPE type,
-    uint32_t address,
-    uint32_t start,
-    uint32_t length) {
-  // Find or create shader in the cache.
-  uint8_t* p = memory_->Translate(address);
-  Shader* shader = shader_cache_->FindOrCreate(
-      type, p, length);
-
-  if (!shader->is_prepared()) {
-    // Disassemble.
-    const char* source = shader->disasm_src();
-    if (!source) {
-      source = "<failed to disassemble>";
-    }
-    XETRACED3D("D3D11: set shader %d at %0.8X (%db):\n%s",
-               type, address, length, source);
-  }
-
-  // Stash for later.
-  switch (type) {
-  case XE_GPU_SHADER_TYPE_VERTEX:
-    state_.vertex_shader = (D3D11VertexShader*)shader;
-    break;
-  case XE_GPU_SHADER_TYPE_PIXEL:
-    state_.pixel_shader = (D3D11PixelShader*)shader;
-    break;
-  }
-}
-
-int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) {
+int D3D11GraphicsDriver::Draw(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
-  RegisterFile& rf = register_file_;
-
-  // Ignore copies.
-  uint32_t enable_mode = rf.values[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
-  if (enable_mode != 4) {
-    XELOGW("D3D11: ignoring draw with enable mode %d", enable_mode);
-    return 1;
-  }
-
-  uint32_t state_overrides = 0;
-  if (prim_type == XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST) {
-    // Rect lists aren't culled. There may be other things they skip too.
-    state_overrides |= STATE_OVERRIDE_DISABLE_CULLING;
-  }
-
   // Misc state.
-  if (UpdateState(state_overrides)) {
+  if (UpdateState(command)) {
     return 1;
   }
 
   // Build constant buffers.
-  if (UpdateConstantBuffers()) {
+  if (SetupConstantBuffers(command)) {
     return 1;
   }
 
   // Bind shaders.
-  if (BindShaders()) {
+  if (SetupShaders(command)) {
     return 1;
   }
 
-  // Switch primitive topology.
-  // Some are unsupported on D3D11 and must be emulated.
-  D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
-  D3D11GeometryShader* geometry_shader = NULL;
-  switch (prim_type) {
-  case XE_GPU_PRIMITIVE_TYPE_POINT_LIST:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
-    if (state_.vertex_shader) {
-      if (state_.vertex_shader->DemandGeometryShader(
-          D3D11VertexShader::POINT_SPRITE_SHADER, &geometry_shader)) {
-        return 1;
-      }
-    }
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_LINE_LIST:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_LINE_STRIP:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_LIST:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_STRIP:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
-    if (state_.vertex_shader) {
-      if (state_.vertex_shader->DemandGeometryShader(
-          D3D11VertexShader::RECT_LIST_SHADER, &geometry_shader)) {
-        return 1;
-      }
-    }
-    break;
-  case XE_GPU_PRIMITIVE_TYPE_QUAD_LIST:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
-    if (state_.vertex_shader) {
-      if (state_.vertex_shader->DemandGeometryShader(
-          D3D11VertexShader::QUAD_LIST_SHADER, &geometry_shader)) {
-        return 1;
-      }
-    }
-    break;
-  default:
-  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_FAN:
-  case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07:
-  case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP:
-    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
-    XELOGE("D3D11: unsupported primitive type %d", prim_type);
-    break;
+  // Bind vertex buffers/index buffer.
+  if (SetupInputAssembly(command)) {
+    return 1;
   }
-  context_->IASetPrimitiveTopology(primitive_topology);
 
-  if (geometry_shader) {
-    context_->GSSetShader(geometry_shader->handle(), NULL, NULL);
-    context_->GSSetConstantBuffers(
-        0, 1, &state_.constant_buffers.gs_consts);
+  // Bind texture fetchers.
+  if (SetupSamplers(command)) {
+    return 1;
+  }
+
+  if (command.index_buffer) {
+    // Have an actual index buffer.
+    XETRACED3D("D3D11: draw indexed %d (indicies [%d,%d] (%d))",
+               command.prim_type, command.start_index,
+               command.start_index + command.index_count, command.index_count);
+    context_->DrawIndexed(command.index_count, command.start_index,
+                          command.base_vertex);
   } else {
-    context_->GSSetShader(NULL, NULL, NULL);
+    // Auto draw.
+    XETRACED3D("D3D11: draw indexed auto %d (indicies [%d,%d] (%d))",
+               command.prim_type, command.start_index,
+               command.start_index + command.index_count, command.index_count);
+    context_->Draw(command.index_count, 0);
   }
 
-  // Setup all fetchers (vertices/textures).
-  if (PrepareFetchers()) {
-    return 1;
-  }
-
-  // All ready to draw (except index buffer)!
-
   return 0;
 }
 
-void D3D11GraphicsDriver::DrawIndexBuffer(
-    XE_GPU_PRIMITIVE_TYPE prim_type,
-    bool index_32bit, uint32_t index_count,
-    uint32_t index_base, uint32_t index_size, uint32_t endianness) {
-  SCOPE_profile_cpu_f("gpu");
-
-  RegisterFile& rf = register_file_;
-
-  XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X",
-             prim_type, index_count, index_base);
-
-  // Setup shaders/etc.
-  if (SetupDraw(prim_type)) {
-    return;
-  }
-
-  // Setup index buffer.
-  if (PrepareIndexBuffer(
-      index_32bit, index_count, index_base, index_size, endianness)) {
-    return;
-  }
-
-  // Issue draw.
-  uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
-  uint32_t base_vertex = 0;
-  context_->DrawIndexed(index_count, start_index, base_vertex);
-}
-
-void D3D11GraphicsDriver::DrawIndexAuto(
-    XE_GPU_PRIMITIVE_TYPE prim_type,
-    uint32_t index_count) {
-  SCOPE_profile_cpu_f("gpu");
-
-  RegisterFile& rf = register_file_;
-
-  XETRACED3D("D3D11: draw indexed %d (%d indicies)",
-             prim_type, index_count);
-
-  // Setup shaders/etc.
-  if (SetupDraw(prim_type)) {
-    return;
-  }
-
-  // Issue draw.
-  uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
-  uint32_t base_vertex = 0;
-  //context_->DrawIndexed(index_count, start_index, base_vertex);
-  context_->Draw(index_count, 0);
-}
-
-int D3D11GraphicsDriver::RebuildRenderTargets(
-    uint32_t width, uint32_t height) {
-  if (width == render_targets_.width &&
-      height == render_targets_.height) {
-    // Cached copies are good.
-    return 0;
-  }
-
-  SCOPE_profile_cpu_f("gpu");
-
-  // Remove old versions.
-  for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
-    auto& cb = render_targets_.color_buffers[n];
-    XESAFERELEASE(cb.buffer);
-    XESAFERELEASE(cb.color_view_8888);
-  }
-  XESAFERELEASE(render_targets_.depth_buffer);
-  XESAFERELEASE(render_targets_.depth_view_d28s8);
-  XESAFERELEASE(render_targets_.depth_view_d28fs8);
-
-  render_targets_.width   = width;
-  render_targets_.height  = height;
-
-  if (!width || !height) {
-    // This should only happen when cleaning up.
-    return 0;
-  }
-
-  for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
-    auto& cb = render_targets_.color_buffers[n];
-    D3D11_TEXTURE2D_DESC color_buffer_desc;
-    xe_zero_struct(&color_buffer_desc, sizeof(color_buffer_desc));
-    color_buffer_desc.Width           = width;
-    color_buffer_desc.Height          = height;
-    color_buffer_desc.MipLevels       = 1;
-    color_buffer_desc.ArraySize       = 1;
-    color_buffer_desc.Format          = DXGI_FORMAT_R8G8B8A8_UNORM;
-    color_buffer_desc.SampleDesc.Count    = 1;
-    color_buffer_desc.SampleDesc.Quality  = 0;
-    color_buffer_desc.Usage           = D3D11_USAGE_DEFAULT;
-    color_buffer_desc.BindFlags       =
-        D3D11_BIND_SHADER_RESOURCE |
-        D3D11_BIND_RENDER_TARGET;
-    color_buffer_desc.CPUAccessFlags  = 0;
-    color_buffer_desc.MiscFlags       = 0;
-    device_->CreateTexture2D(
-        &color_buffer_desc, NULL, &cb.buffer);
-
-    D3D11_RENDER_TARGET_VIEW_DESC render_target_view_desc;
-    xe_zero_struct(&render_target_view_desc, sizeof(render_target_view_desc));
-    render_target_view_desc.Format        = DXGI_FORMAT_R8G8B8A8_UNORM;
-    render_target_view_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
-    // render_target_view_desc.Buffer ?
-    device_->CreateRenderTargetView(
-        cb.buffer,
-        &render_target_view_desc,
-        &cb.color_view_8888);
-  }
-
-  D3D11_TEXTURE2D_DESC depth_stencil_desc;
-  xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc));
-  depth_stencil_desc.Width          = width;
-  depth_stencil_desc.Height         = height;
-  depth_stencil_desc.MipLevels      = 1;
-  depth_stencil_desc.ArraySize      = 1;
-  depth_stencil_desc.Format         = DXGI_FORMAT_D24_UNORM_S8_UINT;
-  depth_stencil_desc.SampleDesc.Count   = 1;
-  depth_stencil_desc.SampleDesc.Quality = 0;
-  depth_stencil_desc.Usage          = D3D11_USAGE_DEFAULT;
-  depth_stencil_desc.BindFlags      =
-      D3D11_BIND_DEPTH_STENCIL;
-  depth_stencil_desc.CPUAccessFlags = 0;
-  depth_stencil_desc.MiscFlags      = 0;
-  device_->CreateTexture2D(
-      &depth_stencil_desc, NULL, &render_targets_.depth_buffer);
-
-  D3D11_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc;
-  xe_zero_struct(&depth_stencil_view_desc, sizeof(depth_stencil_view_desc));
-  depth_stencil_view_desc.Format        = DXGI_FORMAT_D24_UNORM_S8_UINT;
-  depth_stencil_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
-  depth_stencil_view_desc.Flags         = 0;
-  device_->CreateDepthStencilView(
-      render_targets_.depth_buffer,
-      &depth_stencil_view_desc,
-      &render_targets_.depth_view_d28s8);
-
-  return 0;
-}
-
-int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
+int D3D11GraphicsDriver::UpdateState(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
   // Most information comes from here:
@@ -449,8 +207,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
 
   RegisterFile& rf = register_file_;
 
-  uint32_t window_scissor_tl = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
-  uint32_t window_scissor_br = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
+  uint32_t window_scissor_tl = register_file_[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
+  uint32_t window_scissor_br = register_file_[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
   //uint32_t window_width =
   //    (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF);
   //uint32_t window_height =
@@ -466,16 +224,16 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   // RB_SURFACE_INFO ?
 
   // Enable buffers.
-  uint32_t enable_mode = rf.values[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
+  uint32_t enable_mode = register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
   // 4 = color + depth
   // 6 = copy ?
 
   // color_info[0-3] has format 8888
   uint32_t color_info[4] = {
-    rf.values[XE_GPU_REG_RB_COLOR_INFO].u32,
-    rf.values[XE_GPU_REG_RB_COLOR1_INFO].u32,
-    rf.values[XE_GPU_REG_RB_COLOR2_INFO].u32,
-    rf.values[XE_GPU_REG_RB_COLOR3_INFO].u32,
+    register_file_[XE_GPU_REG_RB_COLOR_INFO].u32,
+    register_file_[XE_GPU_REG_RB_COLOR1_INFO].u32,
+    register_file_[XE_GPU_REG_RB_COLOR2_INFO].u32,
+    register_file_[XE_GPU_REG_RB_COLOR3_INFO].u32,
   };
   ID3D11RenderTargetView* render_target_views[4] = { 0 };
   for (int n = 0; n < XECOUNT(color_info); n++) {
@@ -494,7 +252,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   }
 
   // depth_info has format 24_8
-  uint32_t depth_info = rf.values[XE_GPU_REG_RB_DEPTH_INFO].u32;
+  uint32_t depth_info = register_file_[XE_GPU_REG_RB_DEPTH_INFO].u32;
   uint32_t depth_format = (depth_info >> 16) & 0x1;
   ID3D11DepthStencilView* depth_stencil_view = 0;
   switch (depth_format) {
@@ -514,7 +272,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   context_->OMSetRenderTargets(4, render_target_views, depth_stencil_view);
 
   // General rasterizer state.
-  uint32_t mode_control = rf.values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
+  uint32_t mode_control = register_file_[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
   D3D11_RASTERIZER_DESC rasterizer_desc;
   xe_zero_struct(&rasterizer_desc, sizeof(rasterizer_desc));
   rasterizer_desc.FillMode              = D3D11_FILL_SOLID; // D3D11_FILL_WIREFRAME;
@@ -529,7 +287,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
     rasterizer_desc.CullMode            = D3D11_CULL_BACK;
     break;
   }
-  if (state_overrides & STATE_OVERRIDE_DISABLE_CULLING) {
+  if (command.prim_type == XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST) {
+    // Rect lists aren't culled. There may be other things they skip too.
     rasterizer_desc.CullMode            = D3D11_CULL_NONE;
   }
   rasterizer_desc.FrontCounterClockwise = (mode_control & 0x4) == 0;
@@ -547,7 +306,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
 
   // Viewport.
   // If we have resized the window we will want to change this.
-  uint32_t window_offset = rf.values[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
+  uint32_t window_offset = register_file_[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
   // signed?
   uint32_t window_offset_x = window_offset & 0x7FFF;
   uint32_t window_offset_y = (window_offset >> 16) & 0x7FFF;
@@ -555,19 +314,19 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   // ?
   // TODO(benvanik): figure out how to emulate viewports in D3D11. Could use
   //     viewport above to scale, though that doesn't support negatives/etc.
-  uint32_t vte_control = rf.values[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
+  uint32_t vte_control = register_file_[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
   bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
-  float vport_xscale = rf.values[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640
+  float vport_xscale = register_file_[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640
   bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
-  float vport_xoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640
+  float vport_xoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640
   bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
-  float vport_yscale = rf.values[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360
+  float vport_yscale = register_file_[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360
   bool vport_yoffset_enable = (vte_control & (1 << 3)) > 0;
-  float vport_yoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360
+  float vport_yoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360
   bool vport_zscale_enable = (vte_control & (1 << 4)) > 0;
-  float vport_zscale = rf.values[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1
+  float vport_zscale = register_file_[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1
   bool vport_zoffset_enable = (vte_control & (1 << 5)) > 0;
-  float vport_zoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0
+  float vport_zoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0
 
   // TODO(benvanik): compute viewport values.
   D3D11_VIEWPORT viewport;
@@ -619,8 +378,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   // Scissoring.
   // TODO(benvanik): pull from scissor registers.
   // ScissorEnable must be set in raster state above.
-  uint32_t screen_scissor_tl = rf.values[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
-  uint32_t screen_scissor_br = rf.values[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
+  uint32_t screen_scissor_tl = register_file_[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
+  uint32_t screen_scissor_br = register_file_[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
   if (screen_scissor_tl != 0 && screen_scissor_br != 0x20002000) {
     D3D11_RECT scissor_rect;
     scissor_rect.top = (screen_scissor_tl >> 16) & 0x7FFF;
@@ -654,8 +413,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   };
 
   // Depth-stencil state.
-  uint32_t depth_control = rf.values[XE_GPU_REG_RB_DEPTHCONTROL].u32;
-  uint32_t stencil_ref_mask = rf.values[XE_GPU_REG_RB_STENCILREFMASK].u32;
+  uint32_t depth_control = register_file_[XE_GPU_REG_RB_DEPTHCONTROL].u32;
+  uint32_t stencil_ref_mask = register_file_[XE_GPU_REG_RB_STENCILREFMASK].u32;
   D3D11_DEPTH_STENCIL_DESC depth_stencil_desc;
   xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc));
   // A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE
@@ -727,22 +486,22 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   // alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
   // Not in D3D11!
   // http://msdn.microsoft.com/en-us/library/windows/desktop/bb205120(v=vs.85).aspx
-  uint32_t color_control = rf.values[XE_GPU_REG_RB_COLORCONTROL].u32;
+  uint32_t color_control = register_file_[XE_GPU_REG_RB_COLORCONTROL].u32;
 
   // Blend state.
-  uint32_t color_mask = rf.values[XE_GPU_REG_RB_COLOR_MASK].u32;
+  uint32_t color_mask = register_file_[XE_GPU_REG_RB_COLOR_MASK].u32;
   uint32_t sample_mask = 0xFFFFFFFF; // ?
   float blend_factor[4] = {
-    rf.values[XE_GPU_REG_RB_BLEND_RED].f32,
-    rf.values[XE_GPU_REG_RB_BLEND_GREEN].f32,
-    rf.values[XE_GPU_REG_RB_BLEND_BLUE].f32,
-    rf.values[XE_GPU_REG_RB_BLEND_ALPHA].f32,
+    register_file_[XE_GPU_REG_RB_BLEND_RED].f32,
+    register_file_[XE_GPU_REG_RB_BLEND_GREEN].f32,
+    register_file_[XE_GPU_REG_RB_BLEND_BLUE].f32,
+    register_file_[XE_GPU_REG_RB_BLEND_ALPHA].f32,
   };
   uint32_t blend_control[4] = {
-    rf.values[XE_GPU_REG_RB_BLENDCONTROL_0].u32,
-    rf.values[XE_GPU_REG_RB_BLENDCONTROL_1].u32,
-    rf.values[XE_GPU_REG_RB_BLENDCONTROL_2].u32,
-    rf.values[XE_GPU_REG_RB_BLENDCONTROL_3].u32,
+    register_file_[XE_GPU_REG_RB_BLENDCONTROL_0].u32,
+    register_file_[XE_GPU_REG_RB_BLENDCONTROL_1].u32,
+    register_file_[XE_GPU_REG_RB_BLENDCONTROL_2].u32,
+    register_file_[XE_GPU_REG_RB_BLENDCONTROL_3].u32,
   };
   D3D11_BLEND_DESC blend_desc;
   xe_zero_struct(&blend_desc, sizeof(blend_desc));
@@ -782,60 +541,43 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) {
   return 0;
 }
 
-int D3D11GraphicsDriver::UpdateConstantBuffers() {
+int D3D11GraphicsDriver::SetupConstantBuffers(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
-  RegisterFile& rf = register_file_;
-
   D3D11_MAPPED_SUBRESOURCE res;
   context_->Map(
       state_.constant_buffers.float_constants, 0,
       D3D11_MAP_WRITE_DISCARD, 0, &res);
   memcpy(res.pData,
-         &rf.values[XE_GPU_REG_SHADER_CONSTANT_000_X],
-         (512 * 4) * sizeof(float));
+         command.float4_constants.values,
+         command.float4_constants.count * 4 * sizeof(float));
   context_->Unmap(state_.constant_buffers.float_constants, 0);
 
   context_->Map(
       state_.constant_buffers.loop_constants, 0,
       D3D11_MAP_WRITE_DISCARD, 0, &res);
   memcpy(res.pData,
-      &rf.values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00],
-      (32) * sizeof(int));
+         command.loop_constants.values,
+         command.loop_constants.count * sizeof(int));
   context_->Unmap(state_.constant_buffers.loop_constants, 0);
 
   context_->Map(
       state_.constant_buffers.bool_constants, 0,
       D3D11_MAP_WRITE_DISCARD, 0, &res);
   memcpy(res.pData,
-      &rf.values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
-      (8) * sizeof(int));
+         command.bool_constants.values,
+         command.bool_constants.count * sizeof(int));
   context_->Unmap(state_.constant_buffers.bool_constants, 0);
 
   return 0;
 }
 
-int D3D11GraphicsDriver::BindShaders() {
+int D3D11GraphicsDriver::SetupShaders(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
-  RegisterFile& rf = register_file_;
-  xe_gpu_program_cntl_t program_cntl;
-  program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
-
-  // Vertex shader setup.
-  D3D11VertexShader* vs = state_.vertex_shader;
-  if (vs) {
-    if (!vs->is_prepared()) {
-      // Prepare for use.
-      if (vs->Prepare(&program_cntl)) {
-        XELOGGPU("D3D11: failed to prepare vertex shader");
-        state_.vertex_shader = NULL;
-        return 1;
-      }
-    }
-
-    // Bind.
-    context_->VSSetShader(vs->handle(), NULL, 0);
+  if (command.vertex_shader) {
+    context_->VSSetShader(
+        command.vertex_shader->handle_as<ID3D11VertexShader>(), nullptr, 0);
 
     // Set constant buffers.
     ID3D11Buffer* vs_constant_buffers[] = {
@@ -844,31 +586,22 @@ int D3D11GraphicsDriver::BindShaders() {
       state_.constant_buffers.loop_constants,
       state_.constant_buffers.vs_consts,
     };
-    context_->VSSetConstantBuffers(
-        0, XECOUNT(vs_constant_buffers), vs_constant_buffers);
+    context_->VSSetConstantBuffers(0, XECOUNT(vs_constant_buffers),
+                                   vs_constant_buffers);
 
     // Setup input layout (as encoded in vertex shader).
+    auto vs = static_cast<D3D11VertexShaderResource*>(command.vertex_shader);
     context_->IASetInputLayout(vs->input_layout());
   } else {
-    context_->VSSetShader(NULL, NULL, 0);
-    context_->IASetInputLayout(NULL);
+    context_->VSSetShader(nullptr, nullptr, 0);
+    context_->IASetInputLayout(nullptr);
     return 1;
   }
 
   // Pixel shader setup.
-  D3D11PixelShader* ps = state_.pixel_shader;
-  if (ps) {
-    if (!ps->is_prepared()) {
-      // Prepare for use.
-      if (ps->Prepare(&program_cntl, vs)) {
-        XELOGGPU("D3D11: failed to prepare pixel shader");
-        state_.pixel_shader = NULL;
-        return 1;
-      }
-    }
-
-    // Bind.
-    context_->PSSetShader(ps->handle(), NULL, 0);
+  if (command.pixel_shader) {
+    context_->PSSetShader(
+        command.pixel_shader->handle_as<ID3D11PixelShader>(), nullptr, 0);
 
     // Set constant buffers.
     ID3D11Buffer* vs_constant_buffers[] = {
@@ -876,232 +609,233 @@ int D3D11GraphicsDriver::BindShaders() {
       state_.constant_buffers.bool_constants,
       state_.constant_buffers.loop_constants,
     };
-    context_->PSSetConstantBuffers(
-        0, XECOUNT(vs_constant_buffers), vs_constant_buffers);
+    context_->PSSetConstantBuffers(0, XECOUNT(vs_constant_buffers),
+                                   vs_constant_buffers);
   } else {
-    context_->PSSetShader(NULL, NULL, 0);
+    context_->PSSetShader(nullptr, nullptr, 0);
     return 1;
   }
 
   return 0;
 }
 
-int D3D11GraphicsDriver::PrepareFetchers() {
+int D3D11GraphicsDriver::SetupInputAssembly(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
-  // Input assembly.
-  XEASSERTNOTNULL(state_.vertex_shader);
-  auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs();
-  for (size_t n = 0; n < vtx_inputs->count; n++) {
-    auto input = vtx_inputs->descs[n];
-    if (PrepareVertexBuffer(input)) {
-      XELOGE("D3D11: unable to prepare vertex buffer");
-      return 1;
-    }
-  }
-
-  // All texture inputs.
-  if (PrepareTextureFetchers()) {
-    XELOGE("D3D11: unable to prepare texture fetchers");
-    return 1;
-  }
-
-  // Vertex texture samplers.
-  auto tex_inputs = state_.vertex_shader->GetTextureBufferInputs();
-  for (size_t n = 0; n < tex_inputs->count; n++) {
-    auto input = tex_inputs->descs[n];
-    if (PrepareTextureSampler(XE_GPU_SHADER_TYPE_VERTEX, input)) {
-      XELOGE("D3D11: unable to prepare texture buffer");
-      return 1;
-    }
-  }
-
-  // Pixel shader texture sampler.
-  XEASSERTNOTNULL(state_.pixel_shader);
-  tex_inputs = state_.pixel_shader->GetTextureBufferInputs();
-  for (size_t n = 0; n < tex_inputs->count; n++) {
-    auto input = tex_inputs->descs[n];
-    if (PrepareTextureSampler(XE_GPU_SHADER_TYPE_PIXEL, input)) {
-      XELOGE("D3D11: unable to prepare texture buffer");
-      return 1;
-    }
-  }
-
-  return 0;
-}
-
-int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
-  SCOPE_profile_cpu_f("gpu");
-
-  D3D11VertexShader* vs = state_.vertex_shader;
+  auto vs = static_cast<D3D11VertexShaderResource*>(command.vertex_shader);
   if (!vs) {
     return 1;
   }
 
-  RegisterFile& rf = register_file_;
-  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
-  xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
-  xe_gpu_vertex_fetch_t* fetch = NULL;
-  switch (desc.fetch_slot % 3) {
-  case 0:
-    fetch = &group->vertex_fetch_0;
+  // Switch primitive topology.
+  // Some are unsupported on D3D11 and must be emulated.
+  D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
+  D3D11GeometryShader* geometry_shader = NULL;
+  switch (command.prim_type) {
+  case XE_GPU_PRIMITIVE_TYPE_POINT_LIST:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+    if (vs->DemandGeometryShader(
+        D3D11VertexShaderResource::POINT_SPRITE_SHADER, &geometry_shader)) {
+      return 1;
+    }
     break;
-  case 1:
-    fetch = &group->vertex_fetch_1;
+  case XE_GPU_PRIMITIVE_TYPE_LINE_LIST:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
     break;
-  case 2:
-    fetch = &group->vertex_fetch_2;
+  case XE_GPU_PRIMITIVE_TYPE_LINE_STRIP:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
+    break;
+  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_LIST:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+    break;
+  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_STRIP:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+    break;
+  case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+    if (vs->DemandGeometryShader(
+        D3D11VertexShaderResource::RECT_LIST_SHADER, &geometry_shader)) {
+      return 1;
+    }
+    break;
+  case XE_GPU_PRIMITIVE_TYPE_QUAD_LIST:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
+    if (vs->DemandGeometryShader(
+        D3D11VertexShaderResource::QUAD_LIST_SHADER, &geometry_shader)) {
+      return 1;
+    }
+    break;
+  default:
+  case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_FAN:
+  case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07:
+  case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP:
+    primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+    XELOGE("D3D11: unsupported primitive type %d", command.prim_type);
     break;
   }
-  XEASSERTNOTNULL(fetch);
-  // If this assert doesn't hold, maybe we just abort?
-  XEASSERT(fetch->type == 0x3);
-  XEASSERTNOTZERO(fetch->size);
+  context_->IASetPrimitiveTopology(primitive_topology);
 
-  VertexBufferInfo info;
-  // TODO(benvanik): make these structs the same so we can share.
-  info.layout.stride_words = desc.stride_words;
-  info.layout.element_count = desc.element_count;
-  for (uint32_t i = 0; i < desc.element_count; ++i) {
-    const auto& src_el = desc.elements[i];
-    auto& dest_el = info.layout.elements[i];
-    dest_el.format = src_el.format;
-    dest_el.offset_words = src_el.offset_words;
-    dest_el.size_words = src_el.size_words;
+  // Set the geometry shader, if we are emulating a primitive type.
+  if (geometry_shader) {
+    context_->GSSetShader(geometry_shader->handle(), nullptr, 0);
+    context_->GSSetConstantBuffers(0, 1, &state_.constant_buffers.gs_consts);
+  } else {
+    context_->GSSetShader(nullptr, nullptr, 0);
+  }
 
-  uint32_t address = (fetch->address << 2) + address_translation_;
-  const uint8_t* src = reinterpret_cast<const uint8_t*>(
-      memory_->Translate(address));
-
-  VertexBuffer* vertex_buffer = buffer_cache_->FetchVertexBuffer(
-      info, src, fetch->size * 4);
-  if (!vertex_buffer) {
-    XELOGE("D3D11: unable to create vertex fetch buffer");
-    return 1;
+  // Index buffer, if any. May be auto draw.
+  if (command.index_buffer) {
+    // Default to 16-bit so |format| is never read uninitialized.
+    DXGI_FORMAT format = DXGI_FORMAT_R16_UINT;
+    switch (command.index_buffer->info().format) {
+    case INDEX_FORMAT_16BIT:
+      format = DXGI_FORMAT_R16_UINT;
+      break;
+    case INDEX_FORMAT_32BIT:
+      format = DXGI_FORMAT_R32_UINT;
+      break;
+    }
+    context_->IASetIndexBuffer(
+        command.index_buffer->handle_as<ID3D11Buffer>(), format, 0);
+  } else {
+    context_->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0);
+  }
-  auto d3d_vb = static_cast<D3D11VertexBuffer*>(vertex_buffer);
 
-  // TODO(benvanik): always dword aligned?
-  uint32_t stride = desc.stride_words * 4;
-  uint32_t offset = 0;
-  int vb_slot = desc.input_index;
-  ID3D11Buffer* buffers[] = { d3d_vb->handle() };
-  context_->IASetVertexBuffers(vb_slot, XECOUNT(buffers), buffers,
-                               &stride, &offset);
-
-  return 0;
-}
-
-int D3D11GraphicsDriver::PrepareTextureFetchers() {
-  SCOPE_profile_cpu_f("gpu");
-
-  RegisterFile& rf = register_file_;
-
-  for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) {
-    auto& fetcher = state_.texture_fetchers[n];
-
-    // TODO(benvanik): quick validate without refetching.
-    fetcher.enabled = false;
-    fetcher.view = NULL;
-
-    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6;
-    xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
-    auto& fetch = group->texture_fetch;
-    if (fetch.type != 0x2) {
-      continue;
-    }
-
-    // Stash a copy of the fetch register.
-    fetcher.fetch = fetch;
-
-    // Fetch texture from the cache.
-    uint32_t address = (fetch.address << 12) + address_translation_;
-    auto texture_view = texture_cache_->FetchTexture(address, fetch);
-    if (!texture_view) {
-      XELOGW("D3D11: unable to fetch texture at %.8X", address);
-      continue;
-    }
-    if (texture_view->format == DXGI_FORMAT_UNKNOWN) {
-      XELOGW("D3D11: unknown texture format %d", fetch.format);
-      continue;
-    }
-    fetcher.view = static_cast<D3D11TextureView*>(texture_view);
-
-    // Only enable if we get all the way down here successfully.
-    fetcher.enabled = true;
+  // All vertex buffers.
+  for (auto i = 0; i < command.vertex_buffer_count; ++i) {
+    const auto& vb = command.vertex_buffers[i];
+    auto buffer = vb.buffer->handle_as<ID3D11Buffer>();
+    auto stride = vb.stride;
+    auto offset = vb.offset;
+    context_->IASetVertexBuffers(vb.input_index, 1, &buffer,
+                                 &stride, &offset);
   }
 
   return 0;
 }
 
-int D3D11GraphicsDriver::PrepareTextureSampler(
-    xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) {
+int D3D11GraphicsDriver::SetupSamplers(const DrawCommand& command) {
   SCOPE_profile_cpu_f("gpu");
 
-  // If the fetcher is disabled or invalid, set some default textures.
-  auto& fetcher = state_.texture_fetchers[desc.fetch_slot];
-  if (!fetcher.enabled ||
-      fetcher.view->format == DXGI_FORMAT_UNKNOWN) {
-    XELOGW("D3D11: ignoring texture fetch: disabled or an unknown format");
-    if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) {
-      context_->VSSetShaderResources(desc.input_index,
-                                     1, &invalid_texture_view_);
-      context_->VSSetSamplers(desc.input_index,
-                              1, &invalid_texture_sampler_state_);
+  for (auto i = 0; i < command.vertex_shader_sampler_count; ++i) {
+    const auto& input = command.vertex_shader_samplers[i];
+    if (input.texture) {
+      auto texture = input.texture->handle_as<ID3D11ShaderResourceView>();
+      context_->VSSetShaderResources(input.input_index, 1, &texture);
     } else {
-      context_->PSSetShaderResources(desc.input_index,
-                                     1, &invalid_texture_view_);
-      context_->PSSetSamplers(desc.input_index,
-                              1, &invalid_texture_sampler_state_);
+      context_->VSSetShaderResources(input.input_index, 1, &invalid_texture_view_);
     }
+    if (input.sampler_state) {
+      auto sampler_state = input.sampler_state->handle_as<ID3D11SamplerState>();
+      context_->VSSetSamplers(input.input_index, 1, &sampler_state);
+    } else {
+      context_->VSSetSamplers(input.input_index, 1, &invalid_texture_sampler_state_);
+    }
+  }
+
+  for (auto i = 0; i < command.pixel_shader_sampler_count; ++i) {
+    const auto& input = command.pixel_shader_samplers[i];
+    if (input.texture) {
+      auto texture = input.texture->handle_as<ID3D11ShaderResourceView>();
+      context_->PSSetShaderResources(input.input_index, 1, &texture);
+    } else {
+      context_->PSSetShaderResources(input.input_index, 1, &invalid_texture_view_);
+    }
+    if (input.sampler_state) {
+      auto sampler_state = input.sampler_state->handle_as<ID3D11SamplerState>();
+      context_->PSSetSamplers(input.input_index, 1, &sampler_state);
+    } else {
+      context_->PSSetSamplers(input.input_index, 1, &invalid_texture_sampler_state_);
+    }
+  }
+
+  return 0;
+}
+
+int D3D11GraphicsDriver::RebuildRenderTargets(uint32_t width,
+                                              uint32_t height) {
+  if (width == render_targets_.width &&
+      height == render_targets_.height) {
+    // Cached copies are good.
     return 0;
   }
 
-  // Get and set the real shader resource views/samplers.
-  if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) {
-    context_->VSSetShaderResources(desc.input_index, 1, &fetcher.view->srv);
-  } else {
-    context_->PSSetShaderResources(desc.input_index, 1, &fetcher.view->srv);
-  }
-  ID3D11SamplerState* sampler_state = texture_cache_->GetSamplerState(
-      fetcher.fetch, desc);
-  if (!sampler_state) {
-    XELOGW("D3D11: failed to set sampler state; ignoring texture");
-    return 1;
-  }
-  if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) {
-    context_->VSSetSamplers(desc.input_index, 1, &sampler_state);
-  } else {
-    context_->PSSetSamplers(desc.input_index, 1, &sampler_state);
-  }
-
-  return 0;
-}
-
-int D3D11GraphicsDriver::PrepareIndexBuffer(
-    bool index_32bit, uint32_t index_count,
-    uint32_t index_base, uint32_t index_size, uint32_t endianness) {
   SCOPE_profile_cpu_f("gpu");
 
-  RegisterFile& rf = register_file_;
+  // Remove old versions.
+  for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
+    auto& cb = render_targets_.color_buffers[n];
+    XESAFERELEASE(cb.buffer);
+    XESAFERELEASE(cb.color_view_8888);
+  }
+  XESAFERELEASE(render_targets_.depth_buffer);
+  XESAFERELEASE(render_targets_.depth_view_d28s8);
+  XESAFERELEASE(render_targets_.depth_view_d28fs8);
 
-  uint32_t address = index_base + address_translation_;
+  render_targets_.width   = width;
+  render_targets_.height  = height;
 
-  IndexBufferInfo info;
-  info.endianness = endianness;
-  info.index_32bit = index_32bit;
-  info.index_count = index_count;
-  info.index_size = index_size;
-  auto ib = static_cast<D3D11IndexBuffer*>(buffer_cache_->FetchIndexBuffer(
-      info, memory_->Translate(address), index_size));
-  if (!ib) {
-    return 1;
+  if (!width || !height) {
+    // This should only happen when cleaning up.
+    return 0;
   }
 
-  DXGI_FORMAT format;
-  format = index_32bit ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT;
-  context_->IASetIndexBuffer(ib->handle(), format, 0);
+  for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) {
+    auto& cb = render_targets_.color_buffers[n];
+    D3D11_TEXTURE2D_DESC color_buffer_desc;
+    xe_zero_struct(&color_buffer_desc, sizeof(color_buffer_desc));
+    color_buffer_desc.Width           = width;
+    color_buffer_desc.Height          = height;
+    color_buffer_desc.MipLevels       = 1;
+    color_buffer_desc.ArraySize       = 1;
+    color_buffer_desc.Format          = DXGI_FORMAT_R8G8B8A8_UNORM;
+    color_buffer_desc.SampleDesc.Count    = 1;
+    color_buffer_desc.SampleDesc.Quality  = 0;
+    color_buffer_desc.Usage           = D3D11_USAGE_DEFAULT;
+    color_buffer_desc.BindFlags       =
+        D3D11_BIND_SHADER_RESOURCE |
+        D3D11_BIND_RENDER_TARGET;
+    color_buffer_desc.CPUAccessFlags  = 0;
+    color_buffer_desc.MiscFlags       = 0;
+    device_->CreateTexture2D(
+        &color_buffer_desc, NULL, &cb.buffer);
+
+    D3D11_RENDER_TARGET_VIEW_DESC render_target_view_desc;
+    xe_zero_struct(&render_target_view_desc, sizeof(render_target_view_desc));
+    render_target_view_desc.Format        = DXGI_FORMAT_R8G8B8A8_UNORM;
+    render_target_view_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
+    // render_target_view_desc.Buffer ?
+    device_->CreateRenderTargetView(
+        cb.buffer,
+        &render_target_view_desc,
+        &cb.color_view_8888);
+  }
+
+  D3D11_TEXTURE2D_DESC depth_stencil_desc;
+  xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc));
+  depth_stencil_desc.Width          = width;
+  depth_stencil_desc.Height         = height;
+  depth_stencil_desc.MipLevels      = 1;
+  depth_stencil_desc.ArraySize      = 1;
+  depth_stencil_desc.Format         = DXGI_FORMAT_D24_UNORM_S8_UINT;
+  depth_stencil_desc.SampleDesc.Count   = 1;
+  depth_stencil_desc.SampleDesc.Quality = 0;
+  depth_stencil_desc.Usage          = D3D11_USAGE_DEFAULT;
+  depth_stencil_desc.BindFlags      = D3D11_BIND_DEPTH_STENCIL;
+  depth_stencil_desc.CPUAccessFlags = 0;
+  depth_stencil_desc.MiscFlags      = 0;
+  device_->CreateTexture2D(
+      &depth_stencil_desc, NULL, &render_targets_.depth_buffer);
+
+  D3D11_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc;
+  xe_zero_struct(&depth_stencil_view_desc, sizeof(depth_stencil_view_desc));
+  depth_stencil_view_desc.Format        = DXGI_FORMAT_D24_UNORM_S8_UINT;
+  depth_stencil_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
+  depth_stencil_view_desc.Flags         = 0;
+  device_->CreateDepthStencilView(
+      render_targets_.depth_buffer,
+      &depth_stencil_view_desc,
+      &render_targets_.depth_view_d28s8);
 
   return 0;
 }
diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
index 2f2316488..4faa493ee 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
@@ -13,8 +13,8 @@
 #include <xenia/core.h>
 
 #include <xenia/gpu/graphics_driver.h>
-#include <xenia/gpu/shader.h>
 #include <xenia/gpu/d3d11/d3d11_gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 #include <d3d11.h>
@@ -24,13 +24,6 @@ namespace xe {
 namespace gpu {
 namespace d3d11 {
 
-class D3D11BufferCache;
-class D3D11PixelShader;
-class D3D11ShaderCache;
-class D3D11TextureCache;
-struct D3D11TextureView;
-class D3D11VertexShader;
-
 
 class D3D11GraphicsDriver : public GraphicsDriver {
 public:
@@ -38,48 +31,32 @@ public:
       Memory* memory, IDXGISwapChain* swap_chain, ID3D11Device* device);
   virtual ~D3D11GraphicsDriver();
 
-  virtual void Initialize();
+  ResourceCache* resource_cache() const override { return resource_cache_; }
 
-  virtual void InvalidateState(
-      uint32_t mask);
-  virtual void SetShader(
-      xenos::XE_GPU_SHADER_TYPE type,
-      uint32_t address,
-      uint32_t start,
-      uint32_t length);
-  virtual void DrawIndexBuffer(
-      xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-      bool index_32bit, uint32_t index_count,
-      uint32_t index_base, uint32_t index_size, uint32_t endianness);
-  virtual void DrawIndexAuto(
-      xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-      uint32_t index_count);
+  int Initialize() override;
+
+  int Draw(const DrawCommand& command) override;
 
   // TODO(benvanik): figure this out.
-  virtual int Resolve();
+  int Resolve() override;
 
 private:
-  int SetupDraw(xenos::XE_GPU_PRIMITIVE_TYPE prim_type);
+  void InitializeInvalidTexture();
+
+  int UpdateState(const DrawCommand& command);
+  int SetupConstantBuffers(const DrawCommand& command);
+  int SetupShaders(const DrawCommand& command);
+  int SetupInputAssembly(const DrawCommand& command);
+  int SetupSamplers(const DrawCommand& command);
+
   int RebuildRenderTargets(uint32_t width, uint32_t height);
-  int UpdateState(uint32_t state_overrides = 0);
-  int UpdateConstantBuffers();
-  int BindShaders();
-  int PrepareFetchers();
-  int PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc);
-  int PrepareTextureFetchers();
-  int PrepareTextureSampler(xenos::XE_GPU_SHADER_TYPE shader_type,
-                            Shader::tex_buffer_desc_t& desc);
-  int PrepareIndexBuffer(
-      bool index_32bit, uint32_t index_count,
-      uint32_t index_base, uint32_t index_size, uint32_t endianness);
 
 private:
   IDXGISwapChain*       swap_chain_;
   ID3D11Device*         device_;
   ID3D11DeviceContext*  context_;
-  D3D11BufferCache*     buffer_cache_;
-  D3D11ShaderCache*     shader_cache_;
-  D3D11TextureCache*    texture_cache_;
+
+  D3D11ResourceCache*   resource_cache_;
 
   ID3D11ShaderResourceView* invalid_texture_view_;
   ID3D11SamplerState*       invalid_texture_sampler_state_;
@@ -97,9 +74,6 @@ private:
   } render_targets_;
 
   struct {
-    D3D11VertexShader*  vertex_shader;
-    D3D11PixelShader*   pixel_shader;
-
     struct {
       ID3D11Buffer*     float_constants;
       ID3D11Buffer*     bool_constants;
@@ -107,17 +81,7 @@ private:
       ID3D11Buffer*     vs_consts;
       ID3D11Buffer*     gs_consts;
     } constant_buffers;
-
-    struct {
-      bool        enabled;
-      xenos::xe_gpu_texture_fetch_t fetch;
-      D3D11TextureView* view;
-    } texture_fetchers[32];
   } state_;
-
-  enum StateOverrides {
-    STATE_OVERRIDE_DISABLE_CULLING  = (1 << 0),
-  };
 };
 
 
diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_system.cc b/src/xenia/gpu/d3d11/d3d11_graphics_system.cc
index 553ed8828..7258195d3 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_system.cc
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_system.cc
@@ -146,12 +146,18 @@ void D3D11GraphicsSystem::Initialize() {
   XEASSERTNULL(driver_);
   driver_ = new D3D11GraphicsDriver(
       memory_, window_->swap_chain(), device_);
+  if (driver_->Initialize()) {
+    XELOGE("Unable to initialize D3D11 driver");
+    return;
+  }
 
   // Initial vsync kick.
   DispatchInterruptCallback(0);
 }
 
 void D3D11GraphicsSystem::Pump() {
+  SCOPE_profile_cpu_f("gpu");
+
   if (swap_pending_) {
     swap_pending_ = false;
 
diff --git a/src/xenia/gpu/d3d11/d3d11_resource_cache.cc b/src/xenia/gpu/d3d11/d3d11_resource_cache.cc
new file mode 100644
index 000000000..145e3d395
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_resource_cache.cc
@@ -0,0 +1,71 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
+
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
+#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
+#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
+#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
+
+
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::d3d11;
+
+
+D3D11ResourceCache::D3D11ResourceCache(Memory* memory,
+                                       ID3D11Device* device,
+                                       ID3D11DeviceContext* context)
+    : ResourceCache(memory),
+      device_(device), context_(context) {
+  device_->AddRef();
+  context_->AddRef();
+}
+
+D3D11ResourceCache::~D3D11ResourceCache() {
+  XESAFERELEASE(device_);
+  XESAFERELEASE(context_);
+}
+
+VertexShaderResource* D3D11ResourceCache::CreateVertexShader(
+    const MemoryRange& memory_range,
+    const VertexShaderResource::Info& info) {
+  return new D3D11VertexShaderResource(this, memory_range, info);
+}
+
+PixelShaderResource* D3D11ResourceCache::CreatePixelShader(
+    const MemoryRange& memory_range,
+    const PixelShaderResource::Info& info) {
+  return new D3D11PixelShaderResource(this, memory_range, info);
+}
+
+TextureResource* D3D11ResourceCache::CreateTexture(
+    const MemoryRange& memory_range,
+    const TextureResource::Info& info) {
+  return new D3D11TextureResource(this, memory_range, info);
+}
+
+SamplerStateResource* D3D11ResourceCache::CreateSamplerState(
+    const SamplerStateResource::Info& info) {
+  return new D3D11SamplerStateResource(this, info);
+}
+
+IndexBufferResource* D3D11ResourceCache::CreateIndexBuffer(
+    const MemoryRange& memory_range,
+    const IndexBufferResource::Info& info) {
+  return new D3D11IndexBufferResource(this, memory_range, info);
+}
+
+VertexBufferResource* D3D11ResourceCache::CreateVertexBuffer(
+    const MemoryRange& memory_range,
+    const VertexBufferResource::Info& info) {
+  return new D3D11VertexBufferResource(this, memory_range, info);
+}
diff --git a/src/xenia/gpu/d3d11/d3d11_resource_cache.h b/src/xenia/gpu/d3d11/d3d11_resource_cache.h
new file mode 100644
index 000000000..27248eb9c
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_resource_cache.h
@@ -0,0 +1,71 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
+#define XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
+
+#include <xenia/core.h>
+
+#include <xenia/gpu/resource_cache.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+
+// ResourceCache implementation for the D3D11 backend. Each Create* override
+// returns a newly allocated D3D11-specific resource constructed with this cache.
+class D3D11ResourceCache : public ResourceCache {
+public:
+  // Takes a reference (AddRef) on both |device| and |context|; the destructor
+  // drops them again via XESAFERELEASE.
+  D3D11ResourceCache(Memory* memory,
+                     ID3D11Device* device, ID3D11DeviceContext* context);
+  virtual ~D3D11ResourceCache();
+
+  // Borrowed pointers; no additional reference is taken for the caller.
+  ID3D11Device* device() const { return device_; }
+  ID3D11DeviceContext* context() const { return context_; }
+
+protected:
+  // Factory hooks overridden from ResourceCache, one per resource type.
+  VertexShaderResource* CreateVertexShader(
+      const MemoryRange& memory_range,
+      const VertexShaderResource::Info& info) override;
+  PixelShaderResource* CreatePixelShader(
+      const MemoryRange& memory_range,
+      const PixelShaderResource::Info& info) override;
+  TextureResource* CreateTexture(
+      const MemoryRange& memory_range,
+      const TextureResource::Info& info) override;
+  SamplerStateResource* CreateSamplerState(
+      const SamplerStateResource::Info& info) override;
+  IndexBufferResource* CreateIndexBuffer(
+      const MemoryRange& memory_range,
+      const IndexBufferResource::Info& info) override;
+  VertexBufferResource* CreateVertexBuffer(
+      const MemoryRange& memory_range,
+      const VertexBufferResource::Info& info) override;
+
+private:
+  // Referenced for the lifetime of the cache (see constructor/destructor).
+  ID3D11Device* device_;
+  ID3D11DeviceContext* context_;
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc
similarity index 51%
rename from src/xenia/gpu/d3d11/d3d11_texture_cache.cc
rename to src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc
index eb3442bfc..7fb09858a 100644
--- a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc
+++ b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc
@@ -7,53 +7,36 @@
  ******************************************************************************
  */
 
-#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
+#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
 
-#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
 
 
+using namespace std;
 using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::d3d11;
+using namespace xe::gpu::xenos;
 
 
-D3D11TextureCache::D3D11TextureCache(
-    Memory* memory,
-    ID3D11DeviceContext* context, ID3D11Device* device)
-    : TextureCache(memory),
-      context_(context), device_(device) {
-  context_->AddRef();
-  device_->AddRef();
+D3D11SamplerStateResource::D3D11SamplerStateResource(
+    D3D11ResourceCache* resource_cache, const Info& info)
+    : SamplerStateResource(info),
+      resource_cache_(resource_cache),
+      handle_(nullptr) {
 }
 
-D3D11TextureCache::~D3D11TextureCache() {
-  for (auto it = samplers_.begin(); it != samplers_.end(); ++it) {
-    auto& cached_state = it->second;
-    XESAFERELEASE(cached_state.state);
+D3D11SamplerStateResource::~D3D11SamplerStateResource() {
+  XESAFERELEASE(handle_);
+}
+
+int D3D11SamplerStateResource::Prepare() {
+  if (handle_) {
+    return 0;
   }
-  samplers_.clear();
 
-  XESAFERELEASE(device_);
-  XESAFERELEASE(context_);
-}
-
-Texture* D3D11TextureCache::CreateTexture(
-    uint32_t address, const uint8_t* host_address,
-    const xenos::xe_gpu_texture_fetch_t& fetch) {
-  return new D3D11Texture(this, address, host_address);
-}
-
-ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
-    const xenos::xe_gpu_texture_fetch_t& fetch,
-    const Shader::tex_buffer_desc_t& desc) {
   D3D11_SAMPLER_DESC sampler_desc;
   xe_zero_struct(&sampler_desc, sizeof(sampler_desc));
-  uint32_t min_filter = desc.tex_fetch.min_filter == 3 ?
-      fetch.min_filter : desc.tex_fetch.min_filter;
-  uint32_t mag_filter = desc.tex_fetch.mag_filter == 3 ?
-      fetch.mag_filter : desc.tex_fetch.mag_filter;
-  uint32_t mip_filter = desc.tex_fetch.mip_filter == 3 ?
-      fetch.mip_filter : desc.tex_fetch.mip_filter;
   // MIN, MAG, MIP
   static const D3D11_FILTER filter_matrix[2][2][3] = {
     {
@@ -87,7 +70,8 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
       },
     },
   };
-  sampler_desc.Filter = filter_matrix[min_filter][mag_filter][mip_filter];
+  sampler_desc.Filter =
+      filter_matrix[info_.min_filter][info_.mag_filter][info_.mip_filter];
   static const D3D11_TEXTURE_ADDRESS_MODE mode_map[] = {
     D3D11_TEXTURE_ADDRESS_WRAP,
     D3D11_TEXTURE_ADDRESS_MIRROR,
@@ -98,9 +82,9 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
     D3D11_TEXTURE_ADDRESS_BORDER,       // ?
     D3D11_TEXTURE_ADDRESS_MIRROR,       // ?
   };
-  sampler_desc.AddressU = mode_map[fetch.clamp_x];
-  sampler_desc.AddressV = mode_map[fetch.clamp_y];
-  sampler_desc.AddressW = mode_map[fetch.clamp_z];
+  sampler_desc.AddressU = mode_map[info_.clamp_u];
+  sampler_desc.AddressV = mode_map[info_.clamp_v];
+  sampler_desc.AddressW = mode_map[info_.clamp_w];
   sampler_desc.MipLODBias;
   sampler_desc.MaxAnisotropy = 1;
   sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
@@ -111,29 +95,12 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
   sampler_desc.MinLOD;
   sampler_desc.MaxLOD;
 
-  // TODO(benvanik): do this earlier without having to setup the whole struct?
-  size_t hash = hash_combine(
-      sampler_desc.Filter,
-      sampler_desc.AddressU,
-      sampler_desc.AddressV,
-      sampler_desc.AddressW);
-  auto range = samplers_.equal_range(hash);
-  for (auto it = range.first; it != range.second; ++it) {
-    const auto& cached_state = it->second;
-    // TODO(benvanik): faster compare?
-    if (memcmp(&sampler_desc, &cached_state.desc, sizeof(sampler_desc)) == 0) {
-      return cached_state.state;
-    }
-  }
-
-  ID3D11SamplerState* sampler_state = NULL;
-  HRESULT hr = device_->CreateSamplerState(&sampler_desc, &sampler_state);
+  HRESULT hr = resource_cache_->device()->CreateSamplerState(
+      &sampler_desc, &handle_);
   if (FAILED(hr)) {
     XELOGE("D3D11: unable to create sampler state");
-    return nullptr;
+    return 1;
   }
 
-  samplers_.insert({ hash, { sampler_desc, sampler_state } });
-
-  return sampler_state;
+  return 0;
 }
diff --git a/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h
new file mode 100644
index 000000000..6097339b4
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h
@@ -0,0 +1,48 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
+#define XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
+
+#include <xenia/gpu/sampler_state_resource.h>
+#include <xenia/gpu/xenos/ucode.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+class D3D11ResourceCache;
+
+
+class D3D11SamplerStateResource : public SamplerStateResource {
+public:
+  D3D11SamplerStateResource(D3D11ResourceCache* resource_cache,
+                            const Info& info);
+  ~D3D11SamplerStateResource() override;
+
+  void* handle() const override { return handle_; }
+
+  int Prepare() override;
+
+protected:
+  D3D11ResourceCache* resource_cache_;
+  ID3D11SamplerState* handle_;
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc
deleted file mode 100644
index 97e0cb295..000000000
--- a/src/xenia/gpu/d3d11/d3d11_shader.cc
+++ /dev/null
@@ -1,2059 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/d3d11/d3d11_shader.h>
-
-#include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
-#include <xenia/gpu/xenos/ucode.h>
-
-#include <d3dcompiler.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::d3d11;
-using namespace xe::gpu::xenos;
-
-
-namespace {
-
-const int OUTPUT_CAPACITY = 64 * 1024;
-
-int GetFormatComponentCount(uint32_t format) {
-  switch (format) {
-  case FMT_32:
-  case FMT_32_FLOAT:
-    return 1;
-  case FMT_16_16:
-  case FMT_16_16_FLOAT:
-  case FMT_32_32:
-  case FMT_32_32_FLOAT:
-    return 2;
-  case FMT_10_11_11:
-  case FMT_11_11_10:
-  case FMT_32_32_32_FLOAT:
-    return 3;
-  case FMT_8_8_8_8:
-  case FMT_2_10_10_10:
-  case FMT_16_16_16_16:
-  case FMT_16_16_16_16_FLOAT:
-  case FMT_32_32_32_32:
-  case FMT_32_32_32_32_FLOAT:
-    return 4;
-  default:
-    XELOGE("Unknown vertex format: %d", format);
-    XEASSERTALWAYS();
-    return 4;
-  }
-}
-
-const char* GetFormatTypeName(
-    uint32_t format, uint32_t format_comp_all, uint32_t num_format_all) {
-  switch (format) {
-  case FMT_32:
-    return format_comp_all ? "int" : "uint";
-  case FMT_32_FLOAT:
-    return "float";
-  case FMT_16_16:
-  case FMT_32_32:
-    if (!num_format_all) {
-      return format_comp_all ? "snorm float2" : "unorm float2";
-    } else {
-      return format_comp_all ? "int2" : "uint2";
-    }
-  case FMT_16_16_FLOAT:
-  case FMT_32_32_FLOAT:
-    return "float2";
-  case FMT_10_11_11:
-  case FMT_11_11_10:
-    return "int3"; // ?
-  case FMT_32_32_32_FLOAT:
-    return "float3";
-  case FMT_8_8_8_8:
-  case FMT_2_10_10_10:
-  case FMT_16_16_16_16:
-  case FMT_32_32_32_32:
-    if (!num_format_all) {
-      return format_comp_all ? "snorm float4" : "unorm float4";
-    } else {
-      return format_comp_all ? "int4" : "uint4";
-    }
-  case FMT_16_16_16_16_FLOAT:
-  case FMT_32_32_32_32_FLOAT:
-    return "float4";
-  default:
-    XELOGE("Unknown vertex format: %d", format);
-    XEASSERTALWAYS();
-    return "float4";
-  }
-}
-
-}  // anonymous namespace
-
-
-struct xe::gpu::d3d11::Output {
-  char buffer[OUTPUT_CAPACITY];
-  size_t capacity;
-  size_t offset;
-  Output() :
-      capacity(OUTPUT_CAPACITY),
-      offset(0) {
-    buffer[0] = 0;
-  }
-  void append(const char* format, ...) {
-    va_list args;
-    va_start(args, format);
-    int len = xevsnprintfa(
-        buffer + offset, capacity - offset, format, args);
-    va_end(args);
-    offset += len;
-    buffer[offset] = 0;
-  }
-};
-
-
-D3D11Shader::D3D11Shader(
-    ID3D11Device* device,
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) :
-    translated_src_(NULL),
-    Shader(type, src_ptr, length, hash) {
-  device_ = device;
-  device_->AddRef();
-}
-
-D3D11Shader::~D3D11Shader() {
-  if (translated_src_) {
-    xe_free(translated_src_);
-  }
-  XESAFERELEASE(device_);
-}
-
-void D3D11Shader::set_translated_src(char* value) {
-  if (translated_src_) {
-    xe_free(translated_src_);
-  }
-  translated_src_ = xestrdupa(value);
-}
-
-ID3D10Blob* D3D11Shader::Compile(const char* shader_source) {
-  SCOPE_profile_cpu_f("gpu");
-
-  // TODO(benvanik): pick shared runtime mode defines.
-  D3D10_SHADER_MACRO defines[] = {
-    "TEST_DEFINE", "1",
-    0, 0,
-  };
-
-  uint32_t flags1 = 0;
-  flags1 |= D3D10_SHADER_DEBUG;
-  flags1 |= D3D10_SHADER_ENABLE_STRICTNESS;
-  uint32_t flags2 = 0;
-
-  // Create a name.
-  const char* base_path = "";
-  if (FLAGS_dump_shaders.size()) {
-    base_path = FLAGS_dump_shaders.c_str();
-  }
-  char file_name[XE_MAX_PATH];
-  xesnprintfa(file_name, XECOUNT(file_name),
-      "%s/gen_%.16llX.%s",
-      base_path,
-      hash_,
-      type_ == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps");
-
-  if (FLAGS_dump_shaders.size()) {
-    FILE* f = fopen(file_name, "w");
-    fprintf(f, shader_source);
-    fprintf(f, "\n\n");
-    fprintf(f, "/*\n");
-    fprintf(f, disasm_src_);
-    fprintf(f, " */\n");
-    fclose(f);
-  }
-
-  // Compile shader to bytecode blob.
-  ID3D10Blob* shader_blob = 0;
-  ID3D10Blob* error_blob = 0;
-  HRESULT hr = D3DCompile(
-      shader_source, strlen(shader_source),
-      file_name,
-      defines, NULL,
-      "main",
-      type_ == XE_GPU_SHADER_TYPE_VERTEX ?
-          "vs_5_0" : "ps_5_0",
-      flags1, flags2,
-      &shader_blob, &error_blob);
-  if (error_blob) {
-    char* msg = (char*)error_blob->GetBufferPointer();
-    XELOGE("D3D11: shader compile failed with %s", msg);
-  }
-  XESAFERELEASE(error_blob);
-  if (FAILED(hr)) {
-    return NULL;
-  }
-  return shader_blob;
-}
-
-void D3D11Shader::AppendTextureHeader(Output* output) {
-  bool fetch_setup[32] = { false };
-
-  // 1 texture per constant slot, 1 sampler per fetch.
-  for (uint32_t  n = 0; n < tex_buffer_inputs_.count; n++) {
-    auto& input = tex_buffer_inputs_.descs[n];
-    auto& fetch = input.tex_fetch;
-
-    // Add texture, if needed.
-    if (!fetch_setup[fetch.const_idx]) {
-      fetch_setup[fetch.const_idx] = true;
-      const char* texture_type = NULL;
-      switch (fetch.dimension) {
-      case DIMENSION_1D:
-        texture_type = "Texture1D";
-        break;
-      default:
-      case DIMENSION_2D:
-        texture_type = "Texture2D";
-        break;
-      case DIMENSION_3D:
-        texture_type = "Texture3D";
-        break;
-      case DIMENSION_CUBE:
-        texture_type = "TextureCube";
-        break;
-      }
-      output->append("%s x_texture_%d;\n", texture_type, fetch.const_idx);
-    }
-
-    // Add sampler.
-    output->append("SamplerState x_sampler_%d;\n", n);
-  }
-}
-
-
-D3D11VertexShader::D3D11VertexShader(
-    ID3D11Device* device,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) :
-    handle_(0), input_layout_(0),
-    D3D11Shader(device, XE_GPU_SHADER_TYPE_VERTEX,
-                src_ptr, length, hash) {
-  xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_));
-}
-
-D3D11VertexShader::~D3D11VertexShader() {
-  for (size_t n = 0; n < XECOUNT(geometry_shaders_); n++) {
-    delete geometry_shaders_[n];
-  }
-  XESAFERELEASE(input_layout_);
-  XESAFERELEASE(handle_);
-}
-
-int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
-  SCOPE_profile_cpu_f("gpu");
-  if (handle_) {
-    return 0;
-  }
-
-  // TODO(benvanik): look in file based on hash/etc.
-  void* byte_code = NULL;
-  size_t byte_code_length = 0;
-
-  // Translate and compile source.
-  const char* shader_source = Translate(program_cntl);
-  if (!shader_source) {
-    return 1;
-  }
-  ID3D10Blob* shader_blob = Compile(shader_source);
-  if (!shader_blob) {
-    return 1;
-  }
-  byte_code_length = shader_blob->GetBufferSize();
-  byte_code = xe_malloc(byte_code_length);
-  xe_copy_struct(
-      byte_code, shader_blob->GetBufferPointer(), byte_code_length);
-  XESAFERELEASE(shader_blob);
-
-  // Create shader.
-  HRESULT hr = device_->CreateVertexShader(
-      byte_code, byte_code_length,
-      NULL,
-      &handle_);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: failed to create vertex shader");
-    xe_free(byte_code);
-    return 1;
-  }
-
-  // Create input layout.
-  size_t element_count = 0;
-  for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) {
-    element_count += vtx_buffer_inputs_.descs[n].element_count;
-  }
-  if (!element_count) {
-    XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?");
-    input_layout_ = NULL;
-    return 0;
-  }
-
-  D3D11_INPUT_ELEMENT_DESC* element_descs =
-      (D3D11_INPUT_ELEMENT_DESC*)xe_alloca(
-          sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count);
-  uint32_t el_index = 0;
-  for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) {
-    auto& input = vtx_buffer_inputs_.descs[n];
-    for (uint32_t m = 0; m < input.element_count; m++) {
-      auto& el = input.elements[m];
-      uint32_t vb_slot = input.input_index;
-      uint32_t num_format_all = el.vtx_fetch.num_format_all;
-      uint32_t format_comp_all = el.vtx_fetch.format_comp_all;
-      DXGI_FORMAT vtx_format;
-      switch (el.format) {
-      case FMT_8_8_8_8:
-        if (!num_format_all) {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
-        } else {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT;
-        }
-        break;
-      case FMT_2_10_10_10:
-        if (!num_format_all) {
-          vtx_format = DXGI_FORMAT_R10G10B10A2_UNORM;
-        } else {
-          vtx_format = DXGI_FORMAT_R10G10B10A2_UINT;
-        }
-        break;
-      // DXGI_FORMAT_R11G11B10_FLOAT?
-      case FMT_16_16:
-        if (!num_format_all) {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM;
-        } else {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT;
-        }
-        break;
-      case FMT_16_16_16_16:
-        if (!num_format_all) {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM;
-        } else {
-          vtx_format = format_comp_all ?
-              DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT;
-        }
-        break;
-      case FMT_16_16_FLOAT:
-        vtx_format = DXGI_FORMAT_R16G16_FLOAT;
-        break;
-      case FMT_16_16_16_16_FLOAT:
-        vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
-        break;
-      case FMT_32:
-        vtx_format = format_comp_all ?
-            DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT;
-        break;
-      case FMT_32_32:
-        vtx_format = format_comp_all ?
-            DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT;
-        break;
-      case FMT_32_32_32_32:
-        vtx_format = format_comp_all ?
-            DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT;
-        break;
-      case FMT_32_FLOAT:
-        vtx_format = DXGI_FORMAT_R32_FLOAT;
-        break;
-      case FMT_32_32_FLOAT:
-        vtx_format = DXGI_FORMAT_R32G32_FLOAT;
-        break;
-      case FMT_32_32_32_FLOAT:
-        vtx_format = DXGI_FORMAT_R32G32B32_FLOAT;
-        break;
-      case FMT_32_32_32_32_FLOAT:
-        vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
-        break;
-      default:
-        XEASSERTALWAYS();
-        break;
-      }
-      element_descs[el_index].SemanticName         = "XE_VF";
-      element_descs[el_index].SemanticIndex        = el_index;
-      element_descs[el_index].Format               = vtx_format;
-      element_descs[el_index].InputSlot            = vb_slot;
-      element_descs[el_index].AlignedByteOffset    = el.offset_words * 4;
-      element_descs[el_index].InputSlotClass       = D3D11_INPUT_PER_VERTEX_DATA;
-      element_descs[el_index].InstanceDataStepRate = 0;
-      el_index++;
-    }
-  }
-  hr = device_->CreateInputLayout(
-      element_descs,
-      (UINT)element_count,
-      byte_code, byte_code_length,
-      &input_layout_);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: failed to create vertex shader input layout");
-    xe_free(byte_code);
-    return 1;
-  }
-
-  xe_free(byte_code);
-
-  is_prepared_ = true;
-  return 0;
-}
-
-const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
-  SCOPE_profile_cpu_f("gpu");
-
-  Output* output = new Output();
-  xe_gpu_translate_ctx_t ctx;
-  ctx.output  = output;
-  ctx.type    = type_;
-  ctx.tex_fetch_index = 0;
-
-  // Add constants buffers.
-  // We could optimize this by only including used buffers, but the compiler
-  // seems to do a good job of doing this for us.
-  // It also does read detection, so c[512] can end up c[4] in the asm -
-  // instead of doing this optimization ourselves we could maybe just query
-  // this from the compiler.
-  output->append(
-    "cbuffer float_consts : register(b0) {\n"
-    "  float4 c[512];\n"
-    "};\n");
-  // TODO(benvanik): add bool/loop constants.
-
-  AppendTextureHeader(output);
-
-  // Transform utilities. We adjust the output position in various ways
-  // as we can't do this via D3D11 APIs.
-  output->append(
-    "cbuffer vs_consts : register(b3) {\n"
-    "  float4 window;\n"              // x,y,w,h
-    "  float4 viewport_z_enable;\n"   // min,(max - min),?,enabled
-    "  float4 viewport_size;\n"       // x,y,w,h
-    "};"
-    "float4 applyViewport(float4 pos) {\n"
-    "  if (viewport_z_enable.w) {\n"
-    //"    pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n"
-    //"    pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n"
-    //"    pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n"
-    // w?
-    "  } else {\n"
-    "    pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + float2(-1.0, 1.0);\n"
-    "    pos.zw = float2(0.0, 1.0);\n"
-    "  }\n"
-    "  pos.xy += window.xy;\n"
-    "  return pos;\n"
-    "}\n");
-
-  // Add vertex shader input.
-  output->append(
-    "struct VS_INPUT {\n");
-  uint32_t el_index = 0;
-  for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) {
-    auto& input = vtx_buffer_inputs_.descs[n];
-    for (uint32_t m = 0; m < input.element_count; m++) {
-      auto& el = input.elements[m];
-      auto& vtx = el.vtx_fetch;
-      const char* type_name = GetFormatTypeName(
-          el.format, el.vtx_fetch.format_comp_all, el.vtx_fetch.num_format_all);
-      uint32_t fetch_slot = vtx.const_index * 3 + vtx.const_index_sel;
-      output->append(
-        "  %s vf%u_%d : XE_VF%u;\n",
-        type_name, fetch_slot, vtx.offset, el_index);
-      el_index++;
-    }
-  }
-  output->append(
-    "};\n");
-
-  // Add vertex shader output (pixel shader input).
-  output->append(
-    "struct VS_OUTPUT {\n");
-  if (alloc_counts_.positions) {
-    XEASSERT(alloc_counts_.positions == 1);
-    output->append(
-      "  float4 oPos : SV_POSITION;\n");
-  }
-  if (alloc_counts_.params) {
-    output->append(
-      "  float4 o[%d] : XE_O;\n",
-      MAX_INTERPOLATORS);
-  }
-  if (alloc_counts_.point_size) {
-    output->append(
-      "  float4 oPointSize : PSIZE;\n");
-  }
-  output->append(
-    "};\n");
-
-  // Vertex shader main() header.
-  output->append(
-    "VS_OUTPUT main(VS_INPUT i) {\n"
-    "  VS_OUTPUT o;\n");
-
-  // Always write position, as some shaders seem to only write certain values.
-  output->append(
-    "  o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n");
-  if (alloc_counts_.point_size) {
-    output->append(
-      "  o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
-  }
-
-  // TODO(benvanik): remove this, if possible (though the compiler may be smart
-  //     enough to do it for us).
-  if (alloc_counts_.params) {
-    for (uint32_t n = 0; n < MAX_INTERPOLATORS; n++) {
-      output->append(
-        "  o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n);
-    }
-  }
-
-  // Add temporaries for any registers we may use.
-  uint32_t temp_regs = program_cntl->vs_regs + program_cntl->ps_regs;
-  for (uint32_t n = 0; n <= temp_regs; n++) {
-    output->append(
-      "  float4 r%d = c[%d];\n", n, n);
-  }
-  output->append("  float4 t;\n");
-
-  // Execute blocks.
-  for (std::vector<instr_cf_exec_t>::iterator it = execs_.begin();
-       it != execs_.end(); ++it) {
-    instr_cf_exec_t& cf = *it;
-    // TODO(benvanik): figure out how sequences/jmps/loops/etc work.
-    if (TranslateExec(ctx, cf)) {
-      delete output;
-      return NULL;
-    }
-  }
-
-  // main footer.
-  output->append(
-    "  o.oPos = applyViewport(o.oPos);\n"
-    "  return o;\n"
-    "};\n");
-
-  set_translated_src(output->buffer);
-  delete output;
-  return translated_src_;
-}
-
-int D3D11VertexShader::DemandGeometryShader(GeometryShaderType type,
-                                            D3D11GeometryShader** out_shader) {
-  if (geometry_shaders_[type]) {
-    *out_shader = geometry_shaders_[type];
-    return 0;
-  }
-
-  // Demand generate.
-  D3D11GeometryShader* shader = NULL;
-  switch (type) {
-  case POINT_SPRITE_SHADER:
-    shader = new D3D11PointSpriteGeometryShader(device_, hash_);
-    break;
-  case RECT_LIST_SHADER:
-    shader = new D3D11RectListGeometryShader(device_, hash_);
-    break;
-  case QUAD_LIST_SHADER:
-    shader = new D3D11QuadListGeometryShader(device_, hash_);
-    break;
-  default:
-    XEASSERTALWAYS();
-    return 1;
-  }
-  if (!shader) {
-    return 1;
-  }
-
-  if (shader->Prepare(this)) {
-    delete shader;
-    return 1;
-  }
-
-  geometry_shaders_[type] = shader;
-  *out_shader = geometry_shaders_[type];
-  return 0;
-}
-
-
-D3D11PixelShader::D3D11PixelShader(
-    ID3D11Device* device,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) :
-    handle_(0),
-    D3D11Shader(device, XE_GPU_SHADER_TYPE_PIXEL,
-                src_ptr, length, hash) {
-}
-
-D3D11PixelShader::~D3D11PixelShader() {
-  XESAFERELEASE(handle_);
-}
-
-int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl,
-                              D3D11VertexShader* input_shader) {
-  SCOPE_profile_cpu_f("gpu");
-  if (handle_) {
-    return 0;
-  }
-
-  // TODO(benvanik): look in file based on hash/etc.
-  void* byte_code = NULL;
-  size_t byte_code_length = 0;
-
-  // Translate and compile source.
-  const char* shader_source = Translate(program_cntl, input_shader);
-  if (!shader_source) {
-    return 1;
-  }
-  ID3D10Blob* shader_blob = Compile(shader_source);
-  if (!shader_blob) {
-    return 1;
-  }
-  byte_code_length = shader_blob->GetBufferSize();
-  byte_code = xe_malloc(byte_code_length);
-  xe_copy_struct(
-      byte_code, shader_blob->GetBufferPointer(), byte_code_length);
-  XESAFERELEASE(shader_blob);
-
-  // Create shader.
-  HRESULT hr = device_->CreatePixelShader(
-      byte_code, byte_code_length,
-      NULL,
-      &handle_);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: failed to create pixel shader");
-    xe_free(byte_code);
-    return 1;
-  }
-
-  xe_free(byte_code);
-
-  is_prepared_ = true;
-  return 0;
-}
-
-const char* D3D11PixelShader::Translate(
-    xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) {
-  SCOPE_profile_cpu_f("gpu");
-  Output* output = new Output();
-  xe_gpu_translate_ctx_t ctx;
-  ctx.output  = output;
-  ctx.type    = type_;
-  ctx.tex_fetch_index = 0;
-
-  // We need an input VS to make decisions here.
-  // TODO(benvanik): do we need to pair VS/PS up and store the combination?
-  // If the same PS is used with different VS that output different amounts
-  // (and less than the number of required registers), things may die.
-  XEASSERTNOTNULL(input_shader);
-  const Shader::alloc_counts_t& input_alloc_counts =
-      input_shader->alloc_counts();
-
-  // Add constants buffers.
-  // We could optimize this by only including used buffers, but the compiler
-  // seems to do a good job of doing this for us.
-  // It also does read detection, so c[512] can end up c[4] in the asm -
-  // instead of doing this optimization ourselves we could maybe just query
-  // this from the compiler.
-  output->append(
-    "cbuffer float_consts : register(b0) {\n"
-    "  float4 c[512];\n"
-    "};\n");
-  // TODO(benvanik): add bool/loop constants.
-
-  AppendTextureHeader(output);
-
-  // Add vertex shader output (pixel shader input).
-  output->append(
-    "struct VS_OUTPUT {\n");
-  if (input_alloc_counts.positions) {
-    XEASSERT(input_alloc_counts.positions == 1);
-    output->append(
-      "  float4 oPos : SV_POSITION;\n");
-  }
-  if (input_alloc_counts.params) {
-    output->append(
-      "  float4 o[%d] : XE_O;\n",
-      MAX_INTERPOLATORS);
-  }
-  output->append(
-    "};\n");
-
-  // Add pixel shader output.
-  output->append(
-    "struct PS_OUTPUT {\n");
-  for (uint32_t n = 0; n < alloc_counts_.params; n++) {
-    output->append(
-      "  float4 oC%d   : SV_TARGET%d;\n", n, n);
-    if (program_cntl->ps_export_depth) {
-      // Is this per render-target?
-      output->append(
-        "  float oD%d   : SV_DEPTH%d;\n", n, n);
-    }
-  }
-  output->append(
-    "};\n");
-
-  // Pixel shader main() header.
-  output->append(
-    "PS_OUTPUT main(VS_OUTPUT i) {\n"
-    "  PS_OUTPUT o;\n");
-
-  // Add temporary registers.
-  uint32_t temp_regs = program_cntl->vs_regs + program_cntl->ps_regs;
-  for (uint32_t n = 0; n <= MAX(15, temp_regs); n++) {
-    output->append(
-      "  float4 r%d = c[%d];\n", n, n);
-  }
-  output->append("  float4 t;\n");
-
-  // Bring registers local.
-  if (input_alloc_counts.params) {
-    for (uint32_t n = 0; n < MAX_INTERPOLATORS; n++) {
-      output->append(
-        "  r%d = i.o[%d];\n", n, n);
-    }
-  }
-
-  // Execute blocks.
-  for (std::vector<instr_cf_exec_t>::iterator it = execs_.begin();
-       it != execs_.end(); ++it) {
-    instr_cf_exec_t& cf = *it;
-    // TODO(benvanik): figure out how sequences/jmps/loops/etc work.
-    if (TranslateExec(ctx, cf)) {
-      delete output;
-      return NULL;
-    }
-  }
-
-  // main footer.
-  output->append(
-    "  return o;\n"
-    "}\n");
-
-  set_translated_src(output->buffer);
-  delete output;
-  return translated_src_;
-}
-
-
-namespace {
-
-static const char chan_names[] = {
-  'x', 'y', 'z', 'w',
-  // these only apply to FETCH dst's, and we shouldn't be using them:
-  '0', '1', '?', '_',
-};
-
-void AppendSrcReg(
-    xe_gpu_translate_ctx_t& ctx,
-    uint32_t num, uint32_t type,
-    uint32_t swiz, uint32_t negate, uint32_t abs) {
-  if (negate) {
-    ctx.output->append("-");
-  }
-  if (abs) {
-    ctx.output->append("abs(");
-  }
-  if (type) {
-    // Register.
-    ctx.output->append("r%u", num);
-  } else {
-    // Constant.
-    ctx.output->append("c[%u]", num);
-  }
-  if (swiz) {
-    ctx.output->append(".");
-    for (int i = 0; i < 4; i++) {
-      ctx.output->append("%c", chan_names[(swiz + i) & 0x3]);
-      swiz >>= 2;
-    }
-  }
-  if (abs) {
-    ctx.output->append(")");
-  }
-}
-
-void AppendDestRegName(
-    xe_gpu_translate_ctx_t& ctx,
-    uint32_t num, uint32_t dst_exp) {
-  if (!dst_exp) {
-    // Register.
-    ctx.output->append("r%u", num);
-  } else {
-    // Export.
-    switch (ctx.type) {
-    case XE_GPU_SHADER_TYPE_VERTEX:
-      switch (num) {
-      case 62:
-        ctx.output->append("o.oPos");
-        break;
-      case 63:
-        ctx.output->append("o.oPointSize");
-        break;
-      default:
-        // Varying.
-        ctx.output->append("o.o[%u]", num);;
-        break;
-      }
-      break;
-    case XE_GPU_SHADER_TYPE_PIXEL:
-      switch (num) {
-      case 0:
-        ctx.output->append("o.oC0");
-        break;
-      default:
-        // TODO(benvanik): other render targets?
-        // TODO(benvanik): depth?
-        XEASSERTALWAYS();
-        break;
-      }
-      break;
-    }
-  }
-}
-
-void AppendDestReg(
-    xe_gpu_translate_ctx_t& ctx,
-    uint32_t num, uint32_t mask, uint32_t dst_exp) {
-  if (mask != 0xF) {
-    // If masking, store to a temporary variable and clean it up later.
-    ctx.output->append("t");
-  } else {
-    // Store directly to output.
-    AppendDestRegName(ctx, num, dst_exp);
-  }
-}
-
-void AppendDestRegPost(
-    xe_gpu_translate_ctx_t& ctx,
-    uint32_t num, uint32_t mask, uint32_t dst_exp) {
-  if (mask != 0xF) {
-    // Masking.
-    ctx.output->append("  ");
-    AppendDestRegName(ctx, num, dst_exp);
-    ctx.output->append(" = float4(");
-    for (int i = 0; i < 4; i++) {
-      // TODO(benvanik): mask out values? mix in old value as temp?
-      // ctx.output->append("%c", (mask & 0x1) ? chan_names[i] : 'w');
-      if (!(mask & 0x1)) {
-        AppendDestRegName(ctx, num, dst_exp);
-      } else {
-        ctx.output->append("t");
-      }
-      ctx.output->append(".%c", chan_names[i]);
-      mask >>= 1;
-      if (i < 3) {
-        ctx.output->append(", ");
-      }
-    }
-    ctx.output->append(");\n");
-  }
-}
-
-void print_srcreg(
-    Output* output,
-    uint32_t num, uint32_t type,
-    uint32_t swiz, uint32_t negate, uint32_t abs) {
-  if (negate) {
-    output->append("-");
-  }
-  if (abs) {
-    output->append("|");
-  }
-  output->append("%c%u", type ? 'R' : 'C', num);
-  if (swiz) {
-    output->append(".");
-    for (int i = 0; i < 4; i++) {
-      output->append("%c", chan_names[(swiz + i) & 0x3]);
-      swiz >>= 2;
-    }
-  }
-  if (abs) {
-    output->append("|");
-  }
-}
-
-void print_dstreg(
-    Output* output, uint32_t num, uint32_t mask, uint32_t dst_exp) {
-  output->append("%s%u", dst_exp ? "export" : "R", num);
-  if (mask != 0xf) {
-    output->append(".");
-    for (int i = 0; i < 4; i++) {
-      output->append("%c", (mask & 0x1) ? chan_names[i] : '_');
-      mask >>= 1;
-    }
-  }
-}
-
-void print_export_comment(
-    Output* output, uint32_t num, XE_GPU_SHADER_TYPE type) {
-  const char *name = NULL;
-  switch (type) {
-  case XE_GPU_SHADER_TYPE_VERTEX:
-    switch (num) {
-    case 62: name = "gl_Position";  break;
-    case 63: name = "gl_PointSize"; break;
-    }
-    break;
-  case XE_GPU_SHADER_TYPE_PIXEL:
-    switch (num) {
-    case 0:  name = "gl_FragColor"; break;
-    }
-    break;
-  }
-  /* if we had a symbol table here, we could look
-   * up the name of the varying..
-   */
-  if (name) {
-    output->append("\t; %s", name);
-  }
-}
-
-int TranslateALU_ADDv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(" + ");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MULv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(" * ");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MAXv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  if (alu.src1_reg == alu.src2_reg &&
-      alu.src1_sel == alu.src2_sel &&
-      alu.src1_swiz == alu.src2_swiz &&
-      alu.src1_reg_negate == alu.src2_reg_negate &&
-      alu.src1_reg_abs == alu.src2_reg_abs) {
-    // This is a mov.
-    AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  } else {
-    ctx.output->append("max(");
-    AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-    ctx.output->append(", ");
-    AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-    ctx.output->append(")");
-  }
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MINv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("min(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(", ");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_SETXXv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("float4((");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").x %s (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").x ? 1.0 : 0.0, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").y %s (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").y ? 1.0 : 0.0, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").z %s (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").z ? 1.0 : 0.0, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").w %s (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").w ? 1.0 : 0.0)");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_SETEv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXv(ctx, alu, "==");
-}
-int TranslateALU_SETGTv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXv(ctx, alu, ">");
-}
-int TranslateALU_SETGTEv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXv(ctx, alu, ">=");
-}
-int TranslateALU_SETNEv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXv(ctx, alu, "!=");
-}
-
-int TranslateALU_FRACv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("frac(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_TRUNCv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("trunc(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_FLOORv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("floor(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MULADDv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("mad(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(", ");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(", ");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_CNDXXv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as values.
-  ctx.output->append("float4((");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").x %s 0.0 ? (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").x : (");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(").x, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").y %s 0.0 ? (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").y : (");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(").y, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").z %s 0.0 ? (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").z : (");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(").z, (");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").w %s 0.0 ? (", op);
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").w : (");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(").w)");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_CNDEv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_CNDXXv(ctx, alu, "==");
-}
-int TranslateALU_CNDGTEv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_CNDXXv(ctx, alu, ">=");
-}
-int TranslateALU_CNDGTv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_CNDXXv(ctx, alu, ">");
-}
-
-int TranslateALU_DOT4v(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("dot(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(", ");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(")");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_DOT3v(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("dot(float4(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").xyz, float4(");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").xyz)");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_DOT2ADDv(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("dot(float4(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(").xy, float4(");
-  AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs);
-  ctx.output->append(").xy) + ");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".x");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-// CUBEv
-
-int TranslateALU_MAX4v(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.vector_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("max(");
-  ctx.output->append("max(");
-  ctx.output->append("max(");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(".x, ");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(".y), ");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(".z), ");
-  AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs);
-  ctx.output->append(".w)");
-  if (alu.vector_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data);
-  return 0;
-}
-
-// ...
-
-int TranslateALU_MAXs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
-    // This is a mov.
-    AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  } else {
-    ctx.output->append("max(");
-    AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-    ctx.output->append(".x, ");
-    AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-    ctx.output->append(".y).xxxx");
-  }
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MINs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("min(");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".x, ");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".y).xxxx");
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_SETXXs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("((");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op);
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_SETEs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXs(ctx, alu, "==");
-}
-int TranslateALU_SETGTs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXs(ctx, alu, ">");
-}
-int TranslateALU_SETGTEs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXs(ctx, alu, ">=");
-}
-int TranslateALU_SETNEs(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SETXXs(ctx, alu, "!=");
-}
-
-int TranslateALU_RECIP_IEEE(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  ctx.output->append("(1.0 / ");
-  AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(")");
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-
-int TranslateALU_MUL_CONST_0(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
-  uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
-  uint32_t swiz_b = (src3_swiz & 0x3);
-  uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
-  ctx.output->append("(");
-  AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c * ", chan_names[swiz_a]);
-  AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c", chan_names[swiz_b]);
-  ctx.output->append(").xxxx");
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_MUL_CONST_1(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_MUL_CONST_0(ctx, alu);
-}
-
-int TranslateALU_ADD_CONST_0(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
-  uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
-  uint32_t swiz_b = (src3_swiz & 0x3);
-  uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
-  ctx.output->append("(");
-  AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c + ", chan_names[swiz_a]);
-  AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c", chan_names[swiz_b]);
-  ctx.output->append(").xxxx");
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_ADD_CONST_1(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_ADD_CONST_0(ctx, alu);
-}
-
-int TranslateALU_SUB_CONST_0(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  ctx.output->append(" = ");
-  if (alu.scalar_clamp) {
-    ctx.output->append("saturate(");
-  }
-  uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
-  uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
-  uint32_t swiz_b = (src3_swiz & 0x3);
-  uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
-  ctx.output->append("(");
-  AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c - ", chan_names[swiz_a]);
-  AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
-  ctx.output->append(".%c", chan_names[swiz_b]);
-  ctx.output->append(").xxxx");
-  if (alu.scalar_clamp) {
-    ctx.output->append(")");
-  }
-  ctx.output->append(";\n");
-  AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
-  return 0;
-}
-int TranslateALU_SUB_CONST_1(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) {
-  return TranslateALU_SUB_CONST_0(ctx, alu);
-}
-
-typedef int (*xe_gpu_translate_alu_fn)(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu);
-typedef struct {
-  uint32_t    num_srcs;
-  const char* name;
-  xe_gpu_translate_alu_fn   fn;
-} xe_gpu_translate_alu_info_t;
-#define ALU_INSTR(opc, num_srcs) \
-    { num_srcs, #opc, 0 }
-#define ALU_INSTR_IMPL(opc, num_srcs) \
-    { num_srcs, #opc, TranslateALU_##opc }
-static xe_gpu_translate_alu_info_t vector_alu_instrs[0x20] = {
-  ALU_INSTR_IMPL(ADDv,               2),  // 0
-  ALU_INSTR_IMPL(MULv,               2),  // 1
-  ALU_INSTR_IMPL(MAXv,               2),  // 2
-  ALU_INSTR_IMPL(MINv,               2),  // 3
-  ALU_INSTR_IMPL(SETEv,              2),  // 4
-  ALU_INSTR_IMPL(SETGTv,             2),  // 5
-  ALU_INSTR_IMPL(SETGTEv,            2),  // 6
-  ALU_INSTR_IMPL(SETNEv,             2),  // 7
-  ALU_INSTR_IMPL(FRACv,              1),  // 8
-  ALU_INSTR_IMPL(TRUNCv,             1),  // 9
-  ALU_INSTR_IMPL(FLOORv,             1),  // 10
-  ALU_INSTR_IMPL(MULADDv,            3),  // 11
-  ALU_INSTR_IMPL(CNDEv,              3),  // 12
-  ALU_INSTR_IMPL(CNDGTEv,            3),  // 13
-  ALU_INSTR_IMPL(CNDGTv,             3),  // 14
-  ALU_INSTR_IMPL(DOT4v,              2),  // 15
-  ALU_INSTR_IMPL(DOT3v,              2),  // 16
-  ALU_INSTR_IMPL(DOT2ADDv,           3),  // 17 -- ???
-  ALU_INSTR(CUBEv,              2),  // 18
-  ALU_INSTR_IMPL(MAX4v,              1),  // 19
-  ALU_INSTR(PRED_SETE_PUSHv,    2),  // 20
-  ALU_INSTR(PRED_SETNE_PUSHv,   2),  // 21
-  ALU_INSTR(PRED_SETGT_PUSHv,   2),  // 22
-  ALU_INSTR(PRED_SETGTE_PUSHv,  2),  // 23
-  ALU_INSTR(KILLEv,             2),  // 24
-  ALU_INSTR(KILLGTv,            2),  // 25
-  ALU_INSTR(KILLGTEv,           2),  // 26
-  ALU_INSTR(KILLNEv,            2),  // 27
-  ALU_INSTR(DSTv,               2),  // 28
-  ALU_INSTR(MOVAv,              1),  // 29
-};
-static xe_gpu_translate_alu_info_t scalar_alu_instrs[0x40] = {
-  ALU_INSTR(ADDs,               1),  // 0
-  ALU_INSTR(ADD_PREVs,          1),  // 1
-  ALU_INSTR(MULs,               1),  // 2
-  ALU_INSTR(MUL_PREVs,          1),  // 3
-  ALU_INSTR(MUL_PREV2s,         1),  // 4
-  ALU_INSTR_IMPL(MAXs,               1),  // 5
-  ALU_INSTR_IMPL(MINs,               1),  // 6
-  ALU_INSTR_IMPL(SETEs,              1),  // 7
-  ALU_INSTR_IMPL(SETGTs,             1),  // 8
-  ALU_INSTR_IMPL(SETGTEs,            1),  // 9
-  ALU_INSTR_IMPL(SETNEs,             1),  // 10
-  ALU_INSTR(FRACs,              1),  // 11
-  ALU_INSTR(TRUNCs,             1),  // 12
-  ALU_INSTR(FLOORs,             1),  // 13
-  ALU_INSTR(EXP_IEEE,           1),  // 14
-  ALU_INSTR(LOG_CLAMP,          1),  // 15
-  ALU_INSTR(LOG_IEEE,           1),  // 16
-  ALU_INSTR(RECIP_CLAMP,        1),  // 17
-  ALU_INSTR(RECIP_FF,           1),  // 18
-  ALU_INSTR_IMPL(RECIP_IEEE,         1),  // 19
-  ALU_INSTR(RECIPSQ_CLAMP,      1),  // 20
-  ALU_INSTR(RECIPSQ_FF,         1),  // 21
-  ALU_INSTR(RECIPSQ_IEEE,       1),  // 22
-  ALU_INSTR(MOVAs,              1),  // 23
-  ALU_INSTR(MOVA_FLOORs,        1),  // 24
-  ALU_INSTR(SUBs,               1),  // 25
-  ALU_INSTR(SUB_PREVs,          1),  // 26
-  ALU_INSTR(PRED_SETEs,         1),  // 27
-  ALU_INSTR(PRED_SETNEs,        1),  // 28
-  ALU_INSTR(PRED_SETGTs,        1),  // 29
-  ALU_INSTR(PRED_SETGTEs,       1),  // 30
-  ALU_INSTR(PRED_SET_INVs,      1),  // 31
-  ALU_INSTR(PRED_SET_POPs,      1),  // 32
-  ALU_INSTR(PRED_SET_CLRs,      1),  // 33
-  ALU_INSTR(PRED_SET_RESTOREs,  1),  // 34
-  ALU_INSTR(KILLEs,             1),  // 35
-  ALU_INSTR(KILLGTs,            1),  // 36
-  ALU_INSTR(KILLGTEs,           1),  // 37
-  ALU_INSTR(KILLNEs,            1),  // 38
-  ALU_INSTR(KILLONEs,           1),  // 39
-  ALU_INSTR(SQRT_IEEE,          1),  // 40
-  { 0, 0, false },
-  ALU_INSTR_IMPL(MUL_CONST_0,        2),  // 42
-  ALU_INSTR_IMPL(MUL_CONST_1,        2),  // 43
-  ALU_INSTR_IMPL(ADD_CONST_0,        2),  // 44
-  ALU_INSTR_IMPL(ADD_CONST_1,        2),  // 45
-  ALU_INSTR_IMPL(SUB_CONST_0,        2),  // 46
-  ALU_INSTR_IMPL(SUB_CONST_1,        2),  // 47
-  ALU_INSTR(SIN,                1),  // 48
-  ALU_INSTR(COS,                1),  // 49
-  ALU_INSTR(RETAIN_PREV,        1),  // 50
-};
-#undef ALU_INSTR
-
-int TranslateALU(
-    xe_gpu_translate_ctx_t& ctx, const instr_alu_t* alu, int sync) {
-  Output* output = ctx.output;
-
-  if (!alu->scalar_write_mask && !alu->vector_write_mask) {
-    output->append("  //   <nop>\n");
-    return 0;
-  }
-
-  if (alu->vector_write_mask) {
-    // Disassemble vector op.
-    xe_gpu_translate_alu_info_t& iv = vector_alu_instrs[alu->vector_opc];
-    output->append("  //   %sALU:\t", sync ? "(S)" : "   ");
-    output->append("%s", iv.name);
-    if (alu->pred_select & 0x2) {
-      // seems to work similar to conditional execution in ARM instruction
-      // set, so let's use a similar syntax for now:
-      output->append((alu->pred_select & 0x1) ? "EQ" : "NE");
-    }
-    output->append("\t");
-    print_dstreg(output,
-                  alu->vector_dest, alu->vector_write_mask, alu->export_data);
-    output->append(" = ");
-    if (iv.num_srcs == 3) {
-      print_srcreg(output,
-                    alu->src3_reg, alu->src3_sel, alu->src3_swiz,
-                    alu->src3_reg_negate, alu->src3_reg_abs);
-      output->append(", ");
-    }
-    print_srcreg(output,
-                  alu->src1_reg, alu->src1_sel, alu->src1_swiz,
-                  alu->src1_reg_negate, alu->src1_reg_abs);
-    if (iv.num_srcs > 1) {
-      output->append(", ");
-      print_srcreg(output,
-                    alu->src2_reg, alu->src2_sel, alu->src2_swiz,
-                    alu->src2_reg_negate, alu->src2_reg_abs);
-    }
-    if (alu->vector_clamp) {
-      output->append(" CLAMP");
-    }
-    if (alu->export_data) {
-      print_export_comment(output, alu->vector_dest, ctx.type);
-    }
-    output->append("\n");
-
-    // Translate vector op.
-    if (iv.fn) {
-      output->append("  ");
-      if (iv.fn(ctx, *alu)) {
-        return 1;
-      }
-    } else {
-      output->append("  // <UNIMPLEMENTED>\n");
-    }
-  }
-
-  if (alu->scalar_write_mask || !alu->vector_write_mask) {
-    // 2nd optional scalar op:
-
-    // Disassemble scalar op.
-    xe_gpu_translate_alu_info_t& is = scalar_alu_instrs[alu->scalar_opc];
-    output->append("  //  ");
-    output->append("\t");
-    if (is.name) {
-      output->append("\t    \t%s\t", is.name);
-    } else {
-      output->append("\t    \tOP(%u)\t", alu->scalar_opc);
-    }
-    print_dstreg(output,
-                 alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
-    output->append(" = ");
-    if (is.num_srcs == 2) {
-      // ADD_CONST_0 dest, [const], [reg]
-      uint32_t src3_swiz = alu->src3_swiz & ~0x3C;
-      uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
-      uint32_t swiz_b = (src3_swiz & 0x3);
-      print_srcreg(output,
-                   alu->src3_reg, 0, 0,
-                   alu->src3_reg_negate, alu->src3_reg_abs);
-      output->append(".%c", chan_names[swiz_a]);
-      output->append(", ");
-      uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1);
-      print_srcreg(output,
-                   reg2, 1, 0,
-                   alu->src3_reg_negate, alu->src3_reg_abs);
-      output->append(".%c", chan_names[swiz_b]);
-    } else {
-      print_srcreg(output,
-                   alu->src3_reg, alu->src3_sel, alu->src3_swiz,
-                   alu->src3_reg_negate, alu->src3_reg_abs);
-    }
-    if (alu->scalar_clamp) {
-      output->append(" CLAMP");
-    }
-    if (alu->export_data) {
-      print_export_comment(output, alu->scalar_dest, ctx.type);
-    }
-    output->append("\n");
-
-    // Translate scalar op.
-    if (is.fn) {
-      output->append("  ");
-      if (is.fn(ctx, *alu)) {
-        return 1;
-      }
-    } else {
-      output->append("  // <UNIMPLEMENTED>\n");
-    }
-  }
-
-  return 0;
-}
-
-struct {
-  const char *name;
-} fetch_types[0xff] = {
-#define TYPE(id) { #id }
-    TYPE(FMT_1_REVERSE), // 0
-    {0},
-    TYPE(FMT_8), // 2
-    {0},
-    {0},
-    {0},
-    TYPE(FMT_8_8_8_8), // 6
-    TYPE(FMT_2_10_10_10), // 7
-    {0},
-    {0},
-    TYPE(FMT_8_8), // 10
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    TYPE(FMT_16), // 24
-    TYPE(FMT_16_16), // 25
-    TYPE(FMT_16_16_16_16), // 26
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    TYPE(FMT_32), // 33
-    TYPE(FMT_32_32), // 34
-    TYPE(FMT_32_32_32_32), // 35
-    TYPE(FMT_32_FLOAT), // 36
-    TYPE(FMT_32_32_FLOAT), // 37
-    TYPE(FMT_32_32_32_32_FLOAT), // 38
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    {0},
-    TYPE(FMT_32_32_32_FLOAT), // 57
-#undef TYPE
-};
-
-void print_fetch_dst(Output* output, uint32_t dst_reg, uint32_t dst_swiz) {
-  output->append("\tR%u.", dst_reg);
-  for (int i = 0; i < 4; i++) {
-    output->append("%c", chan_names[dst_swiz & 0x7]);
-    dst_swiz >>= 3;
-  }
-}
-
-void AppendFetchDest(Output* output, uint32_t dst_reg, uint32_t dst_swiz) {
-  output->append("r%u.", dst_reg);
-  for (int i = 0; i < 4; i++) {
-    output->append("%c", chan_names[dst_swiz & 0x7]);
-    dst_swiz >>= 3;
-  }
-}
-
-int TranslateVertexFetch(
-    xe_gpu_translate_ctx_t& ctx, const instr_fetch_vtx_t* vtx, int sync) {
-  Output* output = ctx.output;
-
-  // Disassemble.
-  output->append("  //   %sFETCH:\t", sync ? "(S)" : "   ");
-  if (vtx->pred_select) {
-    output->append(vtx->pred_condition ? "EQ" : "NE");
-  }
-  print_fetch_dst(output, vtx->dst_reg, vtx->dst_swiz);
-  output->append(" = R%u.", vtx->src_reg);
-  output->append("%c", chan_names[vtx->src_swiz & 0x3]);
-  if (fetch_types[vtx->format].name) {
-    output->append(" %s", fetch_types[vtx->format].name);
-  } else  {
-    output->append(" TYPE(0x%x)", vtx->format);
-  }
-  output->append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED");
-  if (!vtx->num_format_all) {
-    output->append(" NORMALIZED");
-  }
-  output->append(" STRIDE(%u)", vtx->stride);
-  if (vtx->offset) {
-    output->append(" OFFSET(%u)", vtx->offset);
-  }
-  output->append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
-  if (1) {
-    // XXX
-    output->append(" src_reg_am=%u", vtx->src_reg_am);
-    output->append(" dst_reg_am=%u", vtx->dst_reg_am);
-    output->append(" num_format_all=%u", vtx->num_format_all);
-    output->append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
-    output->append(" exp_adjust_all=%u", vtx->exp_adjust_all);
-  }
-  output->append("\n");
-
-  // Translate.
-  output->append("  ");
-  output->append("r%u.xyzw", vtx->dst_reg);
-  output->append(" = float4(");
-  uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
-  // TODO(benvanik): detect xyzw = xyzw, etc.
-  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
-  uint32_t component_count = GetFormatComponentCount(vtx->format);
-  uint32_t dst_swiz = vtx->dst_swiz;
-  for (int i = 0; i < 4; i++) {
-    if ((dst_swiz & 0x7) == 4) {
-      output->append("0.0");
-    } else if ((dst_swiz & 0x7) == 5) {
-      output->append("1.0");
-    } else if ((dst_swiz & 0x7) == 6) {
-      // ?
-      output->append("?");
-    } else if ((dst_swiz & 0x7) == 7) {
-      output->append("r%u.%c", vtx->dst_reg, chan_names[i]);
-    } else {
-      output->append("i.vf%u_%d.%c",
-                     fetch_slot, vtx->offset,
-                     chan_names[dst_swiz & 0x3]);
-    }
-    if (i < 3) {
-      output->append(", ");
-    }
-    dst_swiz >>= 3;
-  }
-  output->append(");\n");
-  return 0;
-}
-
-int TranslateTextureFetch(
-  xe_gpu_translate_ctx_t& ctx, const instr_fetch_tex_t* tex, int sync) {
-  Output* output = ctx.output;
-
-  // Disassemble.
-  static const char *filter[] = {
-    "POINT",    // TEX_FILTER_POINT
-    "LINEAR",   // TEX_FILTER_LINEAR
-    "BASEMAP",  // TEX_FILTER_BASEMAP
-  };
-  static const char *aniso_filter[] = {
-    "DISABLED", // ANISO_FILTER_DISABLED
-    "MAX_1_1",  // ANISO_FILTER_MAX_1_1
-    "MAX_2_1",  // ANISO_FILTER_MAX_2_1
-    "MAX_4_1",  // ANISO_FILTER_MAX_4_1
-    "MAX_8_1",  // ANISO_FILTER_MAX_8_1
-    "MAX_16_1", // ANISO_FILTER_MAX_16_1
-  };
-  static const char *arbitrary_filter[] = {
-    "2x4_SYM",  // ARBITRARY_FILTER_2X4_SYM
-    "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM
-    "4x2_SYM",  // ARBITRARY_FILTER_4X2_SYM
-    "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM
-    "4x4_SYM",  // ARBITRARY_FILTER_4X4_SYM
-    "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM
-  };
-  static const char *sample_loc[] = {
-    "CENTROID", // SAMPLE_CENTROID
-    "CENTER",   // SAMPLE_CENTER
-  };
-  uint32_t src_swiz = tex->src_swiz;
-  output->append("  //   %sFETCH:\t", sync ? "(S)" : "   ");
-  if (tex->pred_select) {
-    output->append(tex->pred_condition ? "EQ" : "NE");
-  }
-  print_fetch_dst(output, tex->dst_reg, tex->dst_swiz);
-  output->append(" = R%u.", tex->src_reg);
-  for (int i = 0; i < 3; i++) {
-    output->append("%c", chan_names[src_swiz & 0x3]);
-    src_swiz >>= 2;
-  }
-  output->append(" CONST(%u)", tex->const_idx);
-  if (tex->fetch_valid_only) {
-    output->append(" VALID_ONLY");
-  }
-  if (tex->tx_coord_denorm) {
-    output->append(" DENORM");
-  }
-  if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) {
-    output->append(" MAG(%s)", filter[tex->mag_filter]);
-  }
-  if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) {
-    output->append(" MIN(%s)", filter[tex->min_filter]);
-  }
-  if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) {
-    output->append(" MIP(%s)", filter[tex->mip_filter]);
-  }
-  if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) {
-    output->append(" ANISO(%s)", aniso_filter[tex->aniso_filter]);
-  }
-  if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) {
-    output->append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]);
-  }
-  if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) {
-    output->append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]);
-  }
-  if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) {
-    output->append(" VOL_MIN(%s)", filter[tex->vol_min_filter]);
-  }
-  if (!tex->use_comp_lod) {
-    output->append(" LOD(%u)", tex->use_comp_lod);
-    output->append(" LOD_BIAS(%u)", tex->lod_bias);
-  }
-  if (tex->use_reg_lod) {
-    output->append(" REG_LOD(%u)", tex->use_reg_lod);
-  }
-  if (tex->use_reg_gradients) {
-    output->append(" USE_REG_GRADIENTS");
-  }
-  output->append(" LOCATION(%s)", sample_loc[tex->sample_location]);
-  if (tex->offset_x || tex->offset_y || tex->offset_z) {
-    output->append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
-  }
-  output->append("\n");
-
-  int src_component_count = 0;
-  switch (tex->dimension) {
-  case DIMENSION_1D:
-    src_component_count = 1;
-    break;
-  default:
-  case DIMENSION_2D:
-    src_component_count = 2;
-    break;
-  case DIMENSION_3D:
-    src_component_count = 3;
-    break;
-  case DIMENSION_CUBE:
-    src_component_count = 3;
-    break;
-  }
-
-  // Translate.
-  output->append("  ");
-  output->append("r%u.xyzw", tex->dst_reg);
-  output->append(" = ");
-  output->append(
-      "x_texture_%d.Sample(x_sampler_%d, r%u.",
-      tex->const_idx,
-      ctx.tex_fetch_index++, // hacky way to line up to tex buffers
-      tex->src_reg);
-  src_swiz = tex->src_swiz;
-  for (int i = 0; i < src_component_count; i++) {
-    output->append("%c", chan_names[src_swiz & 0x3]);
-    src_swiz >>= 2;
-  }
-  output->append(").");
-
-  // Pass one over dest does xyzw and fakes the special values.
-  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
-  uint32_t dst_swiz = tex->dst_swiz;
-  for (int i = 0; i < 4; i++) {
-    output->append("%c", chan_names[dst_swiz & 0x3]);
-    dst_swiz >>= 3;
-  }
-  output->append(";\n");
-  // Do another pass to set constant values.
-  dst_swiz = tex->dst_swiz;
-  for (int i = 0; i < 4; i++) {
-    if ((dst_swiz & 0x7) == 4) {
-      output->append("  r%u.%c = 0.0;\n", tex->dst_reg, chan_names[i]);
-    } else if ((dst_swiz & 0x7) == 5) {
-      output->append("  r%u.%c = 1.0;\n", tex->dst_reg, chan_names[i]);
-    }
-    dst_swiz >>= 3;
-  }
-  return 0;
-}
-
-struct {
-  const char *name;
-} cf_instructions[] = {
-#define INSTR(opc, fxn) { #opc }
-    INSTR(NOP, print_cf_nop),
-    INSTR(EXEC, print_cf_exec),
-    INSTR(EXEC_END, print_cf_exec),
-    INSTR(COND_EXEC, print_cf_exec),
-    INSTR(COND_EXEC_END, print_cf_exec),
-    INSTR(COND_PRED_EXEC, print_cf_exec),
-    INSTR(COND_PRED_EXEC_END, print_cf_exec),
-    INSTR(LOOP_START, print_cf_loop),
-    INSTR(LOOP_END, print_cf_loop),
-    INSTR(COND_CALL, print_cf_jmp_call),
-    INSTR(RETURN, print_cf_jmp_call),
-    INSTR(COND_JMP, print_cf_jmp_call),
-    INSTR(ALLOC, print_cf_alloc),
-    INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
-    INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
-    INSTR(MARK_VS_FETCH_DONE, print_cf_nop),  // ??
-#undef INSTR
-};
-
-}  // anonymous namespace
-
-
-int D3D11Shader::TranslateExec(xe_gpu_translate_ctx_t& ctx, const instr_cf_exec_t& cf) {
-  Output* output = ctx.output;
-
-  output->append(
-    "  // %s ADDR(0x%x) CNT(0x%x)",
-    cf_instructions[cf.opc].name, cf.address, cf.count);
-  if (cf.yeild) {
-    output->append(" YIELD");
-  }
-  uint8_t vc = cf.vc_hi | (cf.vc_lo << 2);
-  if (vc) {
-    output->append(" VC(0x%x)", vc);
-  }
-  if (cf.bool_addr) {
-    output->append(" BOOL_ADDR(0x%x)", cf.bool_addr);
-  }
-  if (cf.address_mode == ABSOLUTE_ADDR) {
-    output->append(" ABSOLUTE_ADDR");
-  }
-  if (cf.is_cond_exec()) {
-    output->append(" COND(%d)", cf.condition);
-  }
-  output->append("\n");
-
-  uint32_t sequence = cf.serialize;
-  for (uint32_t i = 0; i < cf.count; i++) {
-    uint32_t alu_off = (cf.address + i);
-    int sync = sequence & 0x2;
-    if (sequence & 0x1) {
-      const instr_fetch_t* fetch =
-          (const instr_fetch_t*)(dwords_ + alu_off * 3);
-      switch (fetch->opc) {
-      case VTX_FETCH:
-        if (TranslateVertexFetch(ctx, &fetch->vtx, sync)) {
-          return 1;
-        }
-        break;
-      case TEX_FETCH:
-        if (TranslateTextureFetch(ctx, &fetch->tex, sync)) {
-          return 1;
-        }
-        break;
-      case TEX_GET_BORDER_COLOR_FRAC:
-      case TEX_GET_COMP_TEX_LOD:
-      case TEX_GET_GRADIENTS:
-      case TEX_GET_WEIGHTS:
-      case TEX_SET_TEX_LOD:
-      case TEX_SET_GRADIENTS_H:
-      case TEX_SET_GRADIENTS_V:
-      default:
-        XEASSERTALWAYS();
-        break;
-      }
-    } else {
-      const instr_alu_t* alu =
-          (const instr_alu_t*)(dwords_ + alu_off * 3);
-      if (TranslateALU(ctx, alu, sync)) {
-        return 1;
-      }
-    }
-    sequence >>= 2;
-  }
-
-  return 0;
-}
diff --git a/src/xenia/gpu/d3d11/d3d11_shader.h b/src/xenia/gpu/d3d11/d3d11_shader.h
deleted file mode 100644
index 0b0bb492c..000000000
--- a/src/xenia/gpu/d3d11/d3d11_shader.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_SHADER_H_
-#define XENIA_GPU_D3D11_D3D11_SHADER_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/shader.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-struct Output;
-
-typedef struct {
-  Output*       output;
-  xenos::XE_GPU_SHADER_TYPE type;
-  uint32_t      tex_fetch_index;
-} xe_gpu_translate_ctx_t;
-
-class D3D11GeometryShader;
-
-
-class D3D11Shader : public Shader {
-public:
-  virtual ~D3D11Shader();
-
-  const static uint32_t MAX_INTERPOLATORS = 16;
-
-protected:
-  D3D11Shader(
-      ID3D11Device* device,
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length,
-      uint64_t hash);
-
-  const char* translated_src() const { return translated_src_; }
-  void set_translated_src(char* value);
-
-  void AppendTextureHeader(Output* output);
-  int TranslateExec(
-      xe_gpu_translate_ctx_t& ctx, const xenos::instr_cf_exec_t& cf);
-
-  ID3D10Blob* Compile(const char* shader_source);
-
-protected:
-  ID3D11Device* device_;
-
-  char*   translated_src_;
-};
-
-
-class D3D11VertexShader : public D3D11Shader {
-public:
-  D3D11VertexShader(
-      ID3D11Device* device,
-      const uint8_t* src_ptr, size_t length,
-      uint64_t hash);
-  virtual ~D3D11VertexShader();
-
-  ID3D11VertexShader* handle() const { return handle_; }
-  ID3D11InputLayout* input_layout() const { return input_layout_; }
-
-  int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl);
-
-  enum GeometryShaderType {
-    POINT_SPRITE_SHADER,
-    RECT_LIST_SHADER,
-    QUAD_LIST_SHADER,
-
-    MAX_GEOMETRY_SHADER_TYPE,
-  };
-  int DemandGeometryShader(GeometryShaderType type,
-                           D3D11GeometryShader** out_shader);
-
-private:
-  const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl);
-
-private:
-  ID3D11VertexShader*   handle_;
-  ID3D11InputLayout*    input_layout_;
-  D3D11GeometryShader*  geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
-};
-
-
-class D3D11PixelShader : public D3D11Shader {
-public:
-  D3D11PixelShader(
-      ID3D11Device* device,
-      const uint8_t* src_ptr, size_t length,
-      uint64_t hash);
-  virtual ~D3D11PixelShader();
-
-  ID3D11PixelShader* handle() const { return handle_; }
-
-  int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl,
-              D3D11VertexShader* input_shader);
-
-private:
-  const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl,
-                        D3D11VertexShader* input_shader);
-
-private:
-  ID3D11PixelShader*  handle_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_SHADER_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_cache.cc b/src/xenia/gpu/d3d11/d3d11_shader_cache.cc
deleted file mode 100644
index 7f6a5a722..000000000
--- a/src/xenia/gpu/d3d11/d3d11_shader_cache.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/d3d11/d3d11_shader_cache.h>
-
-#include <xenia/gpu/d3d11/d3d11_shader.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::d3d11;
-using namespace xe::gpu::xenos;
-
-
-D3D11ShaderCache::D3D11ShaderCache(ID3D11Device* device) {
-  device_ = device;
-  device_->AddRef();
-}
-
-D3D11ShaderCache::~D3D11ShaderCache() {
-  device_->Release();
-}
-
-Shader* D3D11ShaderCache::CreateCore(
-    xenos::XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) {
-  switch (type) {
-  case XE_GPU_SHADER_TYPE_VERTEX:
-    return new D3D11VertexShader(
-        device_, src_ptr, length, hash);
-  case XE_GPU_SHADER_TYPE_PIXEL:
-    return new D3D11PixelShader(
-        device_, src_ptr, length, hash);
-  default:
-    XEASSERTALWAYS();
-    return NULL;
-  }
-}
\ No newline at end of file
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_cache.h b/src/xenia/gpu/d3d11/d3d11_shader_cache.h
deleted file mode 100644
index 8c33523b4..000000000
--- a/src/xenia/gpu/d3d11/d3d11_shader_cache.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
-#define XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/shader_cache.h>
-
-#include <D3D11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-
-class D3D11ShaderCache : public ShaderCache {
-public:
-  D3D11ShaderCache(ID3D11Device* device);
-  virtual ~D3D11ShaderCache();
-
-protected:
-  Shader* CreateCore(
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length,
-      uint64_t hash) override;
-
-protected:
-  ID3D11Device* device_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_resource.cc b/src/xenia/gpu/d3d11/d3d11_shader_resource.cc
new file mode 100644
index 000000000..e4be7e2cf
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_shader_resource.cc
@@ -0,0 +1,381 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
+
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
+#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
+#include <xenia/gpu/xenos/ucode.h>
+
+#include <d3dcompiler.h>
+
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::d3d11;
+using namespace xe::gpu::xenos;
+
+
+namespace {
+
+// Compiles translated HLSL to bytecode (dumping it first if FLAGS_dump_shaders
+// is set). Returns nullptr on failure; the caller must Release() the result.
+ID3D10Blob* D3D11ShaderCompile(XE_GPU_SHADER_TYPE type,
+                               const char* shader_source,
+                               const char* disasm_source) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // TODO(benvanik): pick shared runtime mode defines.
+  D3D10_SHADER_MACRO defines[] = {
+    "TEST_DEFINE", "1",
+    0, 0,
+  };
+
+  uint32_t flags1 = D3D10_SHADER_DEBUG | D3D10_SHADER_ENABLE_STRICTNESS;
+  uint32_t flags2 = 0;
+
+  // Create a name. The hash is kept 64-bit so it matches the %llX specifier.
+  const bool dump = FLAGS_dump_shaders.size() != 0;
+  const char* base_path = dump ? FLAGS_dump_shaders.c_str() : "";
+  uint64_t hash = xe_hash64(disasm_source, xestrlena(disasm_source)); // ?
+  char file_name[XE_MAX_PATH];
+  xesnprintfa(file_name, XECOUNT(file_name),
+      "%s/gen_%.16llX.%s",
+      base_path,
+      (unsigned long long)hash,
+      type == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps");
+
+  if (dump) {
+    // fputs, not fprintf: shader text is data and may contain '%'.
+    FILE* f = fopen(file_name, "w");
+    if (f) {
+      fputs(shader_source, f);
+      fputs("\n\n/*\n", f);
+      fputs(disasm_source, f);
+      fputs(" */\n", f);
+      fclose(f);
+    }
+  }
+
+  // Compile shader to bytecode blob.
+  ID3D10Blob* shader_blob = 0;
+  ID3D10Blob* error_blob = 0;
+  HRESULT hr = D3DCompile(
+      shader_source, strlen(shader_source),
+      file_name,
+      defines, nullptr,
+      "main",
+      type == XE_GPU_SHADER_TYPE_VERTEX ? "vs_5_0" : "ps_5_0",
+      flags1, flags2,
+      &shader_blob, &error_blob);
+  if (error_blob) {
+    char* msg = (char*)error_blob->GetBufferPointer();
+    XELOGE("D3D11: shader compile failed with %s", msg);
+  }
+  XESAFERELEASE(error_blob);
+  if (FAILED(hr)) {
+    return nullptr;
+  }
+  return shader_blob;
+}
+
+}  // namespace
+
+
+D3D11VertexShaderResource::D3D11VertexShaderResource(
+    D3D11ResourceCache* resource_cache,
+    const MemoryRange& memory_range,
+    const Info& info)
+    : VertexShaderResource(memory_range, info),
+      resource_cache_(resource_cache),
+      handle_(nullptr),
+      input_layout_(nullptr),
+      translated_src_(nullptr) {
+  xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_));
+}
+
+D3D11VertexShaderResource::~D3D11VertexShaderResource() {
+  XESAFERELEASE(handle_);
+  XESAFERELEASE(input_layout_);
+  for (int i = 0; i < XECOUNT(geometry_shaders_); ++i) {
+    delete geometry_shaders_[i];
+  }
+  xe_free(translated_src_);
+}
+
+// Translates, compiles and creates the vertex shader + input layout; 0 = ok.
+int D3D11VertexShaderResource::Prepare(
+    const xe_gpu_program_cntl_t& program_cntl) {
+  SCOPE_profile_cpu_f("gpu");
+  if (is_prepared_ || handle_) {
+    return 0;
+  }
+
+  // Translate and compile source.
+  D3D11ShaderTranslator translator;
+  int ret = translator.TranslateVertexShader(this, program_cntl);
+  if (ret) {
+    XELOGE("D3D11: failed to translate vertex shader");
+    return ret;
+  }
+  // A previous attempt may have failed after this point; drop its copy.
+  xe_free(translated_src_);
+  translated_src_ = xestrdupa(translator.translated_src());
+
+  ID3D10Blob* shader_blob = D3D11ShaderCompile(
+      XE_GPU_SHADER_TYPE_VERTEX, translated_src_, disasm_src());
+  if (!shader_blob) {
+    return 1;
+  }
+  // TODO(benvanik): look in file based on hash/etc.
+  const void* byte_code = shader_blob->GetBufferPointer();
+  size_t byte_code_length = shader_blob->GetBufferSize();
+
+  // Create shader.
+  HRESULT hr = resource_cache_->device()->CreateVertexShader(
+      byte_code, byte_code_length,
+      nullptr,
+      &handle_);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: failed to create vertex shader");
+    XESAFERELEASE(shader_blob);
+    return 1;
+  }
+
+  // Create input layout.
+  ret = CreateInputLayout(byte_code, byte_code_length);
+  XESAFERELEASE(shader_blob);
+  if (ret) {
+    // Drop the shader so the early-out above cannot report it as prepared.
+    XESAFERELEASE(handle_);
+    handle_ = nullptr;
+    return 1;
+  }
+  is_prepared_ = true;
+  return 0;
+}
+
+// Builds input_layout_ from the vertex fetch declarations of this shader.
+// Returns 0 on success (including the no-input case), non-zero on failure.
+int D3D11VertexShaderResource::CreateInputLayout(const void* byte_code,
+                                                 size_t byte_code_length) {
+  size_t element_count = 0;
+  const auto& inputs = buffer_inputs();
+  for (uint32_t n = 0; n < inputs.count; n++) {
+    element_count += inputs.descs[n].info.element_count;
+  }
+  if (!element_count) {
+    XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?");
+    input_layout_ = NULL;
+    return 0;
+  }
+
+  D3D11_INPUT_ELEMENT_DESC* element_descs =
+      (D3D11_INPUT_ELEMENT_DESC*)xe_alloca(
+          sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count);
+  uint32_t el_index = 0;
+  for (uint32_t n = 0; n < inputs.count; n++) {
+    const auto& input = inputs.descs[n];
+    for (uint32_t m = 0; m < input.info.element_count; m++) {
+      const auto& el = input.info.elements[m];
+      uint32_t vb_slot = input.input_index;
+      DXGI_FORMAT vtx_format = DXGI_FORMAT_UNKNOWN;
+      switch (el.format) {
+      case FMT_8_8_8_8:
+        if (el.is_normalized) {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
+        } else {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT;
+        }
+        break;
+      case FMT_2_10_10_10:
+        vtx_format = el.is_normalized ?
+            DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R10G10B10A2_UINT;
+        break;
+      // DXGI_FORMAT_R11G11B10_FLOAT?
+      case FMT_16_16:
+        if (el.is_normalized) {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM;
+        } else {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT;
+        }
+        break;
+      case FMT_16_16_16_16:
+        if (el.is_normalized) {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM;
+        } else {
+          vtx_format = el.is_signed ?
+              DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT;
+        }
+        break;
+      case FMT_16_16_FLOAT:
+        vtx_format = DXGI_FORMAT_R16G16_FLOAT;
+        break;
+      case FMT_16_16_16_16_FLOAT:
+        vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
+        break;
+      case FMT_32:
+        vtx_format = el.is_signed ?
+            DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT;
+        break;
+      case FMT_32_32:
+        vtx_format = el.is_signed ?
+            DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT;
+        break;
+      case FMT_32_32_32_32:
+        vtx_format = el.is_signed ?
+            DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT;
+        break;
+      case FMT_32_FLOAT:
+        vtx_format = DXGI_FORMAT_R32_FLOAT;
+        break;
+      case FMT_32_32_FLOAT:
+        vtx_format = DXGI_FORMAT_R32G32_FLOAT;
+        break;
+      case FMT_32_32_32_FLOAT:
+        vtx_format = DXGI_FORMAT_R32G32B32_FLOAT;
+        break;
+      case FMT_32_32_32_32_FLOAT:
+        vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+        break;
+      default:
+        XELOGE("D3D11: unsupported vertex format %d", el.format);
+        XEASSERTALWAYS();
+        return 1;
+      }
+      element_descs[el_index].SemanticName         = "XE_VF";
+      element_descs[el_index].SemanticIndex        = el_index;
+      element_descs[el_index].Format               = vtx_format;
+      element_descs[el_index].InputSlot            = vb_slot;
+      element_descs[el_index].AlignedByteOffset    = el.offset_words * 4;
+      element_descs[el_index].InputSlotClass       = D3D11_INPUT_PER_VERTEX_DATA;
+      element_descs[el_index].InstanceDataStepRate = 0;
+      el_index++;
+    }
+  }
+  HRESULT hr = resource_cache_->device()->CreateInputLayout(
+      element_descs,
+      (UINT)element_count,
+      byte_code, byte_code_length,
+      &input_layout_);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: failed to create vertex shader input layout");
+    return 1;
+  }
+
+  return 0;
+}
+
+// Returns the cached geometry shader of |type|, creating it on first use.
+int D3D11VertexShaderResource::DemandGeometryShader(
+    GeometryShaderType type, D3D11GeometryShader** out_shader) {
+  // Validate before |type| is used to index the cache array below.
+  if ((uint32_t)type >= (uint32_t)MAX_GEOMETRY_SHADER_TYPE) {
+    XEASSERTALWAYS();
+    return 1;
+  }
+  if (geometry_shaders_[type]) {
+    *out_shader = geometry_shaders_[type];
+    return 0;
+  }
+
+  // Demand generate.
+  auto device = resource_cache_->device();
+  D3D11GeometryShader* shader = nullptr;
+  switch (type) {
+  case POINT_SPRITE_SHADER:
+    shader = new D3D11PointSpriteGeometryShader(device);
+    break;
+  case RECT_LIST_SHADER:
+    shader = new D3D11RectListGeometryShader(device);
+    break;
+  case QUAD_LIST_SHADER:
+    shader = new D3D11QuadListGeometryShader(device);
+    break;
+  default:
+    return 1;
+  }
+  if (!shader || shader->Prepare(this)) {
+    delete shader;
+    return 1;
+  }
+  geometry_shaders_[type] = shader;
+  *out_shader = shader;
+  return 0;
+}
+
+D3D11PixelShaderResource::D3D11PixelShaderResource(
+    D3D11ResourceCache* resource_cache,
+    const MemoryRange& memory_range,
+    const Info& info)
+    : PixelShaderResource(memory_range, info),
+      resource_cache_(resource_cache),
+      handle_(nullptr),
+      translated_src_(nullptr) {
+}
+
+D3D11PixelShaderResource::~D3D11PixelShaderResource() {
+  XESAFERELEASE(handle_);
+  xe_free(translated_src_);
+}
+
+// Translates the ucode to HLSL (using the allocation counts of
+// |input_shader|), compiles it and creates the D3D11 pixel shader.
+// Returns 0 on success or if the shader was already prepared.
+int D3D11PixelShaderResource::Prepare(const xe_gpu_program_cntl_t& program_cntl,
+                                      VertexShaderResource* input_shader) {
+  SCOPE_profile_cpu_f("gpu");
+  if (is_prepared_ || handle_) {
+    return 0;
+  }
+
+  // Translate and compile source.
+  D3D11ShaderTranslator translator;
+  int ret = translator.TranslatePixelShader(this,
+                                            program_cntl,
+                                            input_shader->alloc_counts());
+  if (ret) {
+    XELOGE("D3D11: failed to translate pixel shader");
+    return ret;
+  }
+  // A previous attempt may have failed after this point; drop its copy.
+  xe_free(translated_src_);
+  translated_src_ = xestrdupa(translator.translated_src());
+
+  ID3D10Blob* shader_blob = D3D11ShaderCompile(
+      XE_GPU_SHADER_TYPE_PIXEL, translated_src_, disasm_src());
+  if (!shader_blob) {
+    return 1;
+  }
+
+  // TODO(benvanik): look in file based on hash/etc.
+  // The blob is consumed in place (no intermediate heap copy) and released
+  // on every path once the shader object has been created.
+  const void* byte_code = shader_blob->GetBufferPointer();
+  size_t byte_code_length = shader_blob->GetBufferSize();
+
+  HRESULT hr = resource_cache_->device()->CreatePixelShader(
+      byte_code, byte_code_length,
+      nullptr,
+      &handle_);
+  XESAFERELEASE(shader_blob);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: failed to create pixel shader");
+    return 1;
+  }
+
+  is_prepared_ = true;
+  return 0;
+}
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_resource.h b/src/xenia/gpu/d3d11/d3d11_shader_resource.h
new file mode 100644
index 000000000..5c0da8242
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_shader_resource.h
@@ -0,0 +1,91 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
+#define XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
+
+#include <xenia/gpu/shader_resource.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+class D3D11GeometryShader;
+class D3D11ResourceCache;
+
+struct Output;
+typedef struct {
+  Output*       output;
+  xenos::XE_GPU_SHADER_TYPE type;
+  uint32_t      tex_fetch_index;
+} xe_gpu_translate_ctx_t;
+
+class D3D11VertexShaderResource : public VertexShaderResource {
+public:
+  D3D11VertexShaderResource(D3D11ResourceCache* resource_cache,
+                            const MemoryRange& memory_range,
+                            const Info& info);
+  ~D3D11VertexShaderResource() override;
+
+  void* handle() const override { return handle_; }
+  ID3D11InputLayout* input_layout() const { return input_layout_; }
+  const char* translated_src() const { return translated_src_; }
+
+  int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) override;
+
+  enum GeometryShaderType {
+    POINT_SPRITE_SHADER,
+    RECT_LIST_SHADER,
+    QUAD_LIST_SHADER,
+    MAX_GEOMETRY_SHADER_TYPE,  // keep at the end
+  };
+  int DemandGeometryShader(GeometryShaderType type,
+                           D3D11GeometryShader** out_shader);
+
+private:
+  int CreateInputLayout(const void* byte_code, size_t byte_code_length);
+
+  D3D11ResourceCache* resource_cache_;
+  ID3D11VertexShader* handle_;
+  ID3D11InputLayout* input_layout_;
+  D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
+  char* translated_src_;
+};
+
+
+class D3D11PixelShaderResource : public PixelShaderResource {
+public:
+  D3D11PixelShaderResource(D3D11ResourceCache* resource_cache,
+                           const MemoryRange& memory_range,
+                           const Info& info);
+  ~D3D11PixelShaderResource() override;
+
+  void* handle() const override { return handle_; }
+  const char* translated_src() const { return translated_src_; }
+
+  int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
+              VertexShaderResource* vertex_shader) override;
+
+private:
+  D3D11ResourceCache* resource_cache_;
+  ID3D11PixelShader* handle_;
+  char* translated_src_;
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_translator.cc b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc
new file mode 100644
index 000000000..dde024356
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc
@@ -0,0 +1,1625 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
+
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
+#include <xenia/gpu/xenos/ucode.h>
+
+
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::d3d11;
+using namespace xe::gpu::xenos;
+
+
+namespace {
+
+const char* GetFormatTypeName(const VertexBufferResource::DeclElement& el) {
+  switch (el.format) {
+  case FMT_32:
+    return el.is_signed ? "int" : "uint";
+  case FMT_32_FLOAT:
+    return "float";
+  case FMT_16_16:
+  case FMT_32_32:
+    if (el.is_normalized) {
+      return el.is_signed ? "snorm float2" : "unorm float2";
+    } else {
+      return el.is_signed ? "int2" : "uint2";
+    }
+  case FMT_16_16_FLOAT:
+  case FMT_32_32_FLOAT:
+    return "float2";
+  case FMT_10_11_11:
+  case FMT_11_11_10:
+    return "int3"; // ?
+  case FMT_32_32_32_FLOAT:
+    return "float3";
+  case FMT_8_8_8_8:
+  case FMT_2_10_10_10:
+  case FMT_16_16_16_16:
+  case FMT_32_32_32_32:
+    if (el.is_normalized) {
+      return el.is_signed ? "snorm float4" : "unorm float4";
+    } else {
+      return el.is_signed ? "int4" : "uint4";
+    }
+  case FMT_16_16_16_16_FLOAT:
+  case FMT_32_32_32_32_FLOAT:
+    return "float4";
+  default:
+    XELOGE("Unknown vertex format: %d", el.format);
+    XEASSERTALWAYS();
+    return "float4";
+  }
+}
+
+}  // anonymous namespace
+
+D3D11ShaderTranslator::D3D11ShaderTranslator()
+    : capacity_(kCapacity), offset_(0) {
+  buffer_[0] = 0;
+}
+
+int D3D11ShaderTranslator::TranslateVertexShader(
+    VertexShaderResource* vertex_shader,
+    const xe_gpu_program_cntl_t& program_cntl) {
+  SCOPE_profile_cpu_f("gpu");
+
+  type_ = XE_GPU_SHADER_TYPE_VERTEX;
+  tex_fetch_index_ = 0;
+  dwords_ = vertex_shader->dwords();
+
+  // Add constants buffers.
+  // We could optimize this by only including used buffers, but the compiler
+  // seems to do a good job of doing this for us.
+  // It also does read detection, so c[512] can end up c[4] in the asm -
+  // instead of doing this optimization ourselves we could maybe just query
+  // this from the compiler.
+  append(
+    "cbuffer float_consts : register(b0) {\n"
+    "  float4 c[512];\n"
+    "};\n");
+  // TODO(benvanik): add bool/loop constants.
+
+  AppendTextureHeader(vertex_shader->sampler_inputs());
+
+  // Transform utilities. We adjust the output position in various ways
+  // as we can't do this via D3D11 APIs.
+  append(
+    "cbuffer vs_consts : register(b3) {\n"
+    "  float4 window;\n"              // x,y,w,h
+    "  float4 viewport_z_enable;\n"   // min,(max - min),?,enabled
+    "  float4 viewport_size;\n"       // x,y,w,h
+    "};"
+    "float4 applyViewport(float4 pos) {\n"
+    "  if (viewport_z_enable.w) {\n"
+    //"    pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n"
+    //"    pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n"
+    //"    pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n"
+    // w?
+    "  } else {\n"
+    "    pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + float2(-1.0, 1.0);\n"
+    "    pos.zw = float2(0.0, 1.0);\n"
+    "  }\n"
+    "  pos.xy += window.xy;\n"
+    "  return pos;\n"
+    "}\n");
+
+  // Add vertex shader input.
+  append(
+    "struct VS_INPUT {\n");
+  uint32_t el_index = 0;
+  const auto& buffer_inputs = vertex_shader->buffer_inputs();
+  for (uint32_t n = 0; n < buffer_inputs.count; n++) {
+    const auto& input = buffer_inputs.descs[n];
+    for (uint32_t m = 0; m < input.info.element_count; m++) {
+      const auto& el = input.info.elements[m];
+      const char* type_name = GetFormatTypeName(el);
+      const auto& fetch = el.vtx_fetch;
+      uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel;
+      append(
+        "  %s vf%u_%d : XE_VF%u;\n",
+        type_name, fetch_slot, fetch.offset, el_index);
+      el_index++;
+    }
+  }
+  append(
+    "};\n");
+
+  // Add vertex shader output (pixel shader input).
+  const auto& alloc_counts = vertex_shader->alloc_counts();
+  append(
+    "struct VS_OUTPUT {\n");
+  if (alloc_counts.positions) {
+    XEASSERT(alloc_counts.positions == 1);
+    append(
+      "  float4 oPos : SV_POSITION;\n");
+  }
+  if (alloc_counts.params) {
+    append(
+      "  float4 o[%d] : XE_O;\n",
+      kMaxInterpolators);
+  }
+  if (alloc_counts.point_size) {
+    append(
+      "  float4 oPointSize : PSIZE;\n");
+  }
+  append(
+    "};\n");
+
+  // Vertex shader main() header.
+  append(
+    "VS_OUTPUT main(VS_INPUT i) {\n"
+    "  VS_OUTPUT o;\n");
+
+  // Always write position, as some shaders seem to only write certain values.
+  append(
+    "  o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n");
+  if (alloc_counts.point_size) {
+    append(
+      "  o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
+  }
+
+  // TODO(benvanik): remove this, if possible (though the compiler may be smart
+  //     enough to do it for us).
+  if (alloc_counts.params) {
+    for (uint32_t n = 0; n < kMaxInterpolators; n++) {
+      append(
+        "  o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n);
+    }
+  }
+
+  // Add temporaries for any registers we may use.
+  uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
+  for (uint32_t n = 0; n <= temp_regs; n++) {
+    append(
+      "  float4 r%d = c[%d];\n", n, n);
+  }
+  append("  float4 t;\n");
+
+  // Execute blocks.
+  const auto& execs = vertex_shader->execs();
+  for (auto it = execs.begin(); it != execs.end(); ++it) {
+    const instr_cf_exec_t& cf = *it;
+    // TODO(benvanik): figure out how sequences/jmps/loops/etc work.
+    if (TranslateExec(cf)) {
+      return 1;
+    }
+  }
+
+  // main footer.
+  append(
+    "  o.oPos = applyViewport(o.oPos);\n"
+    "  return o;\n"
+    "};\n");
+
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslatePixelShader(
+    PixelShaderResource* pixel_shader,
+    const xe_gpu_program_cntl_t& program_cntl,
+    const VertexShaderResource::AllocCounts& alloc_counts) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // We need an input VS to make decisions here.
+  // TODO(benvanik): do we need to pair VS/PS up and store the combination?
+  // If the same PS is used with different VS that output different amounts
+  // (and less than the number of required registers), things may die.
+
+  type_ = XE_GPU_SHADER_TYPE_PIXEL;
+  tex_fetch_index_ = 0;
+  dwords_ = pixel_shader->dwords();
+
+  // Add constants buffers.
+  // We could optimize this by only including used buffers, but the compiler
+  // seems to do a good job of doing this for us.
+  // It also does read detection, so c[512] can end up c[4] in the asm -
+  // instead of doing this optimization ourselves we could maybe just query
+  // this from the compiler.
+  append(
+    "cbuffer float_consts : register(b0) {\n"
+    "  float4 c[512];\n"
+    "};\n");
+  // TODO(benvanik): add bool/loop constants.
+
+  AppendTextureHeader(pixel_shader->sampler_inputs());
+
+  // Add vertex shader output (pixel shader input).
+  append(
+    "struct VS_OUTPUT {\n");
+  if (alloc_counts.positions) {
+    XEASSERT(alloc_counts.positions == 1);
+    append(
+      "  float4 oPos : SV_POSITION;\n");
+  }
+  if (alloc_counts.params) {
+    append(
+      "  float4 o[%d] : XE_O;\n",
+      kMaxInterpolators);
+  }
+  append(
+    "};\n");
+
+  // Add pixel shader output.
+  append(
+    "struct PS_OUTPUT {\n");
+  for (uint32_t n = 0; n < alloc_counts.params; n++) {
+    append(
+      "  float4 oC%d   : SV_TARGET%d;\n", n, n);
+    if (program_cntl.ps_export_depth) {
+      // Is this per render-target?
+      append(
+        "  float oD%d   : SV_DEPTH%d;\n", n, n);
+    }
+  }
+  append(
+    "};\n");
+
+  // Pixel shader main() header.
+  append(
+    "PS_OUTPUT main(VS_OUTPUT i) {\n"
+    "  PS_OUTPUT o;\n");
+
+  // Add temporary registers.
+  uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
+  for (uint32_t n = 0; n <= MAX(15, temp_regs); n++) {
+    append(
+      "  float4 r%d = c[%d];\n", n, n);
+  }
+  append("  float4 t;\n");
+
+  // Bring registers local.
+  if (alloc_counts.params) {
+    for (uint32_t n = 0; n < kMaxInterpolators; n++) {
+      append(
+        "  r%d = i.o[%d];\n", n, n);
+    }
+  }
+
+  // Execute blocks.
+  const auto& execs = pixel_shader->execs();
+  for (auto it = execs.begin(); it != execs.end(); ++it) {
+    const instr_cf_exec_t& cf = *it;
+    // TODO(benvanik): figure out how sequences/jmps/loops/etc work.
+    if (TranslateExec(cf)) {
+      return 1;
+    }
+  }
+
+  // main footer.
+  append(
+    "  return o;\n"
+    "}\n");
+
+  return 0;
+}
+
+void D3D11ShaderTranslator::AppendTextureHeader(
+    const ShaderResource::SamplerInputs& sampler_inputs) {
+  bool fetch_setup[32] = { false };
+
+  // 1 texture per constant slot, 1 sampler per fetch.
+  for (uint32_t n = 0; n < sampler_inputs.count; n++) {
+    const auto& input = sampler_inputs.descs[n];
+    const auto& fetch = input.tex_fetch;
+
+    // Add texture, if needed.
+    if (!fetch_setup[fetch.const_idx]) {
+      fetch_setup[fetch.const_idx] = true;
+      const char* texture_type = NULL;
+      switch (fetch.dimension) {
+      case DIMENSION_1D:
+        texture_type = "Texture1D";
+        break;
+      default:
+      case DIMENSION_2D:
+        texture_type = "Texture2D";
+        break;
+      case DIMENSION_3D:
+        texture_type = "Texture3D";
+        break;
+      case DIMENSION_CUBE:
+        texture_type = "TextureCube";
+        break;
+      }
+      append("%s x_texture_%d;\n", texture_type, fetch.const_idx);
+    }
+
+    // Add sampler.
+    append("SamplerState x_sampler_%d;\n", n);
+  }
+}
+
+namespace {
+
+static const char chan_names[] = {
+  'x', 'y', 'z', 'w',
+  // these only apply to FETCH dst's, and we shouldn't be using them:
+  '0', '1', '?', '_',
+};
+
+}  // namespace
+
+void D3D11ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
+                                         uint32_t swiz, uint32_t negate,
+                                         uint32_t abs) {
+  // Emits an HLSL source operand: optional "-" and "abs(" prefixes, then
+  // either a register (rN) or a constant (c[N]), then an optional swizzle.
+  if (negate) {
+    append("-");
+  }
+  if (abs) {
+    append("abs(");
+  }
+
+  if (!type) {
+    // Constant.
+    append("c[%u]", num);
+  } else {
+    // Register.
+    append("r%u", num);
+  }
+
+  if (swiz) {
+    // Two bits per component; each field is combined with the component
+    // index (mod 4) to pick the channel letter.
+    append(".");
+    uint32_t remaining = swiz;
+    for (uint32_t comp = 0; comp != 4; ++comp, remaining >>= 2) {
+      append("%c", chan_names[(remaining + comp) & 0x3]);
+    }
+  }
+
+  if (abs) {
+    append(")");
+  }
+}
+
+void D3D11ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
+  // Emits the HLSL name of a destination: a plain register when dst_exp is
+  // zero, otherwise the export output selected by the shader type and num.
+  if (!dst_exp) {
+    // Register.
+    append("r%u", num);
+    return;
+  }
+
+  // Export.
+  if (type_ == XE_GPU_SHADER_TYPE_VERTEX) {
+    if (num == 62) {
+      append("o.oPos");
+    } else if (num == 63) {
+      append("o.oPointSize");
+    } else {
+      // Varying.
+      append("o.o[%u]", num);
+    }
+  } else if (type_ == XE_GPU_SHADER_TYPE_PIXEL) {
+    if (num == 0) {
+      append("o.oC0");
+    } else {
+      // TODO(benvanik): other render targets?
+      // TODO(benvanik): depth?
+      XEASSERTALWAYS();
+    }
+  }
+}
+
+void D3D11ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask,
+                                          uint32_t dst_exp) {
+  // Emits the left-hand side of an assignment. A full write mask (0xF) stores
+  // straight to the destination; any other mask stores to the temporary "t".
+  if (mask == 0xF) {
+    AppendDestRegName(num, dst_exp);
+    return;
+  }
+  // Masked write: stage the value in the temporary.
+  append("t");
+}
+
+void D3D11ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
+                                              uint32_t dst_exp) {
+  // For a partial write mask, emits "<dest> = float4(...)" where each
+  // component comes from "t" when its mask bit is set and from the
+  // destination itself otherwise. Emits nothing for a full mask (0xF).
+  if (mask == 0xF) {
+    return;
+  }
+
+  append("  ");
+  AppendDestRegName(num, dst_exp);
+  append(" = float4(");
+  for (uint32_t comp = 0; comp < 4; ++comp) {
+    if (comp != 0) {
+      append(", ");
+    }
+    // TODO(benvanik): mask out values? mix in old value as temp?
+    const bool written = ((mask >> comp) & 0x1) != 0;
+    if (written) {
+      append("t");
+    } else {
+      AppendDestRegName(num, dst_exp);
+    }
+    append(".%c", chan_names[comp]);
+  }
+  append(");\n");
+}
+
+void D3D11ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type,
+                                        uint32_t swiz, uint32_t negate,
+                                        uint32_t abs) {
+  // Disassembly form of a source operand: optional "-", optional |...| bars
+  // for abs, 'R' or 'C' plus the index, and an optional swizzle.
+  if (negate) {
+    append("-");
+  }
+  if (abs) {
+    append("|");
+  }
+
+  const char bank = type ? 'R' : 'C';
+  append("%c%u", bank, num);
+
+  if (swiz) {
+    // Two bits per component, combined with the component index (mod 4).
+    append(".");
+    uint32_t remaining = swiz;
+    for (uint32_t comp = 0; comp != 4; ++comp, remaining >>= 2) {
+      append("%c", chan_names[(remaining + comp) & 0x3]);
+    }
+  }
+
+  if (abs) {
+    append("|");
+  }
+}
+
+void D3D11ShaderTranslator::PrintDstReg(uint32_t num, uint32_t mask,
+                                        uint32_t dst_exp) {
+  // Disassembly form of a destination: "export<N>" or "R<N>", followed by
+  // the written components ('_' for unwritten ones) unless the mask is full.
+  const char* prefix = dst_exp ? "export" : "R";
+  append("%s%u", prefix, num);
+  if (mask == 0xf) {
+    return;
+  }
+
+  append(".");
+  for (uint32_t comp = 0; comp < 4; ++comp) {
+    const bool written = ((mask >> comp) & 0x1) != 0;
+    append("%c", written ? chan_names[comp] : '_');
+  }
+}
+
+void D3D11ShaderTranslator::PrintExportComment(uint32_t num) {
+  // Appends a "\t; <name>" annotation for the export slots that have a
+  // well-known name for the current shader type; other slots get nothing.
+  const char* name = nullptr;
+  if (type_ == XE_GPU_SHADER_TYPE_VERTEX) {
+    if (num == 62) {
+      name = "gl_Position";
+    } else if (num == 63) {
+      name = "gl_PointSize";
+    }
+  } else if (type_ == XE_GPU_SHADER_TYPE_PIXEL) {
+    if (num == 0) {
+      name = "gl_FragColor";
+    }
+  }
+
+  /* if we had a symbol table here, we could look
+   * up the name of the varying..
+   */
+  if (!name) {
+    return;
+  }
+  append("\t; %s", name);
+}
+
+int D3D11ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) {
+  // Emits "<dest> = (<src1> + <src2>);", wrapped in saturate() when the
+  // vector clamp bit is set, then the masked write-back. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(");
+  emit_src1();
+  append(" + ");
+  emit_src2();
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) {
+  // Emits "<dest> = (<src1> * <src2>);", wrapped in saturate() when the
+  // vector clamp bit is set, then the masked write-back. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(");
+  emit_src1();
+  append(" * ");
+  emit_src2();
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) {
+  // Emits "<dest> = max(<src1>, <src2>);" (optionally saturated). When both
+  // operands are described identically the max is dropped and the operand is
+  // copied directly. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+  const bool same_operand =
+      alu.src1_reg == alu.src2_reg &&
+      alu.src1_sel == alu.src2_sel &&
+      alu.src1_swiz == alu.src2_swiz &&
+      alu.src1_reg_negate == alu.src2_reg_negate &&
+      alu.src1_reg_abs == alu.src2_reg_abs;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  if (!same_operand) {
+    append("max(");
+    emit_src1();
+    append(", ");
+    emit_src2();
+    append(")");
+  } else {
+    // This is a mov.
+    emit_src1();
+  }
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) {
+  // Emits "<dest> = min(<src1>, <src2>);", wrapped in saturate() when the
+  // vector clamp bit is set, then the masked write-back. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("min(");
+  emit_src1();
+  append(", ");
+  emit_src2();
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu, const char* op) {
+  // Emits a per-component comparison of src1 and src2 using the HLSL operator
+  // text in `op`; each component of the result is 1.0 or 0.0. The expression
+  // is wrapped in saturate() when the vector clamp bit is set.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+
+  // x component.
+  append("float4((");
+  emit_src1();
+  append(").x %s (", op);
+  emit_src2();
+  // y component.
+  append(").x ? 1.0 : 0.0, (");
+  emit_src1();
+  append(").y %s (", op);
+  emit_src2();
+  // z component.
+  append(").y ? 1.0 : 0.0, (");
+  emit_src1();
+  append(").z %s (", op);
+  emit_src2();
+  // w component.
+  append(").z ? 1.0 : 0.0, (");
+  emit_src1();
+  append(").w %s (", op);
+  emit_src2();
+  append(").w ? 1.0 : 0.0)");
+
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+// Vector set-on-compare opcodes: each forwards to TranslateALU_SETXXv with
+// the matching HLSL comparison operator.
+int D3D11ShaderTranslator::TranslateALU_SETEv(const instr_alu_t& alu) {
+  static const char* const kOp = "==";
+  return TranslateALU_SETXXv(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETGTv(const instr_alu_t& alu) {
+  static const char* const kOp = ">";
+  return TranslateALU_SETXXv(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETGTEv(const instr_alu_t& alu) {
+  static const char* const kOp = ">=";
+  return TranslateALU_SETXXv(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETNEv(const instr_alu_t& alu) {
+  static const char* const kOp = "!=";
+  return TranslateALU_SETXXv(alu, kOp);
+}
+
+int D3D11ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) {
+  // Emits "<dest> = frac(<src1>);", wrapped in saturate() when the vector
+  // clamp bit is set, then the masked write-back. Always returns 0.
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("frac(");
+  AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+               alu.src1_reg_negate, alu.src1_reg_abs);
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) {
+  // Emits "<dest> = trunc(<src1>);", wrapped in saturate() when the vector
+  // clamp bit is set, then the masked write-back. Always returns 0.
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("trunc(");
+  AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+               alu.src1_reg_negate, alu.src1_reg_abs);
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) {
+  // Emits "<dest> = floor(<src1>);", wrapped in saturate() when the vector
+  // clamp bit is set, then the masked write-back. Always returns 0.
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("floor(");
+  AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+               alu.src1_reg_negate, alu.src1_reg_abs);
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) {
+  // Emits "<dest> = mad(<src1>, <src2>, <src3>);", wrapped in saturate() when
+  // the vector clamp bit is set, then the masked write-back. Returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  auto emit_src3 = [&]() {
+    AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+                 alu.src3_reg_negate, alu.src3_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("mad(");
+  emit_src1();
+  append(", ");
+  emit_src2();
+  append(", ");
+  emit_src3();
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu, const char* op) {
+  // Emits a per-component select: each component of src1 is compared against
+  // 0.0 with the HLSL operator text in `op`, yielding the matching component
+  // of src2 when true and of src3 otherwise. Wrapped in saturate() when the
+  // vector clamp bit is set.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  auto emit_src3 = [&]() {
+    AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+                 alu.src3_reg_negate, alu.src3_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+
+  // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as values.
+  // x component.
+  append("float4((");
+  emit_src1();
+  append(").x %s 0.0 ? (", op);
+  emit_src2();
+  append(").x : (");
+  emit_src3();
+  // y component.
+  append(").x, (");
+  emit_src1();
+  append(").y %s 0.0 ? (", op);
+  emit_src2();
+  append(").y : (");
+  emit_src3();
+  // z component.
+  append(").y, (");
+  emit_src1();
+  append(").z %s 0.0 ? (", op);
+  emit_src2();
+  append(").z : (");
+  emit_src3();
+  // w component.
+  append(").z, (");
+  emit_src1();
+  append(").w %s 0.0 ? (", op);
+  emit_src2();
+  append(").w : (");
+  emit_src3();
+  append(").w)");
+
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+// Vector conditional-select opcodes: each forwards to TranslateALU_CNDXXv
+// with the matching HLSL comparison operator.
+int D3D11ShaderTranslator::TranslateALU_CNDEv(const instr_alu_t& alu) {
+  static const char* const kOp = "==";
+  return TranslateALU_CNDXXv(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_CNDGTEv(const instr_alu_t& alu) {
+  static const char* const kOp = ">=";
+  return TranslateALU_CNDXXv(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_CNDGTv(const instr_alu_t& alu) {
+  static const char* const kOp = ">";
+  return TranslateALU_CNDXXv(alu, kOp);
+}
+
+int D3D11ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
+  // Emits "<dest> = dot(<src1>, <src2>);", wrapped in saturate() when the
+  // vector clamp bit is set, then the masked write-back. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("dot(");
+  emit_src1();
+  append(", ");
+  emit_src2();
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) {
+  // Emits "<dest> = dot(float4(<src1>).xyz, float4(<src2>).xyz);", wrapped in
+  // saturate() when the vector clamp bit is set. Always returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("dot(float4(");
+  emit_src1();
+  append(").xyz, float4(");
+  emit_src2();
+  append(").xyz)");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
+  // Emits "<dest> = dot(float4(<src1>).xy, float4(<src2>).xy) + <src3>.x;",
+  // wrapped in saturate() when the vector clamp bit is set. Returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  auto emit_src2 = [&]() {
+    AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz,
+                 alu.src2_reg_negate, alu.src2_reg_abs);
+  };
+  auto emit_src3 = [&]() {
+    AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+                 alu.src3_reg_negate, alu.src3_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("dot(float4(");
+  emit_src1();
+  append(").xy, float4(");
+  emit_src2();
+  append(").xy) + ");
+  emit_src3();
+  append(".x");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+// CUBEv
+
+int D3D11ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
+  // Emits "<dest> = max(max(max(s.x, s.y), s.z), s.w);" where s is src1,
+  // wrapped in saturate() when the vector clamp bit is set. Returns 0.
+  auto emit_src1 = [&]() {
+    AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz,
+                 alu.src1_reg_negate, alu.src1_reg_abs);
+  };
+  const bool clamp = alu.vector_clamp != 0;
+
+  AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  // Three nested max() calls, opened outermost first.
+  append("max(");
+  append("max(");
+  append("max(");
+  emit_src1();
+  append(".x, ");
+  emit_src1();
+  append(".y), ");
+  emit_src1();
+  append(".z), ");
+  emit_src1();
+  append(".w)");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
+  return 0;
+}
+
+// ...
+
+int D3D11ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) {
+  // Scalar max over src3: emits "<dest> = max(<src3>.x, <src3>.y).xxxx;"
+  // (optionally saturated), or a plain copy of src3 when the swizzle test
+  // below identifies the instruction as a move. Always returns 0.
+  auto emit_src3 = [&]() {
+    AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+                 alu.src3_reg_negate, alu.src3_reg_abs);
+  };
+  const bool clamp = alu.scalar_clamp != 0;
+  const bool is_mov =
+      (alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3);
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  if (!is_mov) {
+    append("max(");
+    emit_src3();
+    append(".x, ");
+    emit_src3();
+    append(".y).xxxx");
+  } else {
+    // This is a mov.
+    emit_src3();
+  }
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) {
+  // Scalar min over src3: emits "<dest> = min(<src3>.x, <src3>.y).xxxx;",
+  // wrapped in saturate() when the scalar clamp bit is set. Returns 0.
+  auto emit_src3 = [&]() {
+    AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+                 alu.src3_reg_negate, alu.src3_reg_abs);
+  };
+  const bool clamp = alu.scalar_clamp != 0;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("min(");
+  emit_src3();
+  append(".x, ");
+  emit_src3();
+  append(".y).xxxx");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu, const char* op) {
+  // Scalar set-on-compare: emits
+  // "<dest> = ((<src3>.x <op> 0.0) ? 1.0 : 0.0).xxxx;", wrapped in saturate()
+  // when the scalar clamp bit is set. Always returns 0.
+  const bool clamp = alu.scalar_clamp != 0;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("((");
+  AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+               alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op);
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+// Scalar set-on-compare opcodes: each forwards to TranslateALU_SETXXs with
+// the matching HLSL comparison operator.
+int D3D11ShaderTranslator::TranslateALU_SETEs(const instr_alu_t& alu) {
+  static const char* const kOp = "==";
+  return TranslateALU_SETXXs(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETGTs(const instr_alu_t& alu) {
+  static const char* const kOp = ">";
+  return TranslateALU_SETXXs(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETGTEs(const instr_alu_t& alu) {
+  static const char* const kOp = ">=";
+  return TranslateALU_SETXXs(alu, kOp);
+}
+int D3D11ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) {
+  static const char* const kOp = "!=";
+  return TranslateALU_SETXXs(alu, kOp);
+}
+
+int D3D11ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
+  // Scalar reciprocal: emits "<dest> = (1.0 / <src3>);", wrapped in
+  // saturate() when the scalar clamp bit is set. Always returns 0.
+  const bool clamp = alu.scalar_clamp != 0;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(1.0 / ");
+  AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz,
+               alu.src3_reg_negate, alu.src3_reg_abs);
+  append(")");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
+  // Scalar multiply of one constant channel by one register channel:
+  // "<dest> = (c[N].<a> * rM.<b>).xxxx;", optionally saturated. The register
+  // index and both channel selectors are unpacked from the scalar opcode and
+  // the src3 fields below. Always returns 0.
+  const bool clamp = alu.scalar_clamp != 0;
+
+  // Second operand's register index, assembled from opcode/swizzle/sel bits.
+  const uint32_t reg_index =
+      (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
+  // Swizzle with the bits borrowed for the register index cleared.
+  const uint32_t packed_swiz = alu.src3_swiz & ~0x3C;
+  const uint32_t reg_chan = packed_swiz & 0x3;
+  const uint32_t const_chan = ((packed_swiz >> 6) - 1) & 0x3;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(");
+  AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c * ", chan_names[const_chan]);
+  AppendSrcReg(reg_index, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c", chan_names[reg_chan]);
+  append(").xxxx");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+// MUL_CONST_1 is translated exactly like MUL_CONST_0; the low opcode bit is
+// already folded into the register index there.
+int D3D11ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) {
+  const int result = TranslateALU_MUL_CONST_0(alu);
+  return result;
+}
+
+int D3D11ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
+  // Scalar add of one constant channel and one register channel:
+  // "<dest> = (c[N].<a> + rM.<b>).xxxx;", optionally saturated. The register
+  // index and both channel selectors are unpacked from the scalar opcode and
+  // the src3 fields below. Always returns 0.
+  const bool clamp = alu.scalar_clamp != 0;
+
+  // Second operand's register index, assembled from opcode/swizzle/sel bits.
+  const uint32_t reg_index =
+      (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
+  // Swizzle with the bits borrowed for the register index cleared.
+  const uint32_t packed_swiz = alu.src3_swiz & ~0x3C;
+  const uint32_t reg_chan = packed_swiz & 0x3;
+  const uint32_t const_chan = ((packed_swiz >> 6) - 1) & 0x3;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(");
+  AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c + ", chan_names[const_chan]);
+  AppendSrcReg(reg_index, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c", chan_names[reg_chan]);
+  append(").xxxx");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+// ADD_CONST_1 is translated exactly like ADD_CONST_0; the low opcode bit is
+// already folded into the register index there.
+int D3D11ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) {
+  const int result = TranslateALU_ADD_CONST_0(alu);
+  return result;
+}
+
+int D3D11ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
+  // Scalar subtract of a register channel from a constant channel:
+  // "<dest> = (c[N].<a> - rM.<b>).xxxx;", optionally saturated. The register
+  // index and both channel selectors are unpacked from the scalar opcode and
+  // the src3 fields below. Always returns 0.
+  const bool clamp = alu.scalar_clamp != 0;
+
+  // Second operand's register index, assembled from opcode/swizzle/sel bits.
+  const uint32_t reg_index =
+      (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1);
+  // Swizzle with the bits borrowed for the register index cleared.
+  const uint32_t packed_swiz = alu.src3_swiz & ~0x3C;
+  const uint32_t reg_chan = packed_swiz & 0x3;
+  const uint32_t const_chan = ((packed_swiz >> 6) - 1) & 0x3;
+
+  AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  append(" = ");
+  if (clamp) {
+    append("saturate(");
+  }
+  append("(");
+  AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c - ", chan_names[const_chan]);
+  AppendSrcReg(reg_index, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs);
+  append(".%c", chan_names[reg_chan]);
+  append(").xxxx");
+  if (clamp) {
+    append(")");
+  }
+  append(";\n");
+  AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data);
+  return 0;
+}
+// SUB_CONST_1 is translated exactly like SUB_CONST_0; the low opcode bit is
+// already folded into the register index there.
+int D3D11ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) {
+  const int result = TranslateALU_SUB_CONST_0(alu);
+  return result;
+}
+
+namespace {
+
+typedef int (D3D11ShaderTranslator::*TranslateFn)(const instr_alu_t& alu);  // Member function that emits the HLSL for one ALU opcode.
+typedef struct {
+  uint32_t num_srcs;  // Operand count given to the ALU_INSTR* macros.
+  const char* name;  // Opcode mnemonic (the stringized macro argument).
+  TranslateFn   fn;  // Translator to invoke, or nullptr when none exists.
+} TranslateInfo;
+#define ALU_INSTR(opc, num_srcs) \
+    { num_srcs, #opc, nullptr }  // Table entry for an opcode with no translator.
+#define ALU_INSTR_IMPL(opc, num_srcs) \
+    { num_srcs, #opc, &D3D11ShaderTranslator::TranslateALU_##opc }  // Table entry bound to TranslateALU_<opc>.
+
+}  // namespace
+
+// Translates one ALU instruction into HLSL.
+//
+// For each half of the instruction that is active (vector and/or scalar) this
+// first appends a disassembly comment line and then either invokes the
+// opcode's translator from the tables below or appends an "<UNIMPLEMENTED>"
+// marker when the table has no translator for it.
+//
+// alu:  the instruction to translate.
+// sync: non-zero to tag the disassembly line with "(S)".
+// Returns 0 on success, or 1 as soon as a translator reports failure.
+int D3D11ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) {
+  // Indexed by alu->vector_opc. Entries past the last initializer are
+  // zero-initialized, i.e. they have a null name and no translator.
+  static const TranslateInfo vector_alu_instrs[0x20] = {
+    ALU_INSTR_IMPL(ADDv,              2),  // 0
+    ALU_INSTR_IMPL(MULv,              2),  // 1
+    ALU_INSTR_IMPL(MAXv,              2),  // 2
+    ALU_INSTR_IMPL(MINv,              2),  // 3
+    ALU_INSTR_IMPL(SETEv,             2),  // 4
+    ALU_INSTR_IMPL(SETGTv,            2),  // 5
+    ALU_INSTR_IMPL(SETGTEv,           2),  // 6
+    ALU_INSTR_IMPL(SETNEv,            2),  // 7
+    ALU_INSTR_IMPL(FRACv,             1),  // 8
+    ALU_INSTR_IMPL(TRUNCv,            1),  // 9
+    ALU_INSTR_IMPL(FLOORv,            1),  // 10
+    ALU_INSTR_IMPL(MULADDv,           3),  // 11
+    ALU_INSTR_IMPL(CNDEv,             3),  // 12
+    ALU_INSTR_IMPL(CNDGTEv,           3),  // 13
+    ALU_INSTR_IMPL(CNDGTv,            3),  // 14
+    ALU_INSTR_IMPL(DOT4v,             2),  // 15
+    ALU_INSTR_IMPL(DOT3v,             2),  // 16
+    ALU_INSTR_IMPL(DOT2ADDv,          3),  // 17 -- ???
+    ALU_INSTR(CUBEv,                  2),  // 18
+    ALU_INSTR_IMPL(MAX4v,             1),  // 19
+    ALU_INSTR(PRED_SETE_PUSHv,        2),  // 20
+    ALU_INSTR(PRED_SETNE_PUSHv,       2),  // 21
+    ALU_INSTR(PRED_SETGT_PUSHv,       2),  // 22
+    ALU_INSTR(PRED_SETGTE_PUSHv,      2),  // 23
+    ALU_INSTR(KILLEv,                 2),  // 24
+    ALU_INSTR(KILLGTv,                2),  // 25
+    ALU_INSTR(KILLGTEv,               2),  // 26
+    ALU_INSTR(KILLNEv,                2),  // 27
+    ALU_INSTR(DSTv,                   2),  // 28
+    ALU_INSTR(MOVAv,                  1),  // 29
+  };
+  // Indexed by alu->scalar_opc. Slot 41 and the entries past the last
+  // initializer have a null name and no translator.
+  static const TranslateInfo scalar_alu_instrs[0x40] = {
+    ALU_INSTR(ADDs,                   1),  // 0
+    ALU_INSTR(ADD_PREVs,              1),  // 1
+    ALU_INSTR(MULs,                   1),  // 2
+    ALU_INSTR(MUL_PREVs,              1),  // 3
+    ALU_INSTR(MUL_PREV2s,             1),  // 4
+    ALU_INSTR_IMPL(MAXs,              1),  // 5
+    ALU_INSTR_IMPL(MINs,              1),  // 6
+    ALU_INSTR_IMPL(SETEs,             1),  // 7
+    ALU_INSTR_IMPL(SETGTs,            1),  // 8
+    ALU_INSTR_IMPL(SETGTEs,           1),  // 9
+    ALU_INSTR_IMPL(SETNEs,            1),  // 10
+    ALU_INSTR(FRACs,                  1),  // 11
+    ALU_INSTR(TRUNCs,                 1),  // 12
+    ALU_INSTR(FLOORs,                 1),  // 13
+    ALU_INSTR(EXP_IEEE,               1),  // 14
+    ALU_INSTR(LOG_CLAMP,              1),  // 15
+    ALU_INSTR(LOG_IEEE,               1),  // 16
+    ALU_INSTR(RECIP_CLAMP,            1),  // 17
+    ALU_INSTR(RECIP_FF,               1),  // 18
+    ALU_INSTR_IMPL(RECIP_IEEE,        1),  // 19
+    ALU_INSTR(RECIPSQ_CLAMP,          1),  // 20
+    ALU_INSTR(RECIPSQ_FF,             1),  // 21
+    ALU_INSTR(RECIPSQ_IEEE,           1),  // 22
+    ALU_INSTR(MOVAs,                  1),  // 23
+    ALU_INSTR(MOVA_FLOORs,            1),  // 24
+    ALU_INSTR(SUBs,                   1),  // 25
+    ALU_INSTR(SUB_PREVs,              1),  // 26
+    ALU_INSTR(PRED_SETEs,             1),  // 27
+    ALU_INSTR(PRED_SETNEs,            1),  // 28
+    ALU_INSTR(PRED_SETGTs,            1),  // 29
+    ALU_INSTR(PRED_SETGTEs,           1),  // 30
+    ALU_INSTR(PRED_SET_INVs,          1),  // 31
+    ALU_INSTR(PRED_SET_POPs,          1),  // 32
+    ALU_INSTR(PRED_SET_CLRs,          1),  // 33
+    ALU_INSTR(PRED_SET_RESTOREs,      1),  // 34
+    ALU_INSTR(KILLEs,                 1),  // 35
+    ALU_INSTR(KILLGTs,                1),  // 36
+    ALU_INSTR(KILLGTEs,               1),  // 37
+    ALU_INSTR(KILLNEs,                1),  // 38
+    ALU_INSTR(KILLONEs,               1),  // 39
+    ALU_INSTR(SQRT_IEEE,              1),  // 40
+    { 0, nullptr, nullptr },               // 41 (no opcode)
+    ALU_INSTR_IMPL(MUL_CONST_0,       2),  // 42
+    ALU_INSTR_IMPL(MUL_CONST_1,       2),  // 43
+    ALU_INSTR_IMPL(ADD_CONST_0,       2),  // 44
+    ALU_INSTR_IMPL(ADD_CONST_1,       2),  // 45
+    ALU_INSTR_IMPL(SUB_CONST_0,       2),  // 46
+    ALU_INSTR_IMPL(SUB_CONST_1,       2),  // 47
+    ALU_INSTR(SIN,                    1),  // 48
+    ALU_INSTR(COS,                    1),  // 49
+    ALU_INSTR(RETAIN_PREV,            1),  // 50
+  };
+#undef ALU_INSTR
+#undef ALU_INSTR_IMPL
+
+  // Nothing is written by either half: emit a nop marker only.
+  if (!alu->scalar_write_mask && !alu->vector_write_mask) {
+    append("  //   <nop>\n");
+    return 0;
+  }
+
+  if (alu->vector_write_mask) {
+    // Disassemble vector op.
+    const auto& iv = vector_alu_instrs[alu->vector_opc];
+    append("  //   %sALU:\t", sync ? "(S)" : "   ");
+    if (iv.name) {
+      append("%s", iv.name);
+    } else {
+      // Opcode without a table entry: print it numerically, as the scalar
+      // path does, instead of handing a null string to the formatter.
+      append("OP(%u)", alu->vector_opc);
+    }
+    if (alu->pred_select & 0x2) {
+      // seems to work similar to conditional execution in ARM instruction
+      // set, so let's use a similar syntax for now:
+      append((alu->pred_select & 0x1) ? "EQ" : "NE");
+    }
+    append("\t");
+    PrintDstReg(alu->vector_dest, alu->vector_write_mask, alu->export_data);
+    append(" = ");
+    // Three-operand ops list src3 first.
+    if (iv.num_srcs == 3) {
+      PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+                  alu->src3_reg_negate, alu->src3_reg_abs);
+      append(", ");
+    }
+    PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz,
+                alu->src1_reg_negate, alu->src1_reg_abs);
+    if (iv.num_srcs > 1) {
+      append(", ");
+      PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz,
+                  alu->src2_reg_negate, alu->src2_reg_abs);
+    }
+    if (alu->vector_clamp) {
+      append(" CLAMP");
+    }
+    if (alu->export_data) {
+      PrintExportComment(alu->vector_dest);
+    }
+    append("\n");
+
+    // Translate vector op.
+    if (iv.fn) {
+      append("  ");
+      if ((this->*iv.fn)(*alu)) {
+        return 1;
+      }
+    } else {
+      append("  // <UNIMPLEMENTED>\n");
+    }
+  }
+
+  if (alu->scalar_write_mask || !alu->vector_write_mask) {
+    // 2nd optional scalar op:
+
+    // Disassemble scalar op.
+    const auto& is = scalar_alu_instrs[alu->scalar_opc];
+    append("  //  ");
+    append("\t");
+    if (is.name) {
+      append("\t    \t%s\t", is.name);
+    } else {
+      append("\t    \tOP(%u)\t", alu->scalar_opc);
+    }
+    PrintDstReg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
+    append(" = ");
+    if (is.num_srcs == 2) {
+      // ADD_CONST_0 dest, [const], [reg]
+      uint32_t src3_swiz = alu->src3_swiz & ~0x3C;
+      uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
+      uint32_t swiz_b = (src3_swiz & 0x3);
+      PrintSrcReg(alu->src3_reg, 0, 0,
+                  alu->src3_reg_negate, alu->src3_reg_abs);
+      append(".%c", chan_names[swiz_a]);
+      append(", ");
+      uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1);
+      PrintSrcReg(reg2, 1, 0,
+                  alu->src3_reg_negate, alu->src3_reg_abs);
+      append(".%c", chan_names[swiz_b]);
+    } else {
+      PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+                  alu->src3_reg_negate, alu->src3_reg_abs);
+    }
+    if (alu->scalar_clamp) {
+      append(" CLAMP");
+    }
+    if (alu->export_data) {
+      PrintExportComment(alu->scalar_dest);
+    }
+    append("\n");
+
+    // Translate scalar op.
+    if (is.fn) {
+      append("  ");
+      if ((this->*is.fn)(*alu)) {
+        return 1;
+      }
+    } else {
+      append("  // <UNIMPLEMENTED>\n");
+    }
+  }
+
+  return 0;
+}
+
+void D3D11ShaderTranslator::PrintDestFecth(uint32_t dst_reg,
+                                           uint32_t dst_swiz) {
+  // Disassembly form of a fetch destination: "\tR<N>." followed by four
+  // channel letters, each selected by a 3-bit field of dst_swiz.
+  append("\tR%u.", dst_reg);
+  for (uint32_t comp = 0; comp < 4; ++comp) {
+    const uint32_t selector = (dst_swiz >> (3 * comp)) & 0x7;
+    append("%c", chan_names[selector]);
+  }
+}
+
+void D3D11ShaderTranslator::AppendFetchDest(uint32_t dst_reg,
+                                            uint32_t dst_swiz) {
+  // HLSL form of a fetch destination: "r<N>." followed by four channel
+  // letters, each selected by a 3-bit field of dst_swiz.
+  append("r%u.", dst_reg);
+  for (uint32_t comp = 0; comp < 4; ++comp) {
+    const uint32_t selector = (dst_swiz >> (3 * comp)) & 0x7;
+    append("%c", chan_names[selector]);
+  }
+}
+
+int D3D11ShaderTranslator::GetFormatComponentCount(uint32_t format) {
+  // Returns how many components a fetch format carries (1-4). Unrecognized
+  // formats are logged, trip an assert, and are reported as 4.
+  int components = 4;
+  switch (format) {
+  case FMT_32:
+  case FMT_32_FLOAT:
+    components = 1;
+    break;
+  case FMT_16_16:
+  case FMT_16_16_FLOAT:
+  case FMT_32_32:
+  case FMT_32_32_FLOAT:
+    components = 2;
+    break;
+  case FMT_10_11_11:
+  case FMT_11_11_10:
+  case FMT_32_32_32_FLOAT:
+    components = 3;
+    break;
+  case FMT_8_8_8_8:
+  case FMT_2_10_10_10:
+  case FMT_16_16_16_16:
+  case FMT_16_16_16_16_FLOAT:
+  case FMT_32_32_32_32:
+  case FMT_32_32_32_32_FLOAT:
+    components = 4;
+    break;
+  default:
+    XELOGE("Unknown vertex format: %d", format);
+    XEASSERTALWAYS();
+    break;
+  }
+  return components;
+}
+
+int D3D11ShaderTranslator::TranslateExec(const instr_cf_exec_t& cf) {
+  // Translates one control-flow exec block: appends a comment describing the
+  // block, then translates each of its cf.count instructions, which are read
+  // from dwords_ at three dwords per instruction starting at cf.address.
+  // Returns 0 on success, or 1 as soon as an instruction fails to translate.
+
+  // Mnemonics indexed by cf.opc.
+  static const struct {
+    const char *name;
+  } cf_instructions[] = {
+  #define INSTR(opc, fxn) { #opc }
+      INSTR(NOP, print_cf_nop),
+      INSTR(EXEC, print_cf_exec),
+      INSTR(EXEC_END, print_cf_exec),
+      INSTR(COND_EXEC, print_cf_exec),
+      INSTR(COND_EXEC_END, print_cf_exec),
+      INSTR(COND_PRED_EXEC, print_cf_exec),
+      INSTR(COND_PRED_EXEC_END, print_cf_exec),
+      INSTR(LOOP_START, print_cf_loop),
+      INSTR(LOOP_END, print_cf_loop),
+      INSTR(COND_CALL, print_cf_jmp_call),
+      INSTR(RETURN, print_cf_jmp_call),
+      INSTR(COND_JMP, print_cf_jmp_call),
+      INSTR(ALLOC, print_cf_alloc),
+      INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
+      INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
+      INSTR(MARK_VS_FETCH_DONE, print_cf_nop),  // ??
+  #undef INSTR
+  };
+
+  // Header comment: opcode, address, count, then any flags that are set.
+  const char* opc_name = cf_instructions[cf.opc].name;
+  append(
+    "  // %s ADDR(0x%x) CNT(0x%x)",
+    opc_name, cf.address, cf.count);
+  if (cf.yeild) {
+    append(" YIELD");
+  }
+  const uint8_t vc_bits = cf.vc_hi | (cf.vc_lo << 2);
+  if (vc_bits) {
+    append(" VC(0x%x)", vc_bits);
+  }
+  if (cf.bool_addr) {
+    append(" BOOL_ADDR(0x%x)", cf.bool_addr);
+  }
+  if (cf.address_mode == ABSOLUTE_ADDR) {
+    append(" ABSOLUTE_ADDR");
+  }
+  if (cf.is_cond_exec()) {
+    append(" COND(%d)", cf.condition);
+  }
+  append("\n");
+
+  // cf.serialize holds two bits per instruction: bit 0 selects fetch vs. ALU
+  // and bit 1 is passed on to the translators as the sync flag.
+  uint32_t seq_bits = cf.serialize;
+  for (uint32_t i = 0; i < cf.count; ++i, seq_bits >>= 2) {
+    const uint32_t slot = cf.address + i;
+    const int sync = seq_bits & 0x2;
+    const bool is_fetch = (seq_bits & 0x1) != 0;
+
+    if (!is_fetch) {
+      const instr_alu_t* alu =
+          (const instr_alu_t*)(dwords_ + slot * 3);
+      if (TranslateALU(alu, sync)) {
+        return 1;
+      }
+      continue;
+    }
+
+    const instr_fetch_t* fetch =
+        (const instr_fetch_t*)(dwords_ + slot * 3);
+    if (fetch->opc == VTX_FETCH) {
+      if (TranslateVertexFetch(&fetch->vtx, sync)) {
+        return 1;
+      }
+    } else if (fetch->opc == TEX_FETCH) {
+      if (TranslateTextureFetch(&fetch->tex, sync)) {
+        return 1;
+      }
+    } else {
+      // Every other fetch opcode (TEX_GET_BORDER_COLOR_FRAC,
+      // TEX_GET_COMP_TEX_LOD, TEX_GET_GRADIENTS, TEX_GET_WEIGHTS,
+      // TEX_SET_TEX_LOD, TEX_SET_GRADIENTS_H, TEX_SET_GRADIENTS_V, and any
+      // unknown value) is not handled.
+      XEASSERTALWAYS();
+    }
+  }
+
+  return 0;
+}
+
+int D3D11ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
+                                                int sync) {
+  static const struct {  // Format names, indexed by vtx->format.
+    const char *name;
+  } fetch_types[0xff] = {
+  #define TYPE(id) { #id }
+      TYPE(FMT_1_REVERSE), // 0
+      {0},
+      TYPE(FMT_8), // 2
+      {0},
+      {0},
+      {0},
+      TYPE(FMT_8_8_8_8), // 6
+      TYPE(FMT_2_10_10_10), // 7
+      {0},
+      {0},
+      TYPE(FMT_8_8), // 10
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      TYPE(FMT_16), // 24
+      TYPE(FMT_16_16), // 25
+      TYPE(FMT_16_16_16_16), // 26
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      TYPE(FMT_32), // 33
+      TYPE(FMT_32_32), // 34
+      TYPE(FMT_32_32_32_32), // 35
+      TYPE(FMT_32_FLOAT), // 36
+      TYPE(FMT_32_32_FLOAT), // 37
+      TYPE(FMT_32_32_32_32_FLOAT), // 38
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      {0},
+      TYPE(FMT_32_32_32_FLOAT), // 57
+  #undef TYPE
+  };
+  // Slots left as {0} have a null name and are printed as TYPE(0x..) below.
+  // Disassemble: describe the fetch as a "//" comment line in the output.
+  append("  //   %sFETCH:\t", sync ? "(S)" : "   ");
+  if (vtx->pred_select) {
+    append(vtx->pred_condition ? "EQ" : "NE");
+  }
+  PrintDestFecth(vtx->dst_reg, vtx->dst_swiz);
+  append(" = R%u.", vtx->src_reg);
+  append("%c", chan_names[vtx->src_swiz & 0x3]);
+  if (fetch_types[vtx->format].name) {
+    append(" %s", fetch_types[vtx->format].name);
+  } else  {
+    append(" TYPE(0x%x)", vtx->format);
+  }
+  append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED");
+  if (!vtx->num_format_all) {
+    append(" NORMALIZED");
+  }
+  append(" STRIDE(%u)", vtx->stride);
+  if (vtx->offset) {
+    append(" OFFSET(%u)", vtx->offset);
+  }
+  append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
+  if (1) {  // Always taken: dumps the raw fields listed here.
+    // XXX
+    append(" src_reg_am=%u", vtx->src_reg_am);
+    append(" dst_reg_am=%u", vtx->dst_reg_am);
+    append(" num_format_all=%u", vtx->num_format_all);
+    append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
+    append(" exp_adjust_all=%u", vtx->exp_adjust_all);
+  }
+  append("\n");
+
+  // Translate: assign all four channels of the destination register.
+  append("  ");
+  append("r%u.xyzw", vtx->dst_reg);
+  append(" = float4(");
+  uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
+  // TODO(benvanik): detect xyzw = xyzw, etc.
+  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
+  uint32_t component_count = GetFormatComponentCount(vtx->format);  // Unread.
+  uint32_t dst_swiz = vtx->dst_swiz;
+  for (int i = 0; i < 4; i++) {
+    if ((dst_swiz & 0x7) == 4) {  // Selector 4: constant zero.
+      append("0.0");
+    } else if ((dst_swiz & 0x7) == 5) {  // Selector 5: constant one.
+      append("1.0");
+    } else if ((dst_swiz & 0x7) == 6) {
+      // ? (meaning unknown here; a literal '?' lands in the generated text)
+      append("?");
+    } else if ((dst_swiz & 0x7) == 7) {  // Selector 7: reuse rN's own channel.
+      append("r%u.%c", vtx->dst_reg, chan_names[i]);
+    } else {  // Selectors 0-3: a channel of the fetched input element.
+      append("i.vf%u_%d.%c",
+                     fetch_slot, vtx->offset,
+                     chan_names[dst_swiz & 0x3]);
+    }
+    if (i < 3) {
+      append(", ");
+    }
+    dst_swiz >>= 3;
+  }
+  append(");\n");
+  return 0;  // No failure path in this function.
+}
+
+int D3D11ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
+                                                 int sync) {
+  // Disassemble. The name tables below are indexed by raw field values.
+  static const char *filter[] = {
+    "POINT",    // TEX_FILTER_POINT
+    "LINEAR",   // TEX_FILTER_LINEAR
+    "BASEMAP",  // TEX_FILTER_BASEMAP
+  };
+  static const char *aniso_filter[] = {
+    "DISABLED", // ANISO_FILTER_DISABLED
+    "MAX_1_1",  // ANISO_FILTER_MAX_1_1
+    "MAX_2_1",  // ANISO_FILTER_MAX_2_1
+    "MAX_4_1",  // ANISO_FILTER_MAX_4_1
+    "MAX_8_1",  // ANISO_FILTER_MAX_8_1
+    "MAX_16_1", // ANISO_FILTER_MAX_16_1
+  };  // NOTE(review): 6 names; confirm the field never holds another value.
+  static const char *arbitrary_filter[] = {
+    "2x4_SYM",  // ARBITRARY_FILTER_2X4_SYM
+    "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM
+    "4x2_SYM",  // ARBITRARY_FILTER_4X2_SYM
+    "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM
+    "4x4_SYM",  // ARBITRARY_FILTER_4X4_SYM
+    "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM
+  };  // NOTE(review): 6 names; same indexing concern as aniso_filter.
+  static const char *sample_loc[] = {
+    "CENTROID", // SAMPLE_CENTROID
+    "CENTER",   // SAMPLE_CENTER
+  };
+  uint32_t src_swiz = tex->src_swiz;
+  append("  //   %sFETCH:\t", sync ? "(S)" : "   ");
+  if (tex->pred_select) {
+    append(tex->pred_condition ? "EQ" : "NE");
+  }
+  PrintDestFecth(tex->dst_reg, tex->dst_swiz);
+  append(" = R%u.", tex->src_reg);
+  for (int i = 0; i < 3; i++) {  // Source swizzle: 2 bits per channel.
+    append("%c", chan_names[src_swiz & 0x3]);
+    src_swiz >>= 2;
+  }
+  append(" CONST(%u)", tex->const_idx);
+  if (tex->fetch_valid_only) {
+    append(" VALID_ONLY");
+  }
+  if (tex->tx_coord_denorm) {
+    append(" DENORM");
+  }
+  if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) {
+    append(" MAG(%s)", filter[tex->mag_filter]);
+  }
+  if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) {
+    append(" MIN(%s)", filter[tex->min_filter]);
+  }
+  if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) {
+    append(" MIP(%s)", filter[tex->mip_filter]);
+  }
+  if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) {
+    append(" ANISO(%s)", aniso_filter[tex->aniso_filter]);
+  }
+  if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) {
+    append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]);
+  }
+  if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) {
+    append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]);
+  }
+  if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) {
+    append(" VOL_MIN(%s)", filter[tex->vol_min_filter]);
+  }
+  if (!tex->use_comp_lod) {  // Hence LOD() below always prints 0.
+    append(" LOD(%u)", tex->use_comp_lod);
+    append(" LOD_BIAS(%u)", tex->lod_bias);
+  }
+  if (tex->use_reg_lod) {
+    append(" REG_LOD(%u)", tex->use_reg_lod);
+  }
+  if (tex->use_reg_gradients) {
+    append(" USE_REG_GRADIENTS");
+  }
+  append(" LOCATION(%s)", sample_loc[tex->sample_location]);
+  if (tex->offset_x || tex->offset_y || tex->offset_z) {
+    append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
+  }
+  append("\n");
+  // Number of source swizzle channels written as the Sample() coordinate.
+  int src_component_count = 0;
+  switch (tex->dimension) {
+  case DIMENSION_1D:
+    src_component_count = 1;
+    break;
+  default:  // Any other dimension value is handled as 2D.
+  case DIMENSION_2D:
+    src_component_count = 2;
+    break;
+  case DIMENSION_3D:
+    src_component_count = 3;
+    break;
+  case DIMENSION_CUBE:
+    src_component_count = 3;
+    break;
+  }
+
+  // Translate: sample into rN.xyzw, then patch in the constant channels.
+  append("  ");
+  append("r%u.xyzw", tex->dst_reg);
+  append(" = ");
+  append(
+      "x_texture_%d.Sample(x_sampler_%d, r%u.",
+      tex->const_idx,
+      tex_fetch_index_++, // hacky way to line up to tex buffers
+      tex->src_reg);
+  src_swiz = tex->src_swiz;
+  for (int i = 0; i < src_component_count; i++) {
+    append("%c", chan_names[src_swiz & 0x3]);
+    src_swiz >>= 2;
+  }
+  append(").");
+
+  // Pass one over dest does xyzw and fakes the special values.
+  // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
+  uint32_t dst_swiz = tex->dst_swiz;
+  for (int i = 0; i < 4; i++) {
+    append("%c", chan_names[dst_swiz & 0x3]);  // 2-bit mask: 4-7 alias 0-3.
+    dst_swiz >>= 3;
+  }
+  append(";\n");
+  // Do another pass to set constant values.
+  dst_swiz = tex->dst_swiz;
+  for (int i = 0; i < 4; i++) {
+    if ((dst_swiz & 0x7) == 4) {
+      append("  r%u.%c = 0.0;\n", tex->dst_reg, chan_names[i]);
+    } else if ((dst_swiz & 0x7) == 5) {
+      append("  r%u.%c = 1.0;\n", tex->dst_reg, chan_names[i]);
+    }  // Selectors 6 and 7 get no fix-up here.
+    dst_swiz >>= 3;
+  }
+  return 0;  // No failure path in this function.
+}
diff --git a/src/xenia/gpu/d3d11/d3d11_shader_translator.h b/src/xenia/gpu/d3d11/d3d11_shader_translator.h
new file mode 100644
index 000000000..ad85c7775
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_shader_translator.h
@@ -0,0 +1,125 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
+#define XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
+
+#include <xenia/gpu/shader_resource.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+
+class D3D11ShaderTranslator {  // Turns Xenos shader microcode into HLSL text.
+public:
+  const static uint32_t kMaxInterpolators = 16;
+
+  D3D11ShaderTranslator();
+
+  int TranslateVertexShader(VertexShaderResource* vertex_shader,
+                            const xenos::xe_gpu_program_cntl_t& program_cntl);
+  int TranslatePixelShader(
+      PixelShaderResource* pixel_shader,
+      const xenos::xe_gpu_program_cntl_t& program_cntl,
+      const VertexShaderResource::AllocCounts& alloc_counts);
+  // NUL-terminated text accumulated so far by append().
+  const char* translated_src() const { return buffer_; }
+
+private:
+  xenos::XE_GPU_SHADER_TYPE type_;
+  uint32_t tex_fetch_index_;  // Bumped once per translated texture fetch.
+  const uint32_t* dwords_;    // Microcode words; each instruction spans 3.
+  // Output text; append() clamps failed/truncated writes to stay in bounds.
+  static const int kCapacity = 64 * 1024;
+  char buffer_[kCapacity];
+  size_t capacity_;  // NOTE(review): presumably kCapacity; set in the ctor?
+  size_t offset_;    // Write position; append() keeps it below capacity_.
+  void append(const char* format, ...) {  // printf-style; truncates when full.
+    va_list args;
+    va_start(args, format);
+    size_t room = capacity_ - offset_;  // Space left, terminator included.
+    int len = xevsnprintfa(buffer_ + offset_, room, format, args);
+    va_end(args);
+    offset_ += len < 0 ? 0 : (size_t(len) < room ? size_t(len) : room - 1);
+    buffer_[offset_] = 0;  // In bounds: the clamp leaves offset_ < capacity_.
+  }
+
+  void AppendTextureHeader(
+      const ShaderResource::SamplerInputs& sampler_inputs);
+
+  void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
+                    uint32_t abs);
+  void AppendDestRegName(uint32_t num, uint32_t dst_exp);
+  void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
+  void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp);
+  void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
+                   uint32_t abs);
+  void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
+  void PrintExportComment(uint32_t num);
+  // ALU translation: TranslateALU plus one handler per opcode.
+  int TranslateALU(const xenos::instr_alu_t* alu, int sync);
+  int TranslateALU_ADDv(const xenos::instr_alu_t& alu);
+  int TranslateALU_MULv(const xenos::instr_alu_t& alu);
+  int TranslateALU_MAXv(const xenos::instr_alu_t& alu);
+  int TranslateALU_MINv(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETXXv(const xenos::instr_alu_t& alu, const char* op);
+  int TranslateALU_SETEv(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETGTv(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETGTEv(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETNEv(const xenos::instr_alu_t& alu);
+  int TranslateALU_FRACv(const xenos::instr_alu_t& alu);
+  int TranslateALU_TRUNCv(const xenos::instr_alu_t& alu);
+  int TranslateALU_FLOORv(const xenos::instr_alu_t& alu);
+  int TranslateALU_MULADDv(const xenos::instr_alu_t& alu);
+  int TranslateALU_CNDXXv(const xenos::instr_alu_t& alu, const char* op);
+  int TranslateALU_CNDEv(const xenos::instr_alu_t& alu);
+  int TranslateALU_CNDGTEv(const xenos::instr_alu_t& alu);
+  int TranslateALU_CNDGTv(const xenos::instr_alu_t& alu);
+  int TranslateALU_DOT4v(const xenos::instr_alu_t& alu);
+  int TranslateALU_DOT3v(const xenos::instr_alu_t& alu);
+  int TranslateALU_DOT2ADDv(const xenos::instr_alu_t& alu);
+  // CUBEv
+  int TranslateALU_MAX4v(const xenos::instr_alu_t& alu);
+  // ...
+  int TranslateALU_MAXs(const xenos::instr_alu_t& alu);
+  int TranslateALU_MINs(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETXXs(const xenos::instr_alu_t& alu, const char* op);
+  int TranslateALU_SETEs(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETGTs(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETGTEs(const xenos::instr_alu_t& alu);
+  int TranslateALU_SETNEs(const xenos::instr_alu_t& alu);
+  int TranslateALU_RECIP_IEEE(const xenos::instr_alu_t& alu);
+  int TranslateALU_MUL_CONST_0(const xenos::instr_alu_t& alu);
+  int TranslateALU_MUL_CONST_1(const xenos::instr_alu_t& alu);
+  int TranslateALU_ADD_CONST_0(const xenos::instr_alu_t& alu);
+  int TranslateALU_ADD_CONST_1(const xenos::instr_alu_t& alu);
+  int TranslateALU_SUB_CONST_0(const xenos::instr_alu_t& alu);
+  int TranslateALU_SUB_CONST_1(const xenos::instr_alu_t& alu);
+  // Fetch helpers: destination operand printing and format width lookup.
+  void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz);
+  void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz);
+  int GetFormatComponentCount(uint32_t format);
+
+  int TranslateExec(const xenos::instr_cf_exec_t& cf);
+  int TranslateVertexFetch(const xenos::instr_fetch_vtx_t* vtx, int sync);
+  int TranslateTextureFetch(const xenos::instr_fetch_tex_t* tex, int sync);
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_texture.cc b/src/xenia/gpu/d3d11/d3d11_texture.cc
deleted file mode 100644
index 809a971ac..000000000
--- a/src/xenia/gpu/d3d11/d3d11_texture.cc
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/d3d11/d3d11_texture.h>
-
-#include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
-#include <xenia/gpu/xenos/ucode.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::d3d11;
-using namespace xe::gpu::xenos;
-
-
-D3D11Texture::D3D11Texture(D3D11TextureCache* cache, uint32_t address,
-                           const uint8_t* host_address)
-    : Texture(address, host_address),
-      cache_(cache) {
-}
-
-D3D11Texture::~D3D11Texture() {
-}
-
-TextureView* D3D11Texture::FetchNew(
-    const xenos::xe_gpu_texture_fetch_t& fetch) {
-  D3D11TextureView* view = new D3D11TextureView();
-  if (!FillViewInfo(view, fetch)) {
-    return nullptr;
-  }
-
-  D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
-  xe_zero_struct(&srv_desc, sizeof(srv_desc));
-  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
-  srv_desc.Format = view->format;
-
-  D3D_SRV_DIMENSION dimension = D3D11_SRV_DIMENSION_UNKNOWN;
-  switch (view->dimensions) {
-  case DIMENSION_1D:
-    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
-    srv_desc.Texture1D.MipLevels = 1;
-    srv_desc.Texture1D.MostDetailedMip = 0;
-    if (!CreateTexture1D(view, fetch)) {
-      XELOGE("D3D11: failed to fetch Texture1D");
-      return nullptr;
-    }
-    break;
-  case DIMENSION_2D:
-    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
-    srv_desc.Texture2D.MipLevels = 1;
-    srv_desc.Texture2D.MostDetailedMip = 0;
-    if (!CreateTexture2D(view, fetch)) {
-      XELOGE("D3D11: failed to fetch Texture2D");
-      return nullptr;
-    }
-    break;
-  case DIMENSION_3D:
-    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
-    srv_desc.Texture3D.MipLevels = 1;
-    srv_desc.Texture3D.MostDetailedMip = 0;
-    if (!CreateTexture3D(view, fetch)) {
-      XELOGE("D3D11: failed to fetch Texture3D");
-      return nullptr;
-    }
-    break;
-  case DIMENSION_CUBE:
-    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
-    srv_desc.TextureCube.MipLevels = 1;
-    srv_desc.TextureCube.MostDetailedMip = 0;
-    if (!CreateTextureCube(view, fetch)) {
-      XELOGE("D3D11: failed to fetch TextureCube");
-      return nullptr;
-    }
-    break;
-  }
-
-  HRESULT hr = cache_->device()->CreateShaderResourceView(
-      view->resource, &srv_desc, &view->srv);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: unable to create texture resource view");
-    return nullptr;
-  }
-
-  return view;
-}
-
-bool D3D11Texture::FetchDirty(
-    TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  auto d3d_view = static_cast<D3D11TextureView*>(view);
-  switch (view->dimensions) {
-  case DIMENSION_1D:
-    return FetchTexture1D(d3d_view, fetch);
-  case DIMENSION_2D:
-    return FetchTexture2D(d3d_view, fetch);
-  case DIMENSION_3D:
-    return FetchTexture3D(d3d_view, fetch);
-  case DIMENSION_CUBE:
-    return FetchTextureCube(d3d_view, fetch);
-  }
-  return false;
-}
-
-bool D3D11Texture::CreateTexture1D(
-    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  uint32_t width = 1 + fetch.size_1d.width;
-
-  D3D11_TEXTURE1D_DESC texture_desc;
-  xe_zero_struct(&texture_desc, sizeof(texture_desc));
-  texture_desc.Width          = width;
-  texture_desc.MipLevels      = 1;
-  texture_desc.ArraySize      = 1;
-  texture_desc.Format         = view->format;
-  texture_desc.Usage          = D3D11_USAGE_DYNAMIC;
-  texture_desc.BindFlags      = D3D11_BIND_SHADER_RESOURCE;
-  texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
-  texture_desc.MiscFlags      = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
-  HRESULT hr = cache_->device()->CreateTexture1D(
-      &texture_desc, NULL, (ID3D11Texture1D**)&view->resource);
-  if (FAILED(hr)) {
-    return false;
-  }
-
-  return FetchTexture1D(view, fetch);
-}
-
-bool D3D11Texture::FetchTexture1D(
-    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
-  SCOPE_profile_cpu_f("gpu");
-
-  // TODO(benvanik): upload!
-  XELOGE("D3D11: FetchTexture1D not yet implemented");
-  return false;
-}
-
-bool D3D11Texture::CreateTexture2D(
-    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  XEASSERTTRUE(fetch.dimension == 1);
-
-  D3D11_TEXTURE2D_DESC texture_desc;
-  xe_zero_struct(&texture_desc, sizeof(texture_desc));
-  texture_desc.Width              = view->sizes_2d.output_width;
-  texture_desc.Height             = view->sizes_2d.output_height;
-  texture_desc.MipLevels          = 1;
-  texture_desc.ArraySize          = 1;
-  texture_desc.Format             = view->format;
-  texture_desc.SampleDesc.Count   = 1;
-  texture_desc.SampleDesc.Quality = 0;
-  texture_desc.Usage              = D3D11_USAGE_DYNAMIC;
-  texture_desc.BindFlags          = D3D11_BIND_SHADER_RESOURCE;
-  texture_desc.CPUAccessFlags     = D3D11_CPU_ACCESS_WRITE;
-  texture_desc.MiscFlags          = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
-  HRESULT hr = cache_->device()->CreateTexture2D(
-      &texture_desc, NULL, (ID3D11Texture2D**)&view->resource);
-  if (FAILED(hr)) {
-    return false;
-  }
-
-  return FetchTexture2D(view, fetch);
-}
-
-bool D3D11Texture::FetchTexture2D(
-    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
-  SCOPE_profile_cpu_f("gpu");
-
-  XEASSERTTRUE(fetch.dimension == 1);
-
-  auto sizes = GetTextureSizes2D(view);
-
-  // TODO(benvanik): all mip levels.
-  D3D11_MAPPED_SUBRESOURCE res;
-  HRESULT hr = cache_->context()->Map(view->resource, 0,
-                                      D3D11_MAP_WRITE_DISCARD, 0, &res);
-  if (FAILED(hr)) {
-    XELOGE("D3D11: failed to map texture");
-    return false;
-  }
-
-  const uint8_t* src = cache_->memory()->Translate(address_);
-  uint8_t* dest = (uint8_t*)res.pData;
-
-  //memset(dest, 0, output_pitch * (output_height / view->block_size)); // TODO(gibbed): remove me later
-
-  uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
-  if (!fetch.tiled) {
-    dest = (uint8_t*)res.pData;
-    for (uint32_t y = 0; y < sizes.block_height; y++) {
-      for (uint32_t x = 0; x < sizes.logical_pitch; x += view->texel_pitch) {
-        TextureSwap(dest + x, src + x, view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
-      }
-      src += sizes.input_pitch;
-      dest += output_pitch;
-    }
-  } else {
-    auto bpp = (view->texel_pitch >> 2) + ((view->texel_pitch >> 1) >> (view->texel_pitch >> 2));
-    for (uint32_t y = 0, output_base_offset = 0;
-         y < sizes.block_height;
-         y++, output_base_offset += output_pitch) {
-      auto input_base_offset = TiledOffset2DOuter(y, (sizes.input_width / view->block_size), bpp);
-      for (uint32_t x = 0, output_offset = output_base_offset;
-           x < sizes.block_width;
-           x++, output_offset += view->texel_pitch) {
-        auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
-        TextureSwap(dest + output_offset,
-                    src + input_offset * view->texel_pitch,
-                    view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
-      }
-    }
-  }
-  cache_->context()->Unmap(view->resource, 0);
-  return true;
-}
-
-bool D3D11Texture::CreateTexture3D(
-    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  XELOGE("D3D11: CreateTexture3D not yet implemented");
-  XEASSERTALWAYS();
-  return false;
-}
-
-bool D3D11Texture::FetchTexture3D(
-    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
-  SCOPE_profile_cpu_f("gpu");
-
-  XELOGE("D3D11: FetchTexture3D not yet implemented");
-  XEASSERTALWAYS();
-  return false;
-  //D3D11_TEXTURE3D_DESC texture_desc;
-  //xe_zero_struct(&texture_desc, sizeof(texture_desc));
-  //texture_desc.Width;
-  //texture_desc.Height;
-  //texture_desc.Depth;
-  //texture_desc.MipLevels;
-  //texture_desc.Format;
-  //texture_desc.Usage;
-  //texture_desc.BindFlags;
-  //texture_desc.CPUAccessFlags;
-  //texture_desc.MiscFlags;
-  //hr = device_->CreateTexture3D(
-  //    &texture_desc, &initial_data, (ID3D11Texture3D**)&view->resource);
-}
-
-bool D3D11Texture::CreateTextureCube(
-    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  XELOGE("D3D11: CreateTextureCube not yet implemented");
-  XEASSERTALWAYS();
-  return false;
-}
-
-bool D3D11Texture::FetchTextureCube(
-    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
-  SCOPE_profile_cpu_f("gpu");
-
-  XELOGE("D3D11: FetchTextureCube not yet implemented");
-  XEASSERTALWAYS();
-  return false;
-}
diff --git a/src/xenia/gpu/d3d11/d3d11_texture.h b/src/xenia/gpu/d3d11/d3d11_texture.h
deleted file mode 100644
index a8ee91662..000000000
--- a/src/xenia/gpu/d3d11/d3d11_texture.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_H_
-#define XENIA_GPU_D3D11_D3D11_TEXTURE_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/texture.h>
-
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-class D3D11TextureCache;
-
-
-struct D3D11TextureView : TextureView {
-  ID3D11Resource* resource;
-  ID3D11ShaderResourceView* srv;
-
-  D3D11TextureView()
-      : resource(nullptr), srv(nullptr) {}
-  virtual ~D3D11TextureView() {
-    XESAFERELEASE(srv);
-    XESAFERELEASE(resource);
-  }
-};
-
-
-class D3D11Texture : public Texture {
-public:
-  D3D11Texture(D3D11TextureCache* cache, uint32_t address,
-               const uint8_t* host_address);
-  virtual ~D3D11Texture();
-
-protected:
-  TextureView* FetchNew(
-      const xenos::xe_gpu_texture_fetch_t& fetch) override;
-  bool FetchDirty(
-      TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) override;
-
-  bool CreateTexture1D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool FetchTexture1D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool CreateTexture2D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool FetchTexture2D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool CreateTexture3D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool FetchTexture3D(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool CreateTextureCube(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-  bool FetchTextureCube(
-      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
-
-  D3D11TextureCache* cache_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_TEXTURE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.h b/src/xenia/gpu/d3d11/d3d11_texture_cache.h
deleted file mode 100644
index 63f275d02..000000000
--- a/src/xenia/gpu/d3d11/d3d11_texture_cache.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
-#define XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/texture_cache.h>
-#include <xenia/gpu/shader.h>
-#include <xenia/gpu/d3d11/d3d11_texture.h>
-
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-namespace d3d11 {
-
-
-class D3D11TextureCache : public TextureCache {
-public:
-  D3D11TextureCache(Memory* memory,
-                    ID3D11DeviceContext* context, ID3D11Device* device);
-  virtual ~D3D11TextureCache();
-
-  ID3D11DeviceContext* context() const { return context_; }
-  ID3D11Device* device() const { return device_; }
-
-  ID3D11SamplerState* GetSamplerState(
-      const xenos::xe_gpu_texture_fetch_t& fetch,
-      const Shader::tex_buffer_desc_t& desc);
-
-protected:
-  Texture* CreateTexture(uint32_t address, const uint8_t* host_address,
-                         const xenos::xe_gpu_texture_fetch_t& fetch) override;
-
-private:
-  ID3D11DeviceContext* context_;
-  ID3D11Device* device_;
-
-  struct CachedSamplerState {
-    D3D11_SAMPLER_DESC desc;
-    ID3D11SamplerState* state;
-  };
-  std::unordered_multimap<size_t, CachedSamplerState> samplers_;
-};
-
-
-}  // namespace d3d11
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_resource.cc b/src/xenia/gpu/d3d11/d3d11_texture_resource.cc
new file mode 100644
index 000000000..a90c60b0d
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_texture_resource.cc
@@ -0,0 +1,219 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
+
+#include <xenia/gpu/gpu-private.h>
+#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
+
+
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::d3d11;
+using namespace xe::gpu::xenos;
+
+
+D3D11TextureResource::D3D11TextureResource(
+    D3D11ResourceCache* resource_cache,
+    const MemoryRange& memory_range,
+    const Info& info)
+    : TextureResource(memory_range, info),
+      resource_cache_(resource_cache),  // Supplies the D3D11 device/context.
+      texture_(nullptr),                // Assigned by CreateHandle1D/2D().
+      handle_(nullptr) {                // Assigned by CreateHandle().
+}
+
+D3D11TextureResource::~D3D11TextureResource() {
+  XESAFERELEASE(texture_);  // Texture from CreateHandle1D/2D() (may be null).
+  XESAFERELEASE(handle_);   // View from CreateHandle() (may be null).
+}
+
+int D3D11TextureResource::CreateHandle() {
+  SCOPE_profile_cpu_f("gpu");
+
+  D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
+  xe_zero_struct(&srv_desc, sizeof(srv_desc));
+  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
+  srv_desc.Format = info_.format;
+
+  // Create texture_ for the dimension and describe a single-mip view of it.
+  switch (info_.dimension) {
+  case TEXTURE_DIMENSION_1D:
+    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+    srv_desc.Texture1D.MipLevels = 1;
+    srv_desc.Texture1D.MostDetailedMip = 0;
+    if (CreateHandle1D()) {
+      XELOGE("D3D11: failed to create Texture1D");
+      return 1;
+    }
+    break;
+  case TEXTURE_DIMENSION_2D:
+    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+    srv_desc.Texture2D.MipLevels = 1;
+    srv_desc.Texture2D.MostDetailedMip = 0;
+    if (CreateHandle2D()) {
+      XELOGE("D3D11: failed to create Texture2D");
+      return 1;
+    }
+    break;
+  case TEXTURE_DIMENSION_3D:
+    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+    srv_desc.Texture3D.MipLevels = 1;
+    srv_desc.Texture3D.MostDetailedMip = 0;
+    if (CreateHandle3D()) {
+      XELOGE("D3D11: failed to create Texture3D");
+      return 1;
+    }
+    break;
+  case TEXTURE_DIMENSION_CUBE:
+    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
+    srv_desc.TextureCube.MipLevels = 1;
+    srv_desc.TextureCube.MostDetailedMip = 0;
+    if (CreateHandleCube()) {
+      XELOGE("D3D11: failed to create TextureCube");
+      return 1;
+    }
+    break;
+  }
+
+  HRESULT hr = resource_cache_->device()->CreateShaderResourceView(
+      texture_, &srv_desc, &handle_);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: unable to create texture resource view");
+    return 1;
+  }
+  return 0;  // 0 = success; every failure path above returns 1.
+}
+
+int D3D11TextureResource::CreateHandle1D() {
+  // Creates the single-mip, CPU-writable 1D texture behind this resource and
+  // stores it in texture_. Returns 0 on success, 1 if creation fails.
+  D3D11_TEXTURE1D_DESC texture_desc;
+  xe_zero_struct(&texture_desc, sizeof(texture_desc));
+  // info_.size_1d.width is presumably stored as (width - 1) - TODO confirm.
+  texture_desc.Width          = 1 + info_.size_1d.width;
+  texture_desc.MipLevels      = 1;
+  texture_desc.ArraySize      = 1;
+  texture_desc.Format         = info_.format;
+  texture_desc.Usage          = D3D11_USAGE_DYNAMIC;
+  texture_desc.BindFlags      = D3D11_BIND_SHADER_RESOURCE;
+  texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+  texture_desc.MiscFlags      = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
+  // ID3D11Texture1D derives from ID3D11Resource, so the out-pointer cast is ok.
+  HRESULT hr = resource_cache_->device()->CreateTexture1D(
+      &texture_desc, nullptr,
+      reinterpret_cast<ID3D11Texture1D**>(&texture_));
+  return FAILED(hr) ? 1 : 0;
+}
+
+int D3D11TextureResource::CreateHandle2D() {
+  // Creates the single-mip, CPU-writable 2D texture behind this resource and
+  // stores it in texture_. Returns 0 on success, 1 if creation fails.
+  D3D11_TEXTURE2D_DESC texture_desc;
+  xe_zero_struct(&texture_desc, sizeof(texture_desc));
+  texture_desc.Width              = info_.size_2d.output_width;
+  texture_desc.Height             = info_.size_2d.output_height;
+  texture_desc.MipLevels          = 1;
+  texture_desc.ArraySize          = 1;
+  texture_desc.Format             = info_.format;
+  texture_desc.SampleDesc.Count   = 1;
+  texture_desc.SampleDesc.Quality = 0;
+  texture_desc.Usage              = D3D11_USAGE_DYNAMIC;
+  texture_desc.BindFlags          = D3D11_BIND_SHADER_RESOURCE;
+  texture_desc.CPUAccessFlags     = D3D11_CPU_ACCESS_WRITE;
+  texture_desc.MiscFlags          = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
+  HRESULT hr = resource_cache_->device()->CreateTexture2D(
+      &texture_desc, nullptr,
+      reinterpret_cast<ID3D11Texture2D**>(&texture_));
+  return FAILED(hr) ? 1 : 0;
+}
+
+int D3D11TextureResource::CreateHandle3D() {
+  XELOGE("D3D11: CreateTexture3D not yet implemented");
+  XEASSERTALWAYS();  // Stub: flags any attempt to create a 3D texture.
+  return 1;          // CreateHandle() treats non-zero as failure.
+}
+
+int D3D11TextureResource::CreateHandleCube() {
+  XELOGE("D3D11: CreateTextureCube not yet implemented");
+  XEASSERTALWAYS();  // Stub: flags any attempt to create a cube texture.
+  return 1;          // CreateHandle() treats non-zero as failure.
+}
+
+int D3D11TextureResource::InvalidateRegion(const MemoryRange& memory_range) {
+  SCOPE_profile_cpu_f("gpu");
+
+  switch (info_.dimension) {  // Forward to the per-dimension implementation.
+  case TEXTURE_DIMENSION_1D:
+    return InvalidateRegion1D(memory_range);
+  case TEXTURE_DIMENSION_2D:
+    return InvalidateRegion2D(memory_range);
+  case TEXTURE_DIMENSION_3D:
+    return InvalidateRegion3D(memory_range);
+  case TEXTURE_DIMENSION_CUBE:
+    return InvalidateRegionCube(memory_range);
+  }
+  return 1;  // Dimension not handled above.
+}
+
+int D3D11TextureResource::InvalidateRegion1D(const MemoryRange& memory_range) {
+  return 1;  // Not implemented; 1 is this file's failure code.
+}
+
+int D3D11TextureResource::InvalidateRegion2D(const MemoryRange& memory_range) {
+  // TODO(benvanik): all mip levels.
+  D3D11_MAPPED_SUBRESOURCE res;
+  HRESULT hr = resource_cache_->context()->Map(
+      texture_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: failed to map texture");
+    return 1;
+  }
+
+  // NOTE: |memory_range| is not consulted; the whole texture is re-uploaded.
+  const uint8_t* src = memory_range_.host_base;
+  uint8_t* dest = static_cast<uint8_t*>(res.pData);
+  uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
+  if (!info_.is_tiled) {
+    // Linear source: run TextureSwap over each row, one texel_pitch at a time.
+    for (uint32_t y = 0; y < info_.size_2d.block_height; y++) {
+      for (uint32_t x = 0; x < info_.size_2d.logical_pitch; x += info_.texel_pitch) {
+        TextureSwap(dest + x, src + x, info_.texel_pitch);
+      }
+      src += info_.size_2d.input_pitch;
+      dest += output_pitch;
+    }
+  } else {
+    auto bpp = (info_.texel_pitch >> 2) + ((info_.texel_pitch >> 1) >> (info_.texel_pitch >> 2));
+    for (uint32_t y = 0, output_base_offset = 0;
+         y < info_.size_2d.block_height;
+         y++, output_base_offset += output_pitch) {
+      auto input_base_offset = TiledOffset2DOuter(y, (info_.size_2d.input_width / info_.block_size), bpp);
+      for (uint32_t x = 0, output_offset = output_base_offset;
+           x < info_.size_2d.block_width;
+           x++, output_offset += info_.texel_pitch) {
+        auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
+        TextureSwap(dest + output_offset,
+                    src + input_offset * info_.texel_pitch,
+                    info_.texel_pitch);
+      }
+    }
+  }
+  resource_cache_->context()->Unmap(texture_, 0);
+  return 0;
+}
+
+int D3D11TextureResource::InvalidateRegion3D(const MemoryRange& memory_range) {
+  return 1;  // Not implemented; 1 is this file's failure code.
+}
+
+int D3D11TextureResource::InvalidateRegionCube(
+    const MemoryRange& memory_range) {
+  return 1;  // Not implemented; 1 is this file's failure code.
+}
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_resource.h b/src/xenia/gpu/d3d11/d3d11_texture_resource.h
new file mode 100644
index 000000000..4e59662a4
--- /dev/null
+++ b/src/xenia/gpu/d3d11/d3d11_texture_resource.h
@@ -0,0 +1,60 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
+#define XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
+
+#include <xenia/gpu/texture_resource.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+#include <d3d11.h>
+
+
+namespace xe {
+namespace gpu {
+namespace d3d11 {
+
+class D3D11ResourceCache;
+
+
+class D3D11TextureResource : public TextureResource {
+public:
+  D3D11TextureResource(D3D11ResourceCache* resource_cache,
+                       const MemoryRange& memory_range,
+                       const Info& info);
+  ~D3D11TextureResource() override;
+
+  void* handle() const override { return handle_; }  // Shader resource view.
+
+protected:
+  int CreateHandle() override;  // Creates texture_ and handle_; 0 on success.
+  int CreateHandle1D();
+  int CreateHandle2D();
+  int CreateHandle3D();         // Not yet implemented (returns 1).
+  int CreateHandleCube();       // Not yet implemented (returns 1).
+
+  int InvalidateRegion(const MemoryRange& memory_range) override;
+  int InvalidateRegion1D(const MemoryRange& memory_range);    // Stub.
+  int InvalidateRegion2D(const MemoryRange& memory_range);
+  int InvalidateRegion3D(const MemoryRange& memory_range);    // Stub.
+  int InvalidateRegionCube(const MemoryRange& memory_range);  // Stub.
+
+private:
+  D3D11ResourceCache* resource_cache_;  // Source of the device and context.
+  ID3D11Resource* texture_;             // Released in the destructor.
+  ID3D11ShaderResourceView* handle_;    // Released in the destructor.
+};
+
+
+}  // namespace d3d11
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
diff --git a/src/xenia/gpu/d3d11/sources.gypi b/src/xenia/gpu/d3d11/sources.gypi
index 6dc7ae242..b6b6d76c1 100644
--- a/src/xenia/gpu/d3d11/sources.gypi
+++ b/src/xenia/gpu/d3d11/sources.gypi
@@ -1,10 +1,8 @@
 # Copyright 2013 Ben Vanik. All Rights Reserved.
 {
   'sources': [
-    'd3d11_buffer.cc',
-    'd3d11_buffer.h',
-    'd3d11_buffer_cache.cc',
-    'd3d11_buffer_cache.h',
+    'd3d11_buffer_resource.cc',
+    'd3d11_buffer_resource.h',
     'd3d11_geometry_shader.cc',
     'd3d11_geometry_shader.h',
     'd3d11_gpu-private.h',
@@ -16,14 +14,16 @@
     'd3d11_graphics_system.h',
     'd3d11_profiler_display.cc',
     'd3d11_profiler_display.h',
-    'd3d11_shader.cc',
-    'd3d11_shader.h',
-    'd3d11_shader_cache.cc',
-    'd3d11_shader_cache.h',
-    'd3d11_texture.cc',
-    'd3d11_texture.h',
-    'd3d11_texture_cache.cc',
-    'd3d11_texture_cache.h',
+    'd3d11_resource_cache.cc',
+    'd3d11_resource_cache.h',
+    'd3d11_sampler_state_resource.cc',
+    'd3d11_sampler_state_resource.h',
+    'd3d11_shader_resource.cc',
+    'd3d11_shader_resource.h',
+    'd3d11_shader_translator.cc',
+    'd3d11_shader_translator.h',
+    'd3d11_texture_resource.cc',
+    'd3d11_texture_resource.h',
     'd3d11_window.cc',
     'd3d11_window.h',
   ],
diff --git a/src/xenia/gpu/xenos/registers.cc b/src/xenia/gpu/draw_command.cc
similarity index 56%
rename from src/xenia/gpu/xenos/registers.cc
rename to src/xenia/gpu/draw_command.cc
index 5d4e99106..468c4ed08 100644
--- a/src/xenia/gpu/xenos/registers.cc
+++ b/src/xenia/gpu/draw_command.cc
@@ -1,27 +1,17 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/xenos/registers.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-const char* xe::gpu::xenos::GetRegisterName(uint32_t index) {
-  switch (index) {
-#define XE_GPU_REGISTER(index, type, name) \
-    case index: return #name;
-#include <xenia/gpu/xenos/register_table.inc>
-#undef XE_GPU_REGISTER
-    default:
-      return NULL;
-  }
-}
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/draw_command.h>
+
+
+using namespace std;
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::xenos;
+
diff --git a/src/xenia/gpu/draw_command.h b/src/xenia/gpu/draw_command.h
new file mode 100644
index 000000000..ac5b07fe6
--- /dev/null
+++ b/src/xenia/gpu/draw_command.h
@@ -0,0 +1,78 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_DRAW_COMMAND_H_
+#define XENIA_GPU_DRAW_COMMAND_H_
+
+#include <xenia/core.h>
+#include <xenia/gpu/buffer_resource.h>
+#include <xenia/gpu/sampler_state_resource.h>
+#include <xenia/gpu/shader_resource.h>
+#include <xenia/gpu/texture_resource.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+
+namespace xe {
+namespace gpu {
+
+
+// TODO(benvanik): move more of the enums in here?
+struct DrawCommand {
+  xenos::XE_GPU_PRIMITIVE_TYPE prim_type;
+  uint32_t start_index;  // Read from VGT_INDX_OFFSET by PrepareDraw().
+  uint32_t index_count;
+  uint32_t base_vertex;  // PrepareDraw() currently always writes 0.
+
+  VertexShaderResource* vertex_shader;
+  PixelShaderResource* pixel_shader;
+
+  // TODO(benvanik): dirty tracking/max ranges/etc.
+  struct {
+    float* values;  // Points into the driver's register file (not a copy).
+    size_t count;   // Set to 512 by PopulateConstantBuffers().
+  } float4_constants;
+  struct {
+    uint32_t* values;  // Points into the driver's register file.
+    size_t count;      // Set to 32 by PopulateConstantBuffers().
+  } loop_constants;
+  struct {
+    uint32_t* values;  // Points into the driver's register file.
+    size_t count;      // Set to 8 by PopulateConstantBuffers().
+  } bool_constants;
+
+  // Index buffer, if present. If index_count > 0 then auto draw.
+  IndexBufferResource* index_buffer;
+
+  // Vertex buffers.
+  struct {
+    uint32_t input_index;
+    VertexBufferResource* buffer;
+    uint32_t stride;  // Filled in as stride_words * 4.
+    uint32_t offset;  // Currently always written as 0.
+  } vertex_buffers[96];
+  size_t vertex_buffer_count;  // Number of populated vertex_buffers entries.
+
+  // Texture samplers.
+  struct SamplerInput {
+    uint32_t input_index;
+    TextureResource* texture;             // May be nullptr.
+    SamplerStateResource* sampler_state;  // May be nullptr.
+  };
+  SamplerInput vertex_shader_samplers[32];
+  size_t vertex_shader_sampler_count;  // Entries used in the array above.
+  SamplerInput pixel_shader_samplers[32];
+  size_t pixel_shader_sampler_count;   // Entries used in the array above.
+};
+
+
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_DRAW_COMMAND_H_
diff --git a/src/xenia/gpu/graphics_driver.cc b/src/xenia/gpu/graphics_driver.cc
index 65dddea49..e398839b8 100644
--- a/src/xenia/gpu/graphics_driver.cc
+++ b/src/xenia/gpu/graphics_driver.cc
@@ -12,12 +12,302 @@
 
 using namespace xe;
 using namespace xe::gpu;
+using namespace xe::gpu::xenos;
 
 
 GraphicsDriver::GraphicsDriver(Memory* memory) :
-    memory_(memory),  address_translation_(0) {
-  memset(&register_file_, 0, sizeof(register_file_));
+    memory_(memory), address_translation_(0),
+    vertex_shader_(nullptr), pixel_shader_(nullptr) {
+  // No shaders are bound until LoadShader(); PopulateShaders() null-checks.
 }
 
 GraphicsDriver::~GraphicsDriver() {
 }
+
+int GraphicsDriver::LoadShader(XE_GPU_SHADER_TYPE type,
+                               uint32_t address, uint32_t length,
+                               uint32_t start) {
+  // Binds the shader at the given guest address as the current vertex or pixel
+  // shader. |start| is not used. Returns 0 on success, 1 on fetch failure.
+  MemoryRange memory_range(memory_->Translate(address), address, length);
+
+  ShaderResource* shader = nullptr;
+  if (type == XE_GPU_SHADER_TYPE_VERTEX) {
+    VertexShaderResource::Info info;
+    vertex_shader_ = resource_cache()->FetchVertexShader(memory_range, info);
+    if (!vertex_shader_) {
+      XELOGE("Unable to fetch vertex shader");
+      return 1;
+    }
+    shader = vertex_shader_;
+  } else {
+    PixelShaderResource::Info info;
+    pixel_shader_ = resource_cache()->FetchPixelShader(memory_range, info);
+    if (!pixel_shader_) {
+      XELOGE("Unable to fetch pixel shader");
+      return 1;
+    }
+    shader = pixel_shader_;
+  }
+
+  if (shader->is_prepared()) {
+    return 0;
+  }
+  // Not prepared yet: write the disassembly to the GPU log.
+  const char* source = shader->disasm_src();
+  XELOGGPU("Set shader %d at %0.8X (%db):\n%s",
+           type, address, length,
+           source ? source : "<failed to disassemble>");
+  return 0;
+}
+
+int GraphicsDriver::PrepareDraw(DrawCommand& command) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // Ignore copies for now.
+  uint32_t enable_mode = register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
+  if (enable_mode != 4) {  // Only enable mode 4 is prepared as a draw.
+    XELOGW("GPU: ignoring draw with enable mode %d", enable_mode);
+    return 1;
+  }
+
+  // Reset the things we don't modify so that we have clean state.
+  command.prim_type = XE_GPU_PRIMITIVE_TYPE_POINT_LIST;
+  command.index_count = 0;
+  command.index_buffer = nullptr;
+
+  // Values taken straight from registers or fixed defaults.
+  command.start_index = register_file_[XE_GPU_REG_VGT_INDX_OFFSET].u32;
+  command.base_vertex = 0;
+
+  int ret;  // The first non-zero result aborts and is returned to the caller.
+  ret = PopulateState(command);
+  if (ret) {
+    XELOGE("Unable to prepare draw state");
+    return ret;
+  }
+  ret = PopulateConstantBuffers(command);
+  if (ret) {
+    XELOGE("Unable to prepare draw constant buffers");
+    return ret;
+  }
+  ret = PopulateShaders(command);  // Sets command.vertex/pixel_shader.
+  if (ret) {
+    XELOGE("Unable to prepare draw shaders");
+    return ret;
+  }
+  ret = PopulateInputAssembly(command);  // Reads command.vertex_shader.
+  if (ret) {
+    XELOGE("Unable to prepare draw input assembly");
+    return ret;
+  }
+  ret = PopulateSamplers(command);  // Reads both command shaders.
+  if (ret) {
+    XELOGE("Unable to prepare draw samplers");
+    return ret;
+  }
+  return 0;
+}
+
+int GraphicsDriver::PrepareDrawIndexBuffer(
+    DrawCommand& command,
+    uint32_t address, uint32_t length,
+    xenos::XE_GPU_ENDIAN endianness,
+    IndexFormat format) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // Describe how the indices are laid out in guest memory.
+  IndexBufferResource::Info info;
+  info.endianness = endianness;
+  info.format = format;
+
+  // Apply the driver's address translation before resolving host memory.
+  const uint32_t guest_address = address + address_translation_;
+  MemoryRange memory_range(
+      memory_->Translate(guest_address), guest_address, length);
+
+  // Returns 0 when the cache supplied a buffer, 1 otherwise.
+  command.index_buffer = resource_cache()->FetchIndexBuffer(memory_range, info);
+  return command.index_buffer ? 0 : 1;
+}
+
+int GraphicsDriver::PopulateState(DrawCommand& command) {
+  return 0;  // Nothing is populated here yet; always succeeds.
+}
+
+int GraphicsDriver::PopulateConstantBuffers(DrawCommand& command) {
+  command.float4_constants.count = 512;
+  command.float4_constants.values =
+      &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X].f32;
+  command.loop_constants.count = 32;
+  command.loop_constants.values =
+      &register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32;
+  command.bool_constants.count = 8;
+  command.bool_constants.values =
+      &register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
+  return 0;  // Constants are referenced in place in register_file_, not copied.
+}
+
+int GraphicsDriver::PopulateShaders(DrawCommand& command) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // Both stages must already have been bound through LoadShader().
+  if (!vertex_shader_) {
+    XELOGE("No vertex shader bound; ignoring");
+    return 1;
+  }
+  if (!pixel_shader_) {
+    XELOGE("No pixel shader bound; ignoring");
+    return 1;
+  }
+
+  // Prepare whichever shader is not yet prepared, using the current program
+  // control word. The pixel shader is prepared against the vertex shader.
+  xe_gpu_program_cntl_t program_cntl;
+  program_cntl.dword_0 = register_file_[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
+  if (!vertex_shader_->is_prepared() &&
+      vertex_shader_->Prepare(program_cntl)) {
+    XELOGE("Unable to prepare vertex shader");
+    return 1;
+  }
+  if (!pixel_shader_->is_prepared() &&
+      pixel_shader_->Prepare(program_cntl, vertex_shader_)) {
+    XELOGE("Unable to prepare pixel shader");
+    return 1;
+  }
+
+  command.vertex_shader = vertex_shader_;
+  command.pixel_shader = pixel_shader_;
+  return 0;
+}
+
+int GraphicsDriver::PopulateInputAssembly(DrawCommand& command) {
+  SCOPE_profile_cpu_f("gpu");
+  // Fills command.vertex_buffers from the vertex shader's buffer inputs.
+  const auto& buffer_inputs = command.vertex_shader->buffer_inputs();
+  command.vertex_buffer_count = buffer_inputs.count;
+  for (size_t n = 0; n < buffer_inputs.count; n++) {
+    const auto& desc = buffer_inputs.descs[n];
+
+    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
+    auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&register_file_.values[r]);
+    xe_gpu_vertex_fetch_t* fetch = nullptr;
+    switch (desc.fetch_slot % 3) {  // Three vertex fetches per fetch group.
+    case 0:
+      fetch = &group->vertex_fetch_0;
+      break;
+    case 1:
+      fetch = &group->vertex_fetch_1;
+      break;
+    case 2:
+      fetch = &group->vertex_fetch_2;
+      break;
+    }
+    XEASSERTNOTNULL(fetch);
+    // If this assert doesn't hold, maybe we just abort?
+    XEASSERT(fetch->type == 0x3);
+    XEASSERTNOTZERO(fetch->size);
+
+    const auto& info = desc.info;
+
+    MemoryRange memory_range;
+    memory_range.guest_base = (fetch->address << 2) + address_translation_;
+    memory_range.host_base = memory_->Translate(memory_range.guest_base);
+    memory_range.length = fetch->size * 4;  // size presumably counts dwords.
+    // TODO(benvanik): if the memory range is within the command buffer, we
+    //     should use a cached transient buffer.
+
+    auto buffer = resource_cache()->FetchVertexBuffer(memory_range, info);
+    if (!buffer) {
+      XELOGE("Unable to create vertex fetch buffer");
+      return 1;
+    }
+
+    command.vertex_buffers[n].input_index = desc.input_index;
+    command.vertex_buffers[n].buffer = buffer;
+    command.vertex_buffers[n].stride = desc.info.stride_words * 4;
+    command.vertex_buffers[n].offset = 0;
+  }
+  return 0;
+}
+
+int GraphicsDriver::PopulateSamplers(DrawCommand& command) {
+  SCOPE_profile_cpu_f("gpu");
+
+  // Fill one command sampler slot per vertex shader sampler input.
+  const auto& vertex_sampler_inputs = command.vertex_shader->sampler_inputs();
+  command.vertex_shader_sampler_count = vertex_sampler_inputs.count;
+  for (size_t i = 0; i < command.vertex_shader_sampler_count; ++i) {
+    if (PopulateSamplerSet(vertex_sampler_inputs.descs[i],
+                           command.vertex_shader_samplers[i])) {
+      return 1;
+    }
+  }
+
+  // Likewise, one command sampler slot per pixel shader sampler input.
+  const auto& pixel_sampler_inputs = command.pixel_shader->sampler_inputs();
+  command.pixel_shader_sampler_count = pixel_sampler_inputs.count;
+  for (size_t i = 0; i < command.pixel_shader_sampler_count; ++i) {
+    if (PopulateSamplerSet(pixel_sampler_inputs.descs[i],
+                           command.pixel_shader_samplers[i])) {
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+int GraphicsDriver::PopulateSamplerSet(
+    const ShaderResource::SamplerDesc& src_input,
+    DrawCommand::SamplerInput& dst_input) {
+  dst_input.input_index = src_input.input_index;
+  dst_input.texture = nullptr;        // Reset before any early-out so that a
+  dst_input.sampler_state = nullptr;  // skipped slot never keeps stale data.
+
+  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + src_input.fetch_slot * 6;
+  const auto group = (const xe_gpu_fetch_group_t*)&register_file_.values[r];
+  const xenos::xe_gpu_texture_fetch_t& fetch = group->texture_fetch;
+  if (fetch.type != 0x2) {
+    return 0;
+  }
+
+  TextureResource::Info info;
+  if (!TextureResource::Info::Prepare(fetch, info)) {
+    XELOGE("D3D11: unable to parse texture fetcher info");
+    return 0;  // invalid texture used
+  }
+  if (info.format == DXGI_FORMAT_UNKNOWN) {
+    XELOGW("D3D11: unknown texture format %d", info.format);
+    return 0;  // invalid texture used
+  }
+  
+  // TODO(benvanik): quick validate without refetching intraframe.
+  // Fetch texture from the cache.
+  MemoryRange memory_range;
+  memory_range.guest_base = (fetch.address << 12) + address_translation_;
+  memory_range.host_base = memory_->Translate(memory_range.guest_base);
+  memory_range.length = info.input_length;
+
+  auto texture = resource_cache()->FetchTexture(memory_range, info);
+  if (!texture) {
+    XELOGW("D3D11: unable to fetch texture");
+    return 0;  // invalid texture used
+  }
+
+  SamplerStateResource::Info sampler_info;
+  if (!SamplerStateResource::Info::Prepare(fetch,
+                                           src_input.tex_fetch,
+                                           sampler_info)) {
+    XELOGW("D3D11: unable to parse sampler info");
+    return 0;  // invalid texture used
+  }
+  auto sampler_state = resource_cache()->FetchSamplerState(sampler_info);
+  if (!sampler_state) {
+    XELOGW("D3D11: unable to fetch sampler");
+    return 0;  // invalid texture used
+  }
+
+  dst_input.texture = texture;
+  dst_input.sampler_state = sampler_state;
+  return 0;
+}
diff --git a/src/xenia/gpu/graphics_driver.h b/src/xenia/gpu/graphics_driver.h
index 675a5a7c2..23cb24972 100644
--- a/src/xenia/gpu/graphics_driver.h
+++ b/src/xenia/gpu/graphics_driver.h
@@ -11,7 +11,9 @@
 #define XENIA_GPU_GRAPHICS_DRIVER_H_
 
 #include <xenia/core.h>
-#include <xenia/gpu/xenos/registers.h>
+#include <xenia/gpu/draw_command.h>
+#include <xenia/gpu/register_file.h>
+#include <xenia/gpu/resource_cache.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 
@@ -24,38 +26,45 @@ public:
   virtual ~GraphicsDriver();
 
   Memory* memory() const { return memory_; }
-  xenos::RegisterFile* register_file() { return &register_file_; };
+  virtual ResourceCache* resource_cache() const = 0;
+  RegisterFile* register_file() { return &register_file_; };
   void set_address_translation(uint32_t value) {
     address_translation_ = value;
   }
 
-  virtual void Initialize() = 0;
+  virtual int Initialize() = 0;
 
-  virtual void InvalidateState(
-      uint32_t mask) = 0;
-  virtual void SetShader(
-      xenos::XE_GPU_SHADER_TYPE type,
-      uint32_t address,
-      uint32_t start,
-      uint32_t length) = 0;
-  virtual void DrawIndexBuffer(
-    xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-    bool index_32bit, uint32_t index_count,
-    uint32_t index_base, uint32_t index_size, uint32_t endianness) = 0;
-  //virtual void DrawIndexImmediate();
-  virtual void DrawIndexAuto(
-      xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-      uint32_t index_count) = 0;
+  int LoadShader(xenos::XE_GPU_SHADER_TYPE type,
+                 uint32_t address, uint32_t length, 
+                 uint32_t start);
+
+  int PrepareDraw(DrawCommand& command);
+  int PrepareDrawIndexBuffer(DrawCommand& command,
+                             uint32_t address, uint32_t length,
+                             xenos::XE_GPU_ENDIAN endianness,
+                             IndexFormat format);
+  virtual int Draw(const DrawCommand& command) = 0;
 
   virtual int Resolve() = 0;
 
+private:
+  int PopulateState(DrawCommand& command);
+  int PopulateConstantBuffers(DrawCommand& command);
+  int PopulateShaders(DrawCommand& command);
+  int PopulateInputAssembly(DrawCommand& command);
+  int PopulateSamplers(DrawCommand& command);
+  int PopulateSamplerSet(const ShaderResource::SamplerDesc& src_input,
+                         DrawCommand::SamplerInput& dst_input);
+
 protected:
   GraphicsDriver(Memory* memory);
 
   Memory* memory_;
-
-  xenos::RegisterFile register_file_;
+  RegisterFile register_file_;
   uint32_t address_translation_;
+
+  VertexShaderResource* vertex_shader_;
+  PixelShaderResource* pixel_shader_;
 };
 
 
diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc
index c0a614d35..be3e4e0de 100644
--- a/src/xenia/gpu/graphics_system.cc
+++ b/src/xenia/gpu/graphics_system.cc
@@ -11,9 +11,10 @@
 
 #include <xenia/emulator.h>
 #include <xenia/cpu/processor.h>
+#include <xenia/gpu/command_processor.h>
+#include <xenia/gpu/gpu-private.h>
 #include <xenia/gpu/graphics_driver.h>
-#include <xenia/gpu/ring_buffer_worker.h>
-#include <xenia/gpu/xenos/registers.h>
+#include <xenia/gpu/register_file.h>
 
 
 using namespace xe;
@@ -24,10 +25,10 @@ using namespace xe::gpu::xenos;
 
 GraphicsSystem::GraphicsSystem(Emulator* emulator) :
     emulator_(emulator), memory_(emulator->memory()),
-    thread_(0), running_(false), driver_(0), worker_(0),
+    thread_(nullptr), running_(false), driver_(nullptr),
+    command_processor_(nullptr),
     interrupt_callback_(0), interrupt_callback_data_(0),
-    last_interrupt_time_(0), swap_pending_(false),
-	thread_wait_(NULL) {
+    last_interrupt_time_(0), swap_pending_(false), thread_wait_(nullptr) {
   // Create the run loop used for any windows/etc.
   // This must be done on the thread we create the driver.
   run_loop_ = xe_run_loop_create();
@@ -42,7 +43,7 @@ X_STATUS GraphicsSystem::Setup() {
   processor_ = emulator_->processor();
 
   // Create worker.
-  worker_ = new RingBufferWorker(this, memory_);
+  command_processor_ = new CommandProcessor(this, memory_);
 
   // Let the processor know we want register access callbacks.
   emulator_->memory()->AddMappedRange(
@@ -77,15 +78,18 @@ void GraphicsSystem::ThreadStart() {
   // Main run loop.
   while (running_) {
     // Peek main run loop.
-    if (xe_run_loop_pump(run_loop)) {
-      break;
+    {
+      SCOPE_profile_cpu_i("gpu", "GraphicsSystemRunLoopPump");
+      if (xe_run_loop_pump(run_loop)) {
+        break;
+      }
     }
     if (!running_) {
       break;
     }
 
     // Pump worker.
-    worker_->Pump();
+    command_processor_->Pump();
 
     if (!running_) {
       break;
@@ -107,7 +111,7 @@ void GraphicsSystem::Shutdown() {
   xe_thread_join(thread_);
   xe_thread_release(thread_);
 
-  delete worker_;
+  delete command_processor_;
 
   xe_run_loop_release(run_loop_);
 }
@@ -125,17 +129,19 @@ void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) {
     Sleep(0);
   }
   XEASSERTNOTNULL(driver_);
-  worker_->Initialize(driver_, ptr, page_count);
+  command_processor_->Initialize(driver_, ptr, page_count);
 }
 
 void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
                                                 uint32_t block_size) {
-  worker_->EnableReadPointerWriteBack(ptr, block_size);
+  command_processor_->EnableReadPointerWriteBack(ptr, block_size);
 }
 
 uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
   uint32_t r = addr & 0xFFFF;
-  XELOGGPU("ReadRegister(%.4X)", r);
+  if (FLAGS_trace_ring_buffer) {
+    XELOGGPU("ReadRegister(%.4X)", r);
+  }
 
   RegisterFile* regs = driver_->register_file();
 
@@ -148,31 +154,33 @@ uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
     return 1;
   }
 
-  XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
+  XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
   return regs->values[r].u32;
 }
 
 void GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
   uint32_t r = addr & 0xFFFF;
-  XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
+  if (FLAGS_trace_ring_buffer) {
+    XELOGGPU("WriteRegister(%.4X, %.8X)", r, (uint32_t)value);
+  }
 
   RegisterFile* regs = driver_->register_file();
 
   switch (r) {
     case 0x0714: // CP_RB_WPTR
-      worker_->UpdateWritePointer((uint32_t)value);
+      command_processor_->UpdateWritePointer((uint32_t)value);
       break;
     default:
       XELOGW("Unknown GPU register %.4X write: %.8X", r, value);
       break;
   }
 
-  XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
+  XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
   regs->values[r].u32 = (uint32_t)value;
 }
 
 void GraphicsSystem::MarkVblank() {
-  worker_->increment_counter();
+  command_processor_->increment_counter();
 }
 
 void GraphicsSystem::DispatchInterruptCallback(
diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h
index c7c72fea5..8c0a542c8 100644
--- a/src/xenia/gpu/graphics_system.h
+++ b/src/xenia/gpu/graphics_system.h
@@ -21,8 +21,8 @@ XEDECLARECLASS2(xe, cpu, Processor);
 namespace xe {
 namespace gpu {
 
+class CommandProcessor;
 class GraphicsDriver;
-class RingBufferWorker;
 
 
 class GraphicsSystem {
@@ -78,7 +78,7 @@ protected:
   bool              running_;
 
   GraphicsDriver*   driver_;
-  RingBufferWorker* worker_;
+  CommandProcessor* command_processor_;
 
   uint32_t          interrupt_callback_;
   uint32_t          interrupt_callback_data_;
diff --git a/src/xenia/gpu/nop/nop_graphics_driver.cc b/src/xenia/gpu/nop/nop_graphics_driver.cc
index 69f88fa95..b710b85e4 100644
--- a/src/xenia/gpu/nop/nop_graphics_driver.cc
+++ b/src/xenia/gpu/nop/nop_graphics_driver.cc
@@ -10,7 +10,6 @@
 #include <xenia/gpu/nop/nop_graphics_driver.h>
 
 #include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/shader_cache.h>
 
 
 using namespace xe;
@@ -19,69 +18,19 @@ using namespace xe::gpu::nop;
 using namespace xe::gpu::xenos;
 
 
-NopGraphicsDriver::NopGraphicsDriver(Memory* memory) :
-    GraphicsDriver(memory) {
-  shader_cache_ = new ShaderCache();
+NopGraphicsDriver::NopGraphicsDriver(Memory* memory)
+    : GraphicsDriver(memory), resource_cache_(nullptr) {
 }
 
 NopGraphicsDriver::~NopGraphicsDriver() {
-  delete shader_cache_;
 }
 
-void NopGraphicsDriver::Initialize() {
+int NopGraphicsDriver::Initialize() {
+  return 0;
 }
 
-void NopGraphicsDriver::InvalidateState(
-    uint32_t mask) {
-  if (mask == XE_GPU_INVALIDATE_MASK_ALL) {
-    XELOGGPU("NOP: (invalidate all)");
-  }
-  if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) {
-    XELOGGPU("NOP: invalidate vertex shader");
-  }
-  if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) {
-    XELOGGPU("NOP: invalidate pixel shader");
-  }
-}
-
-void NopGraphicsDriver::SetShader(
-    XE_GPU_SHADER_TYPE type,
-    uint32_t address,
-    uint32_t start,
-    uint32_t length) {
-  // Find or create shader in the cache.
-  uint8_t* p = memory_->Translate(address);
-  Shader* shader = shader_cache_->FindOrCreate(
-      type, p, length);
-
-  // Disassemble.
-  const char* source = shader->disasm_src();
-  if (!source) {
-    source = "<failed to disassemble>";
-  }
-  XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s",
-           type, address, length, source);
-}
-
-void NopGraphicsDriver::DrawIndexBuffer(
-    XE_GPU_PRIMITIVE_TYPE prim_type,
-    bool index_32bit, uint32_t index_count,
-    uint32_t index_base, uint32_t index_size, uint32_t endianness) {
-  XELOGGPU("NOP: draw index buffer");
-}
-
-void NopGraphicsDriver::DrawIndexAuto(
-    XE_GPU_PRIMITIVE_TYPE prim_type,
-    uint32_t index_count) {
-  XELOGGPU("NOP: draw indexed %d (%d indicies)",
-           prim_type, index_count);
-
-  // TODO(benvanik):
-  // program control
-  // context misc
-  // interpolator control
-  // shader constants / bools / integers
-  // fetch constants
+int NopGraphicsDriver::Draw(const DrawCommand& command) {
+  return 0;
 }
 
 int NopGraphicsDriver::Resolve() {
diff --git a/src/xenia/gpu/nop/nop_graphics_driver.h b/src/xenia/gpu/nop/nop_graphics_driver.h
index d345c8159..9463a0cd5 100644
--- a/src/xenia/gpu/nop/nop_graphics_driver.h
+++ b/src/xenia/gpu/nop/nop_graphics_driver.h
@@ -19,9 +19,6 @@
 
 namespace xe {
 namespace gpu {
-
-class ShaderCache;
-
 namespace nop {
 
 
@@ -30,27 +27,16 @@ public:
   NopGraphicsDriver(Memory* memory);
   virtual ~NopGraphicsDriver();
 
-  virtual void Initialize();
+  ResourceCache* resource_cache() const override { return resource_cache_; }
 
-  virtual void InvalidateState(
-      uint32_t mask);
-  virtual void SetShader(
-      xenos::XE_GPU_SHADER_TYPE type,
-      uint32_t address,
-      uint32_t start,
-      uint32_t length);
-  virtual void DrawIndexBuffer(
-      xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-      bool index_32bit, uint32_t index_count,
-      uint32_t index_base, uint32_t index_size, uint32_t endianness);
-  virtual void DrawIndexAuto(
-      xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
-      uint32_t index_count);
+  int Initialize() override;
 
-  virtual int Resolve();
+  int Draw(const DrawCommand& command) override;
+
+  int Resolve() override;
 
 protected:
-  ShaderCache*  shader_cache_;
+  ResourceCache* resource_cache_;
 };
 
 
diff --git a/src/xenia/gpu/register_file.cc b/src/xenia/gpu/register_file.cc
index f6f119376..288881d58 100644
--- a/src/xenia/gpu/register_file.cc
+++ b/src/xenia/gpu/register_file.cc
@@ -10,8 +10,21 @@
 #include <xenia/gpu/register_file.h>
 
 
-using namespace std;
 using namespace xe;
 using namespace xe::gpu;
-using namespace xe::gpu::xenos;
 
+
+RegisterFile::RegisterFile() {
+  xe_zero_struct(values, sizeof(values));
+}
+
+const char* RegisterFile::GetRegisterName(uint32_t index) {
+  switch (index) {  // Cases are generated from XE_GPU_REGISTER table entries.
+#define XE_GPU_REGISTER(index, type, name) \
+    case index: return #name;
+#include <xenia/gpu/xenos/register_table.inc>
+#undef XE_GPU_REGISTER
+    default:
+      return nullptr;  // No generated case matched this index.
+  }
+}
diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h
index 2a530995f..3ab23b4fa 100644
--- a/src/xenia/gpu/register_file.h
+++ b/src/xenia/gpu/register_file.h
@@ -11,15 +11,36 @@
 #define XENIA_GPU_REGISTER_FILE_H_
 
 #include <xenia/core.h>
-#include <xenia/gpu/xenos/xenos.h>
 
 
 namespace xe {
 namespace gpu {
 
 
+enum Register {
+#define XE_GPU_REGISTER(index, type, name) \
+    XE_GPU_REG_##name = index,
+#include <xenia/gpu/xenos/register_table.inc>
+#undef XE_GPU_REGISTER
+};
+
+
 class RegisterFile {
 public:
+  RegisterFile();
+
+  const char* GetRegisterName(uint32_t index);
+
+  static const size_t kRegisterCount = 0x5003;
+  union RegisterValue {
+    uint32_t  u32;
+    float     f32;
+  };
+  RegisterValue values[kRegisterCount];
+
+  RegisterValue& operator[](Register reg) {
+    return values[reg];
+  }
 };
 
 
diff --git a/src/xenia/gpu/resource.cc b/src/xenia/gpu/resource.cc
index 88966aac5..35ef82bb6 100644
--- a/src/xenia/gpu/resource.cc
+++ b/src/xenia/gpu/resource.cc
@@ -15,3 +15,23 @@ using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+
+HashedResource::HashedResource(const MemoryRange& memory_range)
+    : memory_range_(memory_range) {
+}
+
+HashedResource::~HashedResource() = default;
+
+PagedResource::PagedResource(const MemoryRange& memory_range)
+    : memory_range_(memory_range), dirtied_(true) {
+}
+
+PagedResource::~PagedResource() = default;
+
+void PagedResource::MarkDirty(uint32_t lo_address, uint32_t hi_address) {
+  dirtied_ = true;  // The address range is unused; the whole resource is flagged.
+}
+
+StaticResource::StaticResource() = default;
+
+StaticResource::~StaticResource() = default;
diff --git a/src/xenia/gpu/resource.h b/src/xenia/gpu/resource.h
index e9a0be7fa..1fb56b3d8 100644
--- a/src/xenia/gpu/resource.h
+++ b/src/xenia/gpu/resource.h
@@ -18,8 +18,82 @@ namespace xe {
 namespace gpu {
 
 
+struct MemoryRange {
+  uint8_t* host_base;
+  uint32_t guest_base;
+  uint32_t length;
+
+  MemoryRange() : host_base(nullptr), guest_base(0), length(0) {}
+  MemoryRange(const MemoryRange& other)
+      : host_base(other.host_base), guest_base(other.guest_base),
+        length(other.length) {}
+  MemoryRange(uint8_t* _host_base, uint32_t _guest_base, uint32_t _length)
+      : host_base(_host_base), guest_base(_guest_base), length(_length) {}
+};
+
+
 class Resource {
 public:
+  virtual ~Resource() = default;
+
+  virtual void* handle() const = 0;
+
+  template <typename T>
+  T* handle_as() {
+    return reinterpret_cast<T*>(handle());
+  }
+
+protected:
+  Resource() = default;
+
+  // last use/LRU stuff
+};
+
+
+class HashedResource : public Resource {
+public:
+  ~HashedResource() override;
+
+  const MemoryRange& memory_range() const { return memory_range_; }
+
+protected:
+  HashedResource(const MemoryRange& memory_range);
+
+  MemoryRange memory_range_;
+  // key
+};
+
+
+class PagedResource : public Resource {
+public:
+  ~PagedResource() override;
+
+  const MemoryRange& memory_range() const { return memory_range_; }
+
+  template <typename T>
+  bool Equals(const T& info) {
+    return Equals(&info, sizeof(info));
+  }
+  virtual bool Equals(const void* info_ptr, size_t info_length) = 0;
+
+  bool is_dirty() const { return dirtied_; }
+  void MarkDirty(uint32_t lo_address, uint32_t hi_address);
+
+protected:
+  PagedResource(const MemoryRange& memory_range);
+
+  MemoryRange memory_range_;
+  bool dirtied_;
+  // dirtied pages list
+};
+
+
+class StaticResource : public Resource {
+public:
+  ~StaticResource() override;
+
+protected:
+  StaticResource();
 };
 
 
diff --git a/src/xenia/gpu/resource_cache.cc b/src/xenia/gpu/resource_cache.cc
index 7a9a1c24d..c317a12be 100644
--- a/src/xenia/gpu/resource_cache.cc
+++ b/src/xenia/gpu/resource_cache.cc
@@ -15,3 +15,148 @@ using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+
+ResourceCache::ResourceCache(Memory* memory)
+    : memory_(memory) {
+}
+
+ResourceCache::~ResourceCache() {
+  for (auto it = resources_.begin(); it != resources_.end(); ++it) {
+    Resource* resource = *it;
+    delete resource;
+  }
+  resources_.clear();
+}
+
+VertexShaderResource* ResourceCache::FetchVertexShader(
+    const MemoryRange& memory_range,
+    const VertexShaderResource::Info& info) {
+  return FetchHashedResource<VertexShaderResource>(
+      memory_range, info, &ResourceCache::CreateVertexShader);
+}
+
+PixelShaderResource* ResourceCache::FetchPixelShader(
+    const MemoryRange& memory_range,
+    const PixelShaderResource::Info& info) {
+  return FetchHashedResource<PixelShaderResource>(
+      memory_range, info, &ResourceCache::CreatePixelShader);
+}
+
+TextureResource* ResourceCache::FetchTexture(
+    const MemoryRange& memory_range,
+    const TextureResource::Info& info) {
+  auto resource = FetchPagedResource<TextureResource>(
+      memory_range, info, &ResourceCache::CreateTexture);
+  if (!resource) {
+    return nullptr;
+  }
+  if (resource->Prepare()) {
+    XELOGE("Unable to prepare texture");
+    return nullptr;
+  }
+  return resource;
+}
+
+// Returns the cached sampler state stored under info.hash(), or creates and
+// prepares a new one and caches it. Returns nullptr if creation or
+// preparation fails; in that case nothing is cached.
+SamplerStateResource* ResourceCache::FetchSamplerState(
+    const SamplerStateResource::Info& info) {
+  auto key = info.hash();
+  auto it = static_resources_.find(key);
+  if (it != static_resources_.end()) {
+    return static_cast<SamplerStateResource*>(it->second);
+  }
+  auto resource = CreateSamplerState(info);
+  if (!resource) {
+    return nullptr;
+  }
+  if (resource->Prepare()) {
+    XELOGE("Unable to prepare sampler state");
+    // Not stored in any container yet, so the destructor would never free it.
+    delete resource;
+    return nullptr;
+  }
+  static_resources_.insert({ key, resource });
+  resources_.push_back(resource);
+  return resource;
+}
+
+IndexBufferResource* ResourceCache::FetchIndexBuffer(
+    const MemoryRange& memory_range,
+    const IndexBufferResource::Info& info) {
+  auto resource = FetchPagedResource<IndexBufferResource>(
+      memory_range, info, &ResourceCache::CreateIndexBuffer);
+  if (!resource) {
+    return nullptr;
+  }
+  if (resource->Prepare()) {
+    XELOGE("Unable to prepare index buffer");
+    return nullptr;
+  }
+  return resource;
+}
+
+VertexBufferResource* ResourceCache::FetchVertexBuffer(
+    const MemoryRange& memory_range,
+    const VertexBufferResource::Info& info) {
+  auto resource = FetchPagedResource<VertexBufferResource>(
+      memory_range, info, &ResourceCache::CreateVertexBuffer);
+  if (!resource) {
+    return nullptr;
+  }
+  if (resource->Prepare()) {
+    XELOGE("Unable to prepare vertex buffer");
+    return nullptr;
+  }
+  return resource;
+}
+
+uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
+  // We could do something smarter here to potentially early exit.
+  return xe_hash64(memory_range.host_base, memory_range.length);
+}
+
+void ResourceCache::SyncRange(uint32_t address, int length) {
+  // Scan the page table in sync with our resource list. This means
+  // we have O(n) complexity for updates, though we could definitely
+  // make this faster/cleaner.
+  // TODO(benvanik): actually do this right.
+  // For now we assume the page table in the range of our resources
+  // will not be changing, which allows us to do a foreach(res) and reload
+  // and then clear the table.
+
+  // total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768
+  // each byte = 1 page
+  // Walk as qwords so we can clear things up faster.
+  uint64_t* page_table = reinterpret_cast<uint64_t*>(
+      memory_->Translate(memory_->page_table()));
+  int page_size = 16 * 1024;  // 16KB pages
+
+  uint32_t lo_address = address % 0x20000000;
+  uint32_t hi_address = lo_address + length;
+  hi_address = (hi_address / page_size) * page_size + page_size;
+  int start_page = lo_address / page_size;
+  int end_page = hi_address / page_size;
+
+  auto it = paged_resources_.lower_bound(lo_address);  // include key == lo
+  auto end_it = paged_resources_.lower_bound(hi_address);
+  while (it != end_it) {
+    const auto& memory_range = it->second->memory_range();
+    int lo_page = (memory_range.guest_base % 0x20000000) / page_size;
+    int hi_page = lo_page + (memory_range.length / page_size);
+    for (int i = lo_page / 8; i <= hi_page / 8; ++i) {
+      uint64_t page_flags = page_table[i];
+      if (page_flags) {
+        // Dirty!
+        it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size);
+      }
+    }
+    ++it;
+  }
+
+  // Reset page table.
+  for (auto i = start_page / 8; i <= end_page / 8; ++i) {
+    page_table[i] = 0;
+  }
+}
diff --git a/src/xenia/gpu/resource_cache.h b/src/xenia/gpu/resource_cache.h
index 7caaad51f..be95f0861 100644
--- a/src/xenia/gpu/resource_cache.h
+++ b/src/xenia/gpu/resource_cache.h
@@ -10,7 +10,14 @@
 #ifndef XENIA_GPU_RESOURCE_CACHE_H_
 #define XENIA_GPU_RESOURCE_CACHE_H_
 
+#include <map>
+
 #include <xenia/core.h>
+#include <xenia/gpu/buffer_resource.h>
+#include <xenia/gpu/resource.h>
+#include <xenia/gpu/sampler_state_resource.h>
+#include <xenia/gpu/shader_resource.h>
+#include <xenia/gpu/texture_resource.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 
@@ -20,6 +27,103 @@ namespace gpu {
 
 class ResourceCache {
 public:
+  virtual ~ResourceCache();
+
+  VertexShaderResource* FetchVertexShader(
+      const MemoryRange& memory_range,
+      const VertexShaderResource::Info& info);
+  PixelShaderResource* FetchPixelShader(
+      const MemoryRange& memory_range,
+      const PixelShaderResource::Info& info);
+
+  TextureResource* FetchTexture(
+      const MemoryRange& memory_range,
+      const TextureResource::Info& info);
+  SamplerStateResource* FetchSamplerState(
+      const SamplerStateResource::Info& info);
+
+  IndexBufferResource* FetchIndexBuffer(
+      const MemoryRange& memory_range,
+      const IndexBufferResource::Info& info);
+  VertexBufferResource* FetchVertexBuffer(
+      const MemoryRange& memory_range,
+      const VertexBufferResource::Info& info);
+
+  uint64_t HashRange(const MemoryRange& memory_range);
+
+  void SyncRange(uint32_t address, int length);
+
+protected:
+  ResourceCache(Memory* memory);
+
+  template <typename T, typename V>
+  T* FetchHashedResource(const MemoryRange& memory_range,
+                         const typename T::Info& info,
+                         const V& factory) {
+    // TODO(benvanik): if there's no way it's changed and it's been checked,
+    //     just lookup. This way we don't rehash 100x a frame.
+    auto key = HashRange(memory_range);
+    auto it = hashed_resources_.find(key);
+    if (it != hashed_resources_.end()) {
+      return static_cast<T*>(it->second);
+    }
+    auto resource = (this->*factory)(memory_range, info);
+    hashed_resources_.insert({ key, resource });
+    resources_.push_back(resource);
+    return resource;
+  }
+
+  // Finds a paged resource keyed by the guest base address (modulo
+  // 0x20000000) whose length and Info match, creating it through |factory|
+  // on a miss. Returns nullptr, caching nothing, if the factory fails.
+  template <typename T, typename V>
+  T* FetchPagedResource(const MemoryRange& memory_range,
+                        const typename T::Info& info,
+                        const V& factory) {
+    uint32_t lo_address = memory_range.guest_base % 0x20000000;
+    auto key = uint64_t(lo_address);
+    auto range = paged_resources_.equal_range(key);
+    for (auto it = range.first; it != range.second; ++it) {
+      if (it->second->memory_range().length == memory_range.length &&
+          it->second->Equals(info)) {
+        return static_cast<T*>(it->second);
+      }
+    }
+    auto resource = (this->*factory)(memory_range, info);
+    if (!resource) {
+      // A null entry would be dereferenced by the lookup loop above.
+      return nullptr;
+    }
+    paged_resources_.insert({ key, resource });
+    resources_.push_back(resource);
+    return resource;
+  }
+  
+  virtual VertexShaderResource* CreateVertexShader(
+      const MemoryRange& memory_range,
+      const VertexShaderResource::Info& info) = 0;
+  virtual PixelShaderResource* CreatePixelShader(
+      const MemoryRange& memory_range,
+      const PixelShaderResource::Info& info) = 0;
+  virtual TextureResource* CreateTexture(
+      const MemoryRange& memory_range,
+      const TextureResource::Info& info) = 0;
+  virtual SamplerStateResource* CreateSamplerState(
+      const SamplerStateResource::Info& info) = 0;
+  virtual IndexBufferResource* CreateIndexBuffer(
+      const MemoryRange& memory_range,
+      const IndexBufferResource::Info& info) = 0;
+  virtual VertexBufferResource* CreateVertexBuffer(
+      const MemoryRange& memory_range,
+      const VertexBufferResource::Info& info) = 0;
+
+private:
+  Memory* memory_;
+
+  std::vector<Resource*> resources_;
+  std::unordered_map<uint64_t, HashedResource*> hashed_resources_;
+  std::unordered_map<uint64_t, StaticResource*> static_resources_;
+  std::multimap<uint64_t, PagedResource*> paged_resources_;
 };
 
 
diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc
deleted file mode 100644
index 9999601bb..000000000
--- a/src/xenia/gpu/ring_buffer_worker.cc
+++ /dev/null
@@ -1,741 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/ring_buffer_worker.h>
-
-#include <xenia/gpu/gpu-private.h>
-#include <xenia/gpu/graphics_driver.h>
-#include <xenia/gpu/graphics_system.h>
-#include <xenia/gpu/xenos/packets.h>
-#include <xenia/gpu/xenos/registers.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-#define XETRACERB(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__)
-
-
-RingBufferWorker::RingBufferWorker(
-    GraphicsSystem* graphics_system, Memory* memory) :
-    graphics_system_(graphics_system), memory_(memory), driver_(0) {
-  write_ptr_index_event_ = CreateEvent(
-      NULL, FALSE, FALSE, NULL);
-
-  primary_buffer_ptr_     = 0;
-  primary_buffer_size_    = 0;
-  read_ptr_index_         = 0;
-  read_ptr_update_freq_   = 0;
-  read_ptr_writeback_ptr_ = 0;
-  write_ptr_index_        = 0;
-  write_ptr_max_index_    = 0;
-
-  LARGE_INTEGER perf_counter;
-  QueryPerformanceCounter(&perf_counter);
-  time_base_ = perf_counter.QuadPart;
-  counter_ = 0;
-}
-
-RingBufferWorker::~RingBufferWorker() {
-  SetEvent(write_ptr_index_event_);
-  CloseHandle(write_ptr_index_event_);
-}
-
-uint64_t RingBufferWorker::QueryTime() {
-  LARGE_INTEGER perf_counter;
-  QueryPerformanceCounter(&perf_counter);
-  return perf_counter.QuadPart - time_base_;
-}
-
-void RingBufferWorker::Initialize(GraphicsDriver* driver,
-                                  uint32_t ptr, uint32_t page_count) {
-  driver_               = driver;
-  primary_buffer_ptr_   = ptr;
-  // Not sure this is correct, but it's a way to take the page_count back to
-  // the number of bytes allocated by the physical alloc.
-  uint32_t original_size = 1 << (0x1C - page_count - 1);
-  primary_buffer_size_  = original_size;
-  read_ptr_index_       = 0;
-
-  // Tell the driver what to use for translation.
-  driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
-}
-
-void RingBufferWorker::EnableReadPointerWriteBack(uint32_t ptr,
-                                                  uint32_t block_size) {
-  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
-  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
-  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;
-  // CP_RB_CNTL Ring Buffer Control 0x704
-  // block_size = RB_BLKSZ, number of quadwords read between updates of the
-  //              read pointer.
-  read_ptr_update_freq_ = (uint32_t)pow(2.0, (double)block_size) / 4;
-}
-
-void RingBufferWorker::UpdateWritePointer(uint32_t value) {
-  write_ptr_max_index_  = MAX(write_ptr_max_index_, value);
-  write_ptr_index_      = value;
-  SetEvent(write_ptr_index_event_);
-}
-
-void RingBufferWorker::Pump() {
-  uint8_t* p = memory_->membase();
-
-  if (write_ptr_index_ == 0xBAADF00D ||
-      read_ptr_index_ == write_ptr_index_) {
-    // Check if the pointer has moved.
-    // We wait a short bit here to yield time. Since we are also running the
-    // main window display we don't want to pause too long, though.
-    const int wait_time_ms = 1;
-    if (WaitForSingleObject(write_ptr_index_event_,
-                            wait_time_ms) == WAIT_TIMEOUT) {
-      return;
-    }
-  }
-
-  // Bring local so we don't have to worry about them changing out from under
-  // us.
-  uint32_t write_ptr_index = write_ptr_index_;
-  uint32_t write_ptr_max_index = write_ptr_max_index_;
-  if (read_ptr_index_ == write_ptr_index) {
-    return;
-  }
-
-  // Process the new commands.
-  XETRACERB("Ring buffer thread work");
-
-  // Execute. Note that we handle wraparound transparently.
-  ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
-  read_ptr_index_ = write_ptr_index;
-
-  // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
-  //     that many indices.
-  if (read_ptr_writeback_ptr_) {
-    XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
-  }
-}
-
-void RingBufferWorker::ExecutePrimaryBuffer(
-    uint32_t start_index, uint32_t end_index) {
-  SCOPE_profile_cpu_f("gpu");
-
-  // Adjust pointer base.
-  uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
-  ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
-  uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4;
-  end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
-
-  XETRACERB("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
-            ptr, start_index, end_index);
-
-  // Execute commands!
-  PacketArgs args;
-  args.ptr          = ptr;
-  args.base_ptr     = primary_buffer_ptr_;
-  args.max_address  = primary_buffer_ptr_ + primary_buffer_size_ * 4;
-  args.ptr_mask     = (primary_buffer_size_ / 4) - 1;
-  uint32_t n = 0;
-  while (args.ptr != end_ptr) {
-    n += ExecutePacket(args);
-  }
-  if (end_index > start_index) {
-    XEASSERT(n == (end_index - start_index));
-  }
-
-  XETRACERB("           ExecutePrimaryBuffer End");
-}
-
-void RingBufferWorker::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
-  XETRACERB("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
-
-  // Execute commands!
-  PacketArgs args;
-  args.ptr          = ptr;
-  args.base_ptr     = ptr;
-  args.max_address  = ptr + length * 4;
-  args.ptr_mask     = 0;
-  for (uint32_t n = 0; n < length;) {
-    n += ExecutePacket(args);
-    XEASSERT(n <= length);
-  }
-
-  XETRACERB("           ExecuteIndirectBuffer End");
-}
-
-#define LOG_DATA(count) \
-  for (uint32_t __m = 0; __m < count; __m++) { \
-    XETRACERB("[%.8X]   %.8X", \
-              packet_ptr + (1 + __m) * 4, \
-              XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
-  }
-
-void RingBufferWorker::AdvancePtr(PacketArgs& args, uint32_t n) {
-  args.ptr = args.ptr + n * 4;
-  if (args.ptr_mask) {
-    args.ptr =
-        args.base_ptr + (((args.ptr - args.base_ptr) / 4) & args.ptr_mask) * 4;
-  }
-}
-#define ADVANCE_PTR(n) AdvancePtr(args, n)
-#define PEEK_PTR() \
-    XEGETUINT32BE(p + args.ptr)
-#define READ_PTR() \
-    XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
-
-uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
-  uint8_t* p = memory_->membase();
-  RegisterFile* regs = driver_->register_file();
-
-  uint32_t packet_ptr = args.ptr;
-  const uint8_t* packet_base = p + packet_ptr;
-  const uint32_t packet = PEEK_PTR();
-  ADVANCE_PTR(1);
-  const uint32_t packet_type = packet >> 30;
-  if (packet == 0) {
-    XETRACERB("[%.8X] Packet(%.8X): 0?",
-              packet_ptr, packet);
-    return 1;
-  }
-
-  switch (packet_type) {
-  case 0x00:
-    {
-      // Type-0 packet.
-      // Write count registers in sequence to the registers starting at
-      // (base_index << 2).
-      XETRACERB("[%.8X] Packet(%.8X): set registers:",
-                packet_ptr, packet);
-      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
-      uint32_t base_index = (packet & 0x7FFF);
-      uint32_t write_one_reg = (packet >> 15) & 0x1;
-      for (uint32_t m = 0; m < count; m++) {
-        uint32_t reg_data = PEEK_PTR();
-        uint32_t target_index = write_one_reg ? base_index : base_index + m;
-        const char* reg_name = xenos::GetRegisterName(target_index);
-        XETRACERB("[%.8X]   %.8X -> %.4X %s",
-                  args.ptr,
-                  reg_data, target_index, reg_name ? reg_name : "");
-        ADVANCE_PTR(1);
-        WriteRegister(packet_ptr, target_index, reg_data);
-      }
-      return 1 + count;
-    }
-    break;
-  case 0x01:
-    {
-      // Type-1 packet.
-      // Contains two registers of data. Type-0 should be more common.
-      XETRACERB("[%.8X] Packet(%.8X): set registers:",
-                packet_ptr, packet);
-      uint32_t reg_index_1 = packet & 0x7FF;
-      uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
-      uint32_t reg_ptr_1 = args.ptr;
-      uint32_t reg_data_1 = READ_PTR();
-      uint32_t reg_ptr_2 = args.ptr;
-      uint32_t reg_data_2 = READ_PTR();
-      const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
-      const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
-      XETRACERB("[%.8X]   %.8X -> %.4X %s",
-                reg_ptr_1,
-                reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
-      XETRACERB("[%.8X]   %.8X -> %.4X %s",
-                reg_ptr_2,
-                reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
-      WriteRegister(packet_ptr, reg_index_1, reg_data_1);
-      WriteRegister(packet_ptr, reg_index_2, reg_data_2);
-      return 1 + 2;
-    }
-    break;
-  case 0x02:
-    // Type-2 packet.
-    // No-op. Do nothing.
-    XETRACERB("[%.8X] Packet(%.8X): padding",
-              packet_ptr, packet);
-    return 1;
-  case 0x03:
-    {
-      // Type-3 packet.
-      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
-      uint32_t opcode = (packet >> 8) & 0x7F;
-      // & 1 == predicate, maybe?
-
-      switch (opcode) {
-      case PM4_ME_INIT:
-        // initialize CP's micro-engine
-        XETRACERB("[%.8X] Packet(%.8X): PM4_ME_INIT",
-                  packet_ptr, packet);
-        LOG_DATA(count);
-        ADVANCE_PTR(count);
-        break;
-
-      case PM4_NOP:
-        // skip N 32-bit words to get to the next packet
-        // No-op, ignore some data.
-        XETRACERB("[%.8X] Packet(%.8X): PM4_NOP",
-                  packet_ptr, packet);
-        LOG_DATA(count);
-        ADVANCE_PTR(count);
-        break;
-
-      case PM4_INTERRUPT:
-        // generate interrupt from the command stream
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_INTERRUPT",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t cpu_mask = READ_PTR();
-          for (int n = 0; n < 6; n++) {
-            if (cpu_mask & (1 << n)) {
-              graphics_system_->DispatchInterruptCallback(1, n);
-            }
-          }
-        }
-        break;
-
-      case PM4_INDIRECT_BUFFER:
-        // indirect buffer dispatch
-        {
-          uint32_t list_ptr = READ_PTR();
-          uint32_t list_length = READ_PTR();
-          XETRACERB("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
-                    packet_ptr, packet, list_ptr, list_length);
-          ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
-        }
-        break;
-
-      case PM4_WAIT_REG_MEM:
-        // wait until a register or memory location is a specific value
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t wait_info = READ_PTR();
-          uint32_t poll_reg_addr = READ_PTR();
-          uint32_t ref = READ_PTR();
-          uint32_t mask = READ_PTR();
-          uint32_t wait = READ_PTR();
-          bool matched = false;
-          do {
-            uint32_t value;
-            if (wait_info & 0x10) {
-              // Memory.
-              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
-              poll_reg_addr &= ~0x3;
-              value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
-              value = GpuSwap(value, endianness);
-            } else {
-              // Register.
-              XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
-              value = regs->values[poll_reg_addr].u32;
-            }
-            switch (wait_info & 0x7) {
-            case 0x0: // Never.
-              matched = false;
-              break;
-            case 0x1: // Less than reference.
-              matched = (value & mask) < ref;
-              break;
-            case 0x2: // Less than or equal to reference.
-              matched = (value & mask) <= ref;
-              break;
-            case 0x3: // Equal to reference.
-              matched = (value & mask) == ref;
-              break;
-            case 0x4: // Not equal to reference.
-              matched = (value & mask) != ref;
-              break;
-            case 0x5: // Greater than or equal to reference.
-              matched = (value & mask) >= ref;
-              break;
-            case 0x6: // Greater than reference.
-              matched = (value & mask) > ref;
-              break;
-            case 0x7: // Always
-              matched = true;
-              break;
-            }
-            if (!matched) {
-              // Wait.
-              if (wait >= 0x100) {
-                Sleep(wait / 0x100);
-              } else {
-                SwitchToThread();
-              }
-            }
-          } while (!matched);
-        }
-        break;
-
-      case PM4_REG_RMW:
-        // register read/modify/write
-        // ? (used during shader upload and edram setup)
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_REG_RMW",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t rmw_info = READ_PTR();
-          uint32_t and_mask = READ_PTR();
-          uint32_t or_mask = READ_PTR();
-          uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
-          if ((rmw_info >> 30) & 0x1) {
-            // | reg
-            value |= regs->values[or_mask & 0x1FFF].u32;
-          } else {
-            // | imm
-            value |= or_mask;
-          }
-          if ((rmw_info >> 31) & 0x1) {
-            // & reg
-            value &= regs->values[and_mask & 0x1FFF].u32;
-          } else {
-            // & imm
-            value &= and_mask;
-          }
-          WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
-        }
-        break;
-
-      case PM4_COND_WRITE:
-        // conditional write to memory or register
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_COND_WRITE",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t wait_info = READ_PTR();
-          uint32_t poll_reg_addr = READ_PTR();
-          uint32_t ref = READ_PTR();
-          uint32_t mask = READ_PTR();
-          uint32_t write_reg_addr = READ_PTR();
-          uint32_t write_data = READ_PTR();
-          uint32_t value;
-          if (wait_info & 0x10) {
-            // Memory.
-            XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
-            poll_reg_addr &= ~0x3;
-            value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
-            value = GpuSwap(value, endianness);
-          } else {
-            // Register.
-            XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
-            value = regs->values[poll_reg_addr].u32;
-          }
-          bool matched = false;
-          switch (wait_info & 0x7) {
-          case 0x0: // Never.
-            matched = false;
-            break;
-          case 0x1: // Less than reference.
-            matched = (value & mask) < ref;
-            break;
-          case 0x2: // Less than or equal to reference.
-            matched = (value & mask) <= ref;
-            break;
-          case 0x3: // Equal to reference.
-            matched = (value & mask) == ref;
-            break;
-          case 0x4: // Not equal to reference.
-            matched = (value & mask) != ref;
-            break;
-          case 0x5: // Greater than or equal to reference.
-            matched = (value & mask) >= ref;
-            break;
-          case 0x6: // Greater than reference.
-            matched = (value & mask) > ref;
-            break;
-          case 0x7: // Always
-            matched = true;
-            break;
-          }
-          if (matched) {
-            // Write.
-            if (wait_info & 0x100) {
-              // Memory.
-              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
-              write_reg_addr &= ~0x3;
-              write_data = GpuSwap(write_data, endianness);
-              XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
-                            write_data);
-            } else {
-              // Register.
-              WriteRegister(packet_ptr, write_reg_addr, write_data);
-            }
-          }
-        }
-        break;
-
-      case PM4_EVENT_WRITE:
-        // generate an event that creates a write to memory when completed
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t initiator = READ_PTR();
-          if (count == 1) {
-            // Just an event flag? Where does this write?
-          } else {
-            // Write to an address.
-            XEASSERTALWAYS();
-            ADVANCE_PTR(count - 1);
-          }
-        }
-        break;
-      case PM4_EVENT_WRITE_SHD:
-        // generate a VS|PS_done event
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t initiator = READ_PTR();
-          uint32_t address = READ_PTR();
-          uint32_t value = READ_PTR();
-          // Writeback initiator.
-          WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
-                        initiator & 0x1F);
-          uint32_t data_value;
-          if ((initiator >> 31) & 0x1) {
-            // Write counter (GPU vblank counter?).
-            data_value = counter_;
-          } else {
-            // Write value.
-            data_value = value;
-          }
-          XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
-          address &= ~0x3;
-          data_value = GpuSwap(data_value, endianness);
-          XESETUINT32LE(p + GpuToCpu(address), data_value);
-        }
-        break;
-
-      case PM4_DRAW_INDX:
-        // initiate fetch of index buffer and draw
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          // d0 = viz query info
-          uint32_t d0 = READ_PTR();
-          uint32_t d1 = READ_PTR();
-          uint32_t index_count = d1 >> 16;
-          uint32_t prim_type = d1 & 0x3F;
-          uint32_t src_sel = (d1 >> 6) & 0x3;
-          if (src_sel == 0x0) {
-            uint32_t index_base = READ_PTR();
-            uint32_t index_size = READ_PTR();
-            uint32_t endianness = index_size >> 29;
-            index_size &= 0x00FFFFFF;
-            bool index_32bit = (d1 >> 11) & 0x1;
-            index_size *= index_32bit ? 4 : 2;
-            driver_->DrawIndexBuffer(
-                (XE_GPU_PRIMITIVE_TYPE)prim_type,
-                index_32bit, index_count, index_base, index_size, endianness);
-          } else if (src_sel == 0x2) {
-            driver_->DrawIndexAuto(
-                (XE_GPU_PRIMITIVE_TYPE)prim_type,
-                index_count);
-          } else {
-            // Unknown source select.
-            XEASSERTALWAYS();
-          }
-        }
-        break;
-      case PM4_DRAW_INDX_2:
-        // draw using supplied indices in packet
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t d0 = READ_PTR();
-          uint32_t index_count = d0 >> 16;
-          uint32_t prim_type = d0 & 0x3F;
-          uint32_t src_sel = (d0 >> 6) & 0x3;
-          XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
-          driver_->DrawIndexAuto(
-              (XE_GPU_PRIMITIVE_TYPE)prim_type,
-              index_count);
-        }
-        break;
-
-      case PM4_SET_CONSTANT:
-        // load constant into chip and to memory
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
-                    packet_ptr, packet);
-          // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
-          //                                     reg - 0x2000
-          uint32_t offset_type = READ_PTR();
-          uint32_t index = offset_type & 0x7FF;
-          uint32_t type = (offset_type >> 16) & 0xFF;
-          switch (type) {
-          case 0x4: // REGISTER
-            index += 0x2000; // registers
-            for (uint32_t n = 0; n < count - 1; n++, index++) {
-              uint32_t data = READ_PTR();
-              const char* reg_name = xenos::GetRegisterName(index);
-              XETRACERB("[%.8X]   %.8X -> %.4X %s",
-                        packet_ptr + (1 + n) * 4,
-                        data, index, reg_name ? reg_name : "");
-              WriteRegister(packet_ptr, index, data);
-            }
-            break;
-          default:
-            XEASSERTALWAYS();
-            break;
-          }
-        }
-        break;
-      case PM4_LOAD_ALU_CONSTANT:
-        // load constants from memory
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
-                    packet_ptr, packet);
-          uint32_t address = READ_PTR();
-          address &= 0x3FFFFFFF;
-          uint32_t offset_type = READ_PTR();
-          uint32_t index = offset_type & 0x7FF;
-          uint32_t size = READ_PTR();
-          size &= 0xFFF;
-          index += 0x4000; // alu constants
-          for (uint32_t n = 0; n < size; n++, index++) {
-            uint32_t data = XEGETUINT32BE(
-                p + GpuToCpu(packet_ptr, address + n * 4));
-            const char* reg_name = xenos::GetRegisterName(index);
-            XETRACERB("[%.8X]   %.8X -> %.4X %s",
-                      packet_ptr,
-                      data, index, reg_name ? reg_name : "");
-            WriteRegister(packet_ptr, index, data);
-          }
-        }
-        break;
-
-      case PM4_IM_LOAD:
-        // load sequencer instruction memory (pointer-based)
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t addr_type = READ_PTR();
-          uint32_t type = addr_type & 0x3;
-          uint32_t addr = addr_type & ~0x3;
-          uint32_t start_size = READ_PTR();
-          uint32_t start = start_size >> 16;
-          uint32_t size = start_size & 0xFFFF; // dwords
-          XEASSERT(start == 0);
-          driver_->SetShader(
-              (XE_GPU_SHADER_TYPE)type,
-              GpuToCpu(packet_ptr, addr),
-              start,
-              size * 4);
-        }
-        break;
-      case PM4_IM_LOAD_IMMEDIATE:
-        // load sequencer instruction memory (code embedded in packet)
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t type = READ_PTR();
-          uint32_t start_size = READ_PTR();
-          uint32_t start = start_size >> 16;
-          uint32_t size = start_size & 0xFFFF; // dwords
-          XEASSERT(start == 0);
-          // TODO(benvanik): figure out if this could wrap.
-          XEASSERT(args.ptr + size * 4 < args.max_address);
-          driver_->SetShader(
-              (XE_GPU_SHADER_TYPE)type,
-              args.ptr,
-              start,
-              size * 4);
-          ADVANCE_PTR(size);
-        }
-        break;
-
-      case PM4_INVALIDATE_STATE:
-        // selective invalidation of state pointers
-        {
-          XETRACERB("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
-                    packet_ptr, packet);
-          LOG_DATA(count);
-          uint32_t mask = READ_PTR();
-          driver_->InvalidateState(mask);
-        }
-        break;
-
-      case PM4_SET_BIN_MASK_LO:
-        {
-          uint32_t value = READ_PTR();
-          XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
-                    packet_ptr, packet, value);
-        }
-        break;
-      case PM4_SET_BIN_MASK_HI:
-        {
-          uint32_t value = READ_PTR();
-          XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
-                    packet_ptr, packet, value);
-        }
-        break;
-      case PM4_SET_BIN_SELECT_LO:
-        {
-          uint32_t value = READ_PTR();
-          XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
-                    packet_ptr, packet, value);
-        }
-        break;
-      case PM4_SET_BIN_SELECT_HI:
-        {
-          uint32_t value = READ_PTR();
-          XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
-                    packet_ptr, packet, value);
-        }
-        break;
-
-      // Ignored packets - useful if breaking on the default handler below.
-      case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
-        XETRACERB("[%.8X] Packet(%.8X): unknown!",
-                  packet_ptr, packet);
-        LOG_DATA(count);
-        ADVANCE_PTR(count);
-        break;
-
-      default:
-        XETRACERB("[%.8X] Packet(%.8X): unknown!",
-                  packet_ptr, packet);
-        LOG_DATA(count);
-        ADVANCE_PTR(count);
-        break;
-      }
-
-      return 1 + count;
-    }
-    break;
-  }
-
-  return 0;
-}
-
-void RingBufferWorker::WriteRegister(
-    uint32_t packet_ptr, uint32_t index, uint32_t value) {
-  RegisterFile* regs = driver_->register_file();
-  XEASSERT(index < kXEGpuRegisterCount);
-  regs->values[index].u32 = value;
-
-  // Scratch register writeback.
-  if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
-    uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
-    if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
-      // Enabled - write to address.
-      uint8_t* p = memory_->membase();
-      uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
-      uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
-      XESETUINT32BE(p + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
-    }
-  }
-}
diff --git a/src/xenia/gpu/ring_buffer_worker.h b/src/xenia/gpu/ring_buffer_worker.h
deleted file mode 100644
index 889625d68..000000000
--- a/src/xenia/gpu/ring_buffer_worker.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_RING_BUFFER_WORKER_H_
-#define XENIA_GPU_RING_BUFFER_WORKER_H_
-
-#include <xenia/core.h>
-
-#include <xenia/gpu/xenos/registers.h>
-
-
-namespace xe {
-namespace gpu {
-
-class GraphicsDriver;
-class GraphicsSystem;
-
-class RingBufferWorker {
-public:
-  RingBufferWorker(GraphicsSystem* graphics_system, Memory* memory);
-  virtual ~RingBufferWorker();
-
-  Memory* memory() const { return memory_; }
-
-  uint64_t QueryTime();
-  uint32_t counter() const { return counter_; }
-  void increment_counter() { counter_++; }
-
-  void Initialize(GraphicsDriver* driver,
-                  uint32_t ptr, uint32_t page_count);
-  void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
-
-  void UpdateWritePointer(uint32_t value);
-
-  void Pump();
-
-private:
-  typedef struct {
-    uint32_t ptr;
-    uint32_t base_ptr;
-    uint32_t max_address;
-    uint32_t ptr_mask;
-  } PacketArgs;
-  void AdvancePtr(PacketArgs& args, uint32_t n);
-  void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
-  void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
-  uint32_t ExecutePacket(PacketArgs& args);
-  void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
-
-protected:
-  Memory*           memory_;
-  GraphicsSystem*   graphics_system_;
-  GraphicsDriver*   driver_;
-
-  uint64_t          time_base_;
-  uint32_t          counter_;
-
-  uint32_t          primary_buffer_ptr_;
-  uint32_t          primary_buffer_size_;
-
-  uint32_t          read_ptr_index_;
-  uint32_t          read_ptr_update_freq_;
-  uint32_t          read_ptr_writeback_ptr_;
-
-  HANDLE            write_ptr_index_event_;
-  volatile uint32_t write_ptr_index_;
-  volatile uint32_t write_ptr_max_index_;
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_RING_BUFFER_WORKER_H_
diff --git a/src/xenia/gpu/sampler_state_resource.cc b/src/xenia/gpu/sampler_state_resource.cc
new file mode 100644
index 000000000..5865a6920
--- /dev/null
+++ b/src/xenia/gpu/sampler_state_resource.cc
@@ -0,0 +1,32 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/sampler_state_resource.h>
+
+
+using namespace std;
+using namespace xe;
+using namespace xe::gpu;
+using namespace xe::gpu::xenos;
+
+// Fills out_info's filter and clamp fields from fetch and fetch_instr.
+bool SamplerStateResource::Info::Prepare(
+    const xe_gpu_texture_fetch_t& fetch, const instr_fetch_tex_t& fetch_instr,
+    Info& out_info) {  // NOTE(review): 3 is presumably a named enum; confirm.
+  out_info.min_filter = static_cast<instr_tex_filter_t>(  // 3: defer to fetch.
+      fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter);
+  out_info.mag_filter = static_cast<instr_tex_filter_t>(  // Same rule.
+      fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter);
+  out_info.mip_filter = static_cast<instr_tex_filter_t>(  // Same rule.
+      fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter);
+  out_info.clamp_u = fetch.clamp_x;  // fetch's x/y/z clamps map to u/v/w.
+  out_info.clamp_v = fetch.clamp_y;
+  out_info.clamp_w = fetch.clamp_z;
+  return true;  // No failure path here; the result is always true.
+}
diff --git a/src/xenia/gpu/sampler_state_resource.h b/src/xenia/gpu/sampler_state_resource.h
new file mode 100644
index 000000000..c0a3c4ab3
--- /dev/null
+++ b/src/xenia/gpu/sampler_state_resource.h
@@ -0,0 +1,67 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
+#define XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
+
+#include <xenia/gpu/resource.h>
+#include <xenia/gpu/xenos/ucode.h>
+#include <xenia/gpu/xenos/xenos.h>
+
+
+namespace xe {
+namespace gpu {
+
+// Abstract resource (Prepare() is pure virtual) carrying a sampler Info.
+class SamplerStateResource : public StaticResource {
+public:
+  struct Info {  // Three filter fields plus three clamp fields.
+    xenos::instr_tex_filter_t min_filter;
+    xenos::instr_tex_filter_t mag_filter;
+    xenos::instr_tex_filter_t mip_filter;
+    uint32_t clamp_u;
+    uint32_t clamp_v;
+    uint32_t clamp_w;
+
+    uint64_t hash() const {  // Hashes the same six fields Equals() compares.
+      return hash_combine(0,
+                          min_filter, mag_filter, mip_filter,
+                          clamp_u, clamp_v, clamp_w);
+    }
+    bool Equals(const Info& other) const {  // True iff all six fields match.
+      return min_filter == other.min_filter &&
+             mag_filter == other.mag_filter &&
+             mip_filter == other.mip_filter &&
+             clamp_u == other.clamp_u &&
+             clamp_v == other.clamp_v &&
+             clamp_w == other.clamp_w;
+    }
+
+    static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
+                        const xenos::instr_fetch_tex_t& fetch_instr,
+                        Info& out_info);  // Writes its result into out_info.
+  };
+
+  SamplerStateResource(const Info& info) : info_(info) {}  // Copies info.
+  virtual ~SamplerStateResource() = default;
+
+  const Info& info() const { return info_; }
+
+  virtual int Prepare() = 0;  // Subclass hook; meaning of the int: TODO confirm.
+
+protected:
+  Info info_;  // Copy taken at construction; exposed read-only via info().
+};
+
+
+}  // namespace gpu
+}  // namespace xe
+
+
+#endif  // XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc
deleted file mode 100644
index 69b083a60..000000000
--- a/src/xenia/gpu/shader.cc
+++ /dev/null
@@ -1,266 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/shader.h>
-
-#include <xenia/gpu/xenos/ucode_disassembler.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-Shader::Shader(
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) :
-    type_(type), hash_(hash), is_prepared_(false), disasm_src_(NULL) {
-  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
-  xe_zero_struct(&vtx_buffer_inputs_, sizeof(vtx_buffer_inputs_));
-  xe_zero_struct(&tex_buffer_inputs_, sizeof(tex_buffer_inputs_));
-
-  // Verify.
-  dword_count_ = length / 4;
-  XEASSERT(dword_count_ <= 512);
-
-  // Copy bytes and swap.
-  size_t byte_size = dword_count_ * sizeof(uint32_t);
-  dwords_ = (uint32_t*)xe_malloc(byte_size);
-  for (uint32_t n = 0; n < dword_count_; n++) {
-    dwords_[n] = XEGETUINT32BE(src_ptr + n * 4);
-  }
-
-  // Gather input/output registers/etc.
-  GatherIO();
-
-  // Disassemble, for debugging.
-  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
-}
-
-Shader::~Shader() {
-  if (disasm_src_) {
-    xe_free(disasm_src_);
-  }
-  xe_free(dwords_);
-}
-
-void Shader::GatherIO() {
-  // Process all execution blocks.
-  instr_cf_t cfa;
-  instr_cf_t cfb;
-  for (int idx = 0; idx < dword_count_; idx += 3) {
-    uint32_t dword_0 = dwords_[idx + 0];
-    uint32_t dword_1 = dwords_[idx + 1];
-    uint32_t dword_2 = dwords_[idx + 2];
-    cfa.dword_0 = dword_0;
-    cfa.dword_1 = dword_1 & 0xFFFF;
-    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
-    cfb.dword_1 = dword_2 >> 16;
-    if (cfa.opc == ALLOC) {
-      GatherAlloc(&cfa.alloc);
-    } else if (cfa.is_exec()) {
-      GatherExec(&cfa.exec);
-    }
-    if (cfb.opc == ALLOC) {
-      GatherAlloc(&cfb.alloc);
-    } else if (cfb.is_exec()) {
-      GatherExec(&cfb.exec);
-    }
-    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
-      break;
-    }
-  }
-}
-
-void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
-  allocs_.push_back(*cf);
-
-  switch (cf->buffer_select) {
-  case SQ_POSITION:
-    // Position (SV_POSITION).
-    alloc_counts_.positions += cf->size + 1;
-    break;
-  case SQ_PARAMETER_PIXEL:
-    // Output to PS (if VS), or frag output (if PS).
-    alloc_counts_.params += cf->size + 1;
-    break;
-  case SQ_MEMORY:
-    // MEMEXPORT?
-    alloc_counts_.memories += cf->size + 1;
-    break;
-  }
-}
-
-void Shader::GatherExec(const instr_cf_exec_t* cf) {
-  execs_.push_back(*cf);
-
-  uint32_t sequence = cf->serialize;
-  for (uint32_t i = 0; i < cf->count; i++) {
-    uint32_t alu_off = (cf->address + i);
-    int sync = sequence & 0x2;
-    if (sequence & 0x1) {
-      const instr_fetch_t* fetch =
-          (const instr_fetch_t*)(dwords_ + alu_off * 3);
-      switch (fetch->opc) {
-      case VTX_FETCH:
-        GatherVertexFetch(&fetch->vtx);
-        break;
-      case TEX_FETCH:
-        GatherTextureFetch(&fetch->tex);
-        break;
-      case TEX_GET_BORDER_COLOR_FRAC:
-      case TEX_GET_COMP_TEX_LOD:
-      case TEX_GET_GRADIENTS:
-      case TEX_GET_WEIGHTS:
-      case TEX_SET_TEX_LOD:
-      case TEX_SET_GRADIENTS_H:
-      case TEX_SET_GRADIENTS_V:
-      default:
-        XEASSERTALWAYS();
-        break;
-      }
-    } else {
-      // TODO(benvanik): gather registers used, predicate bits used, etc.
-      const instr_alu_t* alu =
-          (const instr_alu_t*)(dwords_ + alu_off * 3);
-      if (alu->vector_write_mask) {
-        if (alu->export_data && alu->vector_dest == 63) {
-          alloc_counts_.point_size = true;
-        }
-      }
-      if (alu->scalar_write_mask || !alu->vector_write_mask) {
-        if (alu->export_data && alu->scalar_dest == 63) {
-          alloc_counts_.point_size = true;
-        }
-      }
-    }
-    sequence >>= 2;
-  }
-}
-
-void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
-  // dst_reg/dst_swiz
-  // src_reg/src_swiz
-  // format = a2xx_sq_surfaceformat
-  // format_comp_all ? signed : unsigned
-  // num_format_all ? normalized
-  // stride
-  // offset
-  // const_index/const_index_sel -- fetch constant register
-  // num_format_all ? integer : fraction
-  // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
-
-  // Sometimes games have fetches that just produce constants. We can
-  // ignore those.
-  uint32_t dst_swiz = vtx->dst_swiz;
-  bool fetches_any_data = false;
-  for (int i = 0; i < 4; i++) {
-    if ((dst_swiz & 0x7) == 4) {
-      // 0.0
-    } else if ((dst_swiz & 0x7) == 5) {
-      // 1.0
-    } else if ((dst_swiz & 0x7) == 6) {
-      // ?
-    } else if ((dst_swiz & 0x7) == 7) {
-      // Previous register value.
-    } else {
-      fetches_any_data = true;
-      break;
-    }
-    dst_swiz >>= 3;
-  }
-  if (!fetches_any_data) {
-    return;
-  }
-
-  uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
-  auto& inputs = vtx_buffer_inputs_;
-  vtx_buffer_element_t* el = NULL;
-  for (size_t n = 0; n < inputs.count; n++) {
-    auto& input = inputs.descs[n];
-    if (input.fetch_slot == fetch_slot) {
-      XEASSERT(input.element_count + 1 < XECOUNT(input.elements));
-      // It may not hold that all strides are equal, but I hope it does.
-      XEASSERT(!vtx->stride || input.stride_words == vtx->stride);
-      el = &input.elements[input.element_count++];
-      break;
-    }
-  }
-  if (!el) {
-    XEASSERTNOTZERO(vtx->stride);
-    XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
-    auto& input = inputs.descs[inputs.count++];
-    input.input_index = inputs.count - 1;
-    input.fetch_slot = fetch_slot;
-    input.stride_words = vtx->stride;
-    el = &input.elements[input.element_count++];
-  }
-
-  el->vtx_fetch = *vtx;
-  el->format = vtx->format;
-  el->offset_words = vtx->offset;
-  el->size_words = 0;
-  switch (el->format) {
-  case FMT_8_8_8_8:
-  case FMT_2_10_10_10:
-  case FMT_10_11_11:
-  case FMT_11_11_10:
-    el->size_words = 1;
-    break;
-  case FMT_16_16:
-  case FMT_16_16_FLOAT:
-    el->size_words = 1;
-    break;
-  case FMT_16_16_16_16:
-  case FMT_16_16_16_16_FLOAT:
-    el->size_words = 2;
-    break;
-  case FMT_32:
-  case FMT_32_FLOAT:
-    el->size_words = 1;
-    break;
-  case FMT_32_32:
-  case FMT_32_32_FLOAT:
-    el->size_words = 2;
-    break;
-  case FMT_32_32_32_FLOAT:
-    el->size_words = 3;
-    break;
-  case FMT_32_32_32_32:
-  case FMT_32_32_32_32_FLOAT:
-    el->size_words = 4;
-    break;
-  default:
-    XELOGE("Unknown vertex format: %d", el->format);
-    XEASSERTALWAYS();
-    break;
-  }
-}
-
-const Shader::vtx_buffer_inputs_t* Shader::GetVertexBufferInputs() {
-  return &vtx_buffer_inputs_;
-}
-
-void Shader::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
-  // TODO(benvanik): check dest_swiz to see if we are writing anything.
-
-  auto& inputs = tex_buffer_inputs_;
-  XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
-  auto& input = inputs.descs[inputs.count++];
-  input.input_index = inputs.count - 1;
-  input.fetch_slot = tex->const_idx & 0xF; // ?
-  input.tex_fetch = *tex;
-
-  // Format mangling, size estimation, etc.
-}
-
-const Shader::tex_buffer_inputs_t* Shader::GetTextureBufferInputs() {
-  return &tex_buffer_inputs_;
-}
diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h
deleted file mode 100644
index 1dd26b2b4..000000000
--- a/src/xenia/gpu/shader.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_SHADER_H_
-#define XENIA_GPU_SHADER_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/xenos/ucode.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-class Shader {
-public:
-  Shader(xenos::XE_GPU_SHADER_TYPE type,
-         const uint8_t* src_ptr, size_t length,
-         uint64_t hash);
-  virtual ~Shader();
-
-  xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
-  const uint32_t* dwords() const { return dwords_; }
-  size_t dword_count() const { return dword_count_; }
-  uint64_t hash() const { return hash_; }
-  bool is_prepared() const { return is_prepared_; }
-
-  const char* disasm_src() const { return disasm_src_; }
-
-  typedef struct {
-    xenos::instr_fetch_vtx_t vtx_fetch;
-    uint32_t format;
-    uint32_t offset_words;
-    uint32_t size_words;
-  } vtx_buffer_element_t;
-  typedef struct {
-    uint32_t input_index;
-    uint32_t fetch_slot;
-    uint32_t stride_words;
-    uint32_t element_count;
-    vtx_buffer_element_t elements[16];
-  } vtx_buffer_desc_t;
-  typedef struct {
-    uint32_t count;
-    vtx_buffer_desc_t descs[16];
-  } vtx_buffer_inputs_t;
-  const vtx_buffer_inputs_t* GetVertexBufferInputs();
-
-  typedef struct {
-    uint32_t input_index;
-    uint32_t fetch_slot;
-    xenos::instr_fetch_tex_t tex_fetch;
-    uint32_t format;
-  } tex_buffer_desc_t;
-  typedef struct {
-    uint32_t count;
-    tex_buffer_desc_t descs[32];
-  } tex_buffer_inputs_t;
-  const tex_buffer_inputs_t* GetTextureBufferInputs();
-
-  typedef struct {
-    uint32_t  positions;
-    uint32_t  params;
-    uint32_t  memories;
-    bool      point_size;
-  } alloc_counts_t;
-  const alloc_counts_t& alloc_counts() const { return alloc_counts_; }
-
-private:
-  void GatherIO();
-  void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
-  void GatherExec(const xenos::instr_cf_exec_t* cf);
-  void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
-  void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);
-
-protected:
-  xenos::XE_GPU_SHADER_TYPE type_;
-  uint32_t*   dwords_;
-  size_t      dword_count_;
-  uint64_t    hash_;
-  bool        is_prepared_;
-
-  char*       disasm_src_;
-
-  alloc_counts_t alloc_counts_;
-  std::vector<xenos::instr_cf_exec_t>   execs_;
-  std::vector<xenos::instr_cf_alloc_t>  allocs_;
-  vtx_buffer_inputs_t vtx_buffer_inputs_;
-  tex_buffer_inputs_t tex_buffer_inputs_;
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_SHADER_H_
diff --git a/src/xenia/gpu/shader_cache.cc b/src/xenia/gpu/shader_cache.cc
deleted file mode 100644
index 2c5e84294..000000000
--- a/src/xenia/gpu/shader_cache.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/shader_cache.h>
-
-#include <xenia/gpu/shader.h>
-
-
-using namespace std;
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-ShaderCache::ShaderCache() {
-}
-
-ShaderCache::~ShaderCache() {
-  Clear();
-}
-
-Shader* ShaderCache::Create(
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length) {
-  uint64_t hash = Hash(src_ptr, length);
-  Shader* shader = CreateCore(type, src_ptr, length, hash);
-  map_.insert({ hash, shader });
-  return shader;
-}
-
-Shader* ShaderCache::CreateCore(
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length,
-    uint64_t hash) {
-  return new Shader(type, src_ptr, length, hash);
-}
-
-Shader* ShaderCache::Find(
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length) {
-  uint64_t hash = Hash(src_ptr, length);
-  auto it = map_.find(hash);
-  if (it != map_.end()) {
-    return it->second;
-  }
-  return NULL;
-}
-
-Shader* ShaderCache::FindOrCreate(
-    XE_GPU_SHADER_TYPE type,
-    const uint8_t* src_ptr, size_t length) {
-  SCOPE_profile_cpu_f("gpu");
-
-  uint64_t hash = Hash(src_ptr, length);
-  auto it = map_.find(hash);
-  if (it != map_.end()) {
-    return it->second;
-  }
-  Shader* shader = CreateCore(type, src_ptr, length, hash);
-  map_.insert({ hash, shader });
-  return shader;
-}
-
-void ShaderCache::Clear() {
-  for (auto it = map_.begin(); it != map_.end(); ++it) {
-    Shader* shader = it->second;
-    delete shader;
-  }
-  map_.clear();
-}
-
-uint64_t ShaderCache::Hash(const uint8_t* src_ptr, size_t length) {
-  return xe_hash64(src_ptr, length, 0);
-}
diff --git a/src/xenia/gpu/shader_cache.h b/src/xenia/gpu/shader_cache.h
deleted file mode 100644
index 97edc382f..000000000
--- a/src/xenia/gpu/shader_cache.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_SHADER_CACHE_H_
-#define XENIA_GPU_SHADER_CACHE_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/shader.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-class ShaderCache {
-public:
-  ShaderCache();
-  virtual ~ShaderCache();
-
-  Shader* Create(
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length);
-  Shader* Find(
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length);
-  Shader* FindOrCreate(
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length);
-
-  void Clear();
-
-private:
-  uint64_t Hash(const uint8_t* src_ptr, size_t length);
-
-  std::unordered_map<uint64_t, Shader*> map_;
-
-protected:
-  virtual Shader* CreateCore(
-      xenos::XE_GPU_SHADER_TYPE type,
-      const uint8_t* src_ptr, size_t length,
-      uint64_t hash);
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_SHADER_CACHE_H_
diff --git a/src/xenia/gpu/shader_resource.cc b/src/xenia/gpu/shader_resource.cc
index e2520db62..9fbcbf2bb 100644
--- a/src/xenia/gpu/shader_resource.cc
+++ b/src/xenia/gpu/shader_resource.cc
@@ -9,9 +9,267 @@
 
 #include <xenia/gpu/shader_resource.h>
 
+#include <xenia/gpu/xenos/ucode_disassembler.h>
+
 
 using namespace std;
 using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+
+ShaderResource::ShaderResource(const MemoryRange& memory_range,
+                               const Info& info,
+                               xenos::XE_GPU_SHADER_TYPE type)
+    : HashedResource(memory_range),
+      info_(info), type_(type), dword_count_(memory_range.length / 4),
+      dwords_(nullptr), disasm_src_(nullptr), is_prepared_(false) {
+  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
+  xe_zero_struct(&buffer_inputs_, sizeof(buffer_inputs_));
+  xe_zero_struct(&sampler_inputs_, sizeof(sampler_inputs_));
+
+  // Verify the dword count computed above (range length / 4).
+  XEASSERT(dword_count_ <= 512);
+
+  // Copy into an owned buffer, reading each dword with XEGETUINT32BE.
+  size_t byte_size = dword_count_ * sizeof(uint32_t);
+  dwords_ = (uint32_t*)xe_malloc(byte_size);
+  for (uint32_t n = 0; n < dword_count_; n++) {
+    dwords_[n] = XEGETUINT32BE(memory_range.host_base + n * 4);
+  }
+
+  // Disassemble, for debugging.
+  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
+
+  // Gather input/output registers/etc.
+  GatherIO();
+}
+
+ShaderResource::~ShaderResource() {
+  xe_free(disasm_src_);  // Text assigned from DisassembleShader() in the ctor.
+  xe_free(dwords_);      // Dword copy made with xe_malloc() in the ctor.
+}
+
+void ShaderResource::GatherIO() {
+  // Process all execution blocks: each 3-dword group packs two CF instrs.
+  instr_cf_t cfa;
+  instr_cf_t cfb;
+  for (size_t idx = 0; idx + 2 < dword_count_; idx += 3) {  // Full groups.
+    uint32_t dword_0 = dwords_[idx + 0];
+    uint32_t dword_1 = dwords_[idx + 1];
+    uint32_t dword_2 = dwords_[idx + 2];
+    cfa.dword_0 = dword_0;  // cfa: dword_0 plus the low half of dword_1.
+    cfa.dword_1 = dword_1 & 0xFFFF;
+    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);  // cfb: the rest.
+    cfb.dword_1 = dword_2 >> 16;
+    if (cfa.opc == ALLOC) {
+      GatherAlloc(&cfa.alloc);
+    } else if (cfa.is_exec()) {
+      GatherExec(&cfa.exec);
+    }
+    if (cfb.opc == ALLOC) {
+      GatherAlloc(&cfb.alloc);
+    } else if (cfb.is_exec()) {
+      GatherExec(&cfb.exec);
+    }
+    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
+      break;  // Either instruction being EXEC_END ends the scan.
+    }
+  }
+}
+
+void ShaderResource::GatherAlloc(const instr_cf_alloc_t* cf) {
+  allocs_.push_back(*cf);  // Keep a copy of every ALLOC (see allocs()).
+
+  switch (cf->buffer_select) {
+  case SQ_POSITION:
+    // Position (SV_POSITION).
+    alloc_counts_.positions += cf->size + 1;  // Each ALLOC adds size + 1.
+    break;
+  case SQ_PARAMETER_PIXEL:
+    // Output to PS (if VS), or frag output (if PS).
+    alloc_counts_.params += cf->size + 1;
+    break;
+  case SQ_MEMORY:
+    // MEMEXPORT?
+    alloc_counts_.memories += cf->size + 1;
+    break;
+  }  // Any other buffer_select is recorded above but not counted.
+}
+
+void ShaderResource::GatherExec(const instr_cf_exec_t* cf) {
+  execs_.push_back(*cf);
+
+  uint32_t sequence = cf->serialize;  // Two bits are consumed per instruction.
+  for (uint32_t i = 0; i < cf->count; i++) {
+    uint32_t alu_off = (cf->address + i);  // In 3-dword units.
+    if ((size_t(alu_off) + 1) * 3 > dword_count_) {
+      break;  // Would read past the copied dwords; stop gathering.
+    }
+    if (sequence & 0x1) {  // Low bit set: fetch instruction, else ALU.
+      auto fetch = (const instr_fetch_t*)(dwords_ + alu_off * 3);
+      switch (fetch->opc) {
+      case VTX_FETCH:
+        GatherVertexFetch(&fetch->vtx);
+        break;
+      case TEX_FETCH:
+        GatherTextureFetch(&fetch->tex);
+        break;
+      case TEX_GET_BORDER_COLOR_FRAC:
+      case TEX_GET_COMP_TEX_LOD:
+      case TEX_GET_GRADIENTS:
+      case TEX_GET_WEIGHTS:
+      case TEX_SET_TEX_LOD:
+      case TEX_SET_GRADIENTS_H:
+      case TEX_SET_GRADIENTS_V:
+      default:
+        XEASSERTALWAYS();  // Only vertex and texture fetches are handled.
+        break;
+      }
+    } else {
+      // TODO(benvanik): gather registers used, predicate bits used, etc.
+      auto alu = (const instr_alu_t*)(dwords_ + alu_off * 3);
+      if (alu->vector_write_mask) {
+        if (alu->export_data && alu->vector_dest == 63) {
+          alloc_counts_.point_size = true;  // Export to 63 flags point size.
+        }
+      }
+      if (alu->scalar_write_mask || !alu->vector_write_mask) {
+        if (alu->export_data && alu->scalar_dest == 63) {
+          alloc_counts_.point_size = true;
+        }
+      }
+    }
+    sequence >>= 2;
+  }
+}
+
+void ShaderResource::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
+  XEASSERT(type_ == XE_GPU_SHADER_TYPE_VERTEX);
+
+  // dst_reg/dst_swiz
+  // src_reg/src_swiz
+  // format = a2xx_sq_surfaceformat
+  // format_comp_all ? signed : unsigned
+  // num_format_all ? normalized
+  // stride
+  // offset
+  // const_index/const_index_sel -- fetch constant register
+  // num_format_all ? integer : fraction
+  // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
+
+  // Sometimes games have fetches that just produce constants. We can
+  // ignore those.
+  uint32_t dst_swiz = vtx->dst_swiz;
+  bool fetches_any_data = false;
+  for (int i = 0; i < 4; i++) {  // Four 3-bit selectors, lowest bits first.
+    if ((dst_swiz & 0x7) == 4) {
+      // 0.0
+    } else if ((dst_swiz & 0x7) == 5) {
+      // 1.0
+    } else if ((dst_swiz & 0x7) == 6) {
+      // ?
+    } else if ((dst_swiz & 0x7) == 7) {
+      // Previous register value.
+    } else {
+      fetches_any_data = true;  // Selector 0-3: treated as fetched data.
+      break;
+    }
+    dst_swiz >>= 3;
+  }
+  if (!fetches_any_data) {
+    return;  // Nothing is recorded for constant-only fetches.
+  }
+
+  uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
+  auto& inputs = buffer_inputs_;
+  VertexBufferResource::DeclElement* el = nullptr;
+  for (size_t n = 0; n < inputs.count; n++) {  // Reuse a desc for this slot.
+    auto& desc = inputs.descs[n];
+    auto& info = desc.info;
+    if (desc.fetch_slot == fetch_slot) {
+      XEASSERT(info.element_count + 1 < XECOUNT(info.elements));
+      // It may not hold that all strides are equal, but I hope it does.
+      XEASSERT(!vtx->stride || info.stride_words == vtx->stride);
+      el = &info.elements[info.element_count++];
+      break;
+    }
+  }
+  if (!el) {  // First fetch from this slot: append a new buffer desc.
+    XEASSERTNOTZERO(vtx->stride);
+    XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
+    auto& desc = inputs.descs[inputs.count++];
+    desc.input_index = inputs.count - 1;
+    desc.fetch_slot = fetch_slot;
+    desc.info.stride_words = vtx->stride;
+    el = &desc.info.elements[desc.info.element_count++];
+  }
+
+  el->vtx_fetch = *vtx;  // Keep the raw fetch instruction with the element.
+  el->format = vtx->format;
+  el->is_normalized = vtx->num_format_all == 0;
+  el->is_signed = vtx->format_comp_all == 1;
+  el->offset_words = vtx->offset;
+  el->size_words = 0;  // Stays 0 if the format is not recognized below.
+  switch (el->format) {
+  case FMT_8_8_8_8:
+  case FMT_2_10_10_10:
+  case FMT_10_11_11:
+  case FMT_11_11_10:
+    el->size_words = 1;
+    break;
+  case FMT_16_16:
+  case FMT_16_16_FLOAT:
+    el->size_words = 1;
+    break;
+  case FMT_16_16_16_16:
+  case FMT_16_16_16_16_FLOAT:
+    el->size_words = 2;
+    break;
+  case FMT_32:
+  case FMT_32_FLOAT:
+    el->size_words = 1;
+    break;
+  case FMT_32_32:
+  case FMT_32_32_FLOAT:
+    el->size_words = 2;
+    break;
+  case FMT_32_32_32_FLOAT:
+    el->size_words = 3;
+    break;
+  case FMT_32_32_32_32:
+  case FMT_32_32_32_32_FLOAT:
+    el->size_words = 4;
+    break;
+  default:
+    XELOGE("Unknown vertex format: %d", el->format);
+    XEASSERTALWAYS();
+    break;
+  }
+}
+
+void ShaderResource::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
+  // TODO(benvanik): check dest_swiz to see if we are writing anything.
+
+  XEASSERT(sampler_inputs_.count + 1 < XECOUNT(sampler_inputs_.descs));
+  auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
+  input.input_index = sampler_inputs_.count - 1;  // Index of the new desc.
+  input.fetch_slot = tex->const_idx & 0xF; // ?
+  input.tex_fetch = *tex;  // Keep the raw fetch instruction with the desc.
+
+  // Format mangling, size estimation, etc. (input.format is not set here.)
+}
+
+VertexShaderResource::VertexShaderResource(
+    const MemoryRange& memory_range, const Info& info)
+    : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_VERTEX) {
+}  // All work happens in the ShaderResource constructor.
+
+VertexShaderResource::~VertexShaderResource() = default;
+
+PixelShaderResource::PixelShaderResource(
+    const MemoryRange& memory_range, const Info& info)
+    : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_PIXEL) {
+}  // All work happens in the ShaderResource constructor.
+
+PixelShaderResource::~PixelShaderResource() = default;
diff --git a/src/xenia/gpu/shader_resource.h b/src/xenia/gpu/shader_resource.h
index 24b787ec4..b591bfaf2 100644
--- a/src/xenia/gpu/shader_resource.h
+++ b/src/xenia/gpu/shader_resource.h
@@ -10,7 +10,9 @@
 #ifndef XENIA_GPU_SHADER_RESOURCE_H_
 #define XENIA_GPU_SHADER_RESOURCE_H_
 
-#include <xenia/core.h>
+#include <xenia/gpu/buffer_resource.h>
+#include <xenia/gpu/resource.h>
+#include <xenia/gpu/xenos/ucode.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 
@@ -18,8 +20,104 @@ namespace xe {
 namespace gpu {
 
 
-class ShaderResource : public Resource {
+class ShaderResource : public HashedResource {
 public:
+  struct Info {
+    // type, etc?
+  };
+
+  ~ShaderResource() override;
+
+  const Info& info() const { return info_; }
+  xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
+  const uint32_t* dwords() const { return dwords_; }
+  size_t dword_count() const { return dword_count_; }
+
+  bool is_prepared() const { return is_prepared_; }
+  const char* disasm_src() const { return disasm_src_; }
+
+  struct BufferDesc {
+    uint32_t input_index;
+    uint32_t fetch_slot;
+    VertexBufferResource::Info info;
+    // xenos::instr_fetch_vtx_t vtx_fetch; for each el
+  };
+  struct BufferInputs {
+    uint32_t count;
+    BufferDesc descs[32];
+  };
+  const BufferInputs& buffer_inputs() const { return buffer_inputs_; }
+
+  struct SamplerDesc {
+    uint32_t input_index;
+    uint32_t fetch_slot;
+    uint32_t format;
+    xenos::instr_fetch_tex_t tex_fetch;
+  };
+  struct SamplerInputs {
+    uint32_t count;
+    SamplerDesc descs[32];
+  };
+  const SamplerInputs& sampler_inputs() const { return sampler_inputs_; }
+
+  struct AllocCounts {
+    uint32_t  positions;
+    uint32_t  params;
+    uint32_t  memories;
+    bool      point_size;
+  };
+  const AllocCounts& alloc_counts() const { return alloc_counts_; }
+  const std::vector<xenos::instr_cf_exec_t>& execs() const { return execs_; }
+  const std::vector<xenos::instr_cf_alloc_t>& allocs() const { return allocs_; }
+
+private:
+  void GatherIO();
+  void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
+  void GatherExec(const xenos::instr_cf_exec_t* cf);
+  void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
+  void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);
+
+protected:
+  ShaderResource(const MemoryRange& memory_range,
+                 const Info& info,
+                 xenos::XE_GPU_SHADER_TYPE type);
+
+  Info info_;
+  xenos::XE_GPU_SHADER_TYPE type_;
+  size_t dword_count_;
+  uint32_t* dwords_;
+  char* disasm_src_;
+
+  AllocCounts alloc_counts_;
+  std::vector<xenos::instr_cf_exec_t> execs_;
+  std::vector<xenos::instr_cf_alloc_t> allocs_;
+  BufferInputs buffer_inputs_;
+  SamplerInputs sampler_inputs_;
+
+  bool is_prepared_;
+};
+
+
+class VertexShaderResource : public ShaderResource {
+public:
+  VertexShaderResource(const MemoryRange& memory_range,
+                       const Info& info);
+  ~VertexShaderResource() override;
+
+  // buffer_inputs() matching VertexBufferResource::Info
+
+  virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) = 0;
+};
+
+
+class PixelShaderResource : public ShaderResource {
+public:
+  PixelShaderResource(const MemoryRange& memory_range,
+                      const Info& info);
+  ~PixelShaderResource() override;
+
+  virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
+                      VertexShaderResource* vertex_shader) = 0;
 };
 
 
diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi
index 3d4462fd1..b01f7a33b 100644
--- a/src/xenia/gpu/sources.gypi
+++ b/src/xenia/gpu/sources.gypi
@@ -5,6 +5,8 @@
     'buffer_resource.h',
     'command_processor.cc',
     'command_processor.h',
+    'draw_command.cc',
+    'draw_command.h',
     'gpu-private.h',
     'gpu.cc',
     'gpu.h',
@@ -18,6 +20,8 @@
     'resource.h',
     'resource_cache.cc',
     'resource_cache.h',
+    'sampler_state_resource.cc',
+    'sampler_state_resource.h',
     'shader_resource.cc',
     'shader_resource.h',
     'texture_resource.cc',
diff --git a/src/xenia/gpu/texture.cc b/src/xenia/gpu/texture.cc
deleted file mode 100644
index d624d82ce..000000000
--- a/src/xenia/gpu/texture.cc
+++ /dev/null
@@ -1,369 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/texture.h>
-
-#include <xenia/gpu/xenos/ucode.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-// TODO(benvanik): replace DXGI constants with xenia constants.
-#include <d3d11.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-Texture::Texture(uint32_t address, const uint8_t* host_address)
-    : address_(address), host_address_(host_address) {
-}
-
-Texture::~Texture() {
-  for (auto it = views_.begin(); it != views_.end(); ++it) {
-    auto view = *it;
-    delete view;
-  }
-  views_.clear();
-}
-
-TextureView* Texture::Fetch(
-    const xenos::xe_gpu_texture_fetch_t& fetch) {
-  // TODO(benvanik): compute length for hash check.
-  size_t length = 0;
-  switch (fetch.dimension) {
-  case DIMENSION_1D:
-    break;
-  case DIMENSION_2D:
-    break;
-  case DIMENSION_3D:
-    break;
-  case DIMENSION_CUBE:
-    break;
-  }
-  uint64_t hash = xe_hash64(host_address_, length);
-
-  for (auto it = views_.begin(); it != views_.end(); ++it) {
-    auto view = *it;
-    if (memcmp(&view->fetch, &fetch, sizeof(fetch))) {
-      continue;
-    }
-    bool dirty = hash != view->hash;
-    if (dirty) {
-      return FetchDirty(view, fetch) ? view : nullptr;
-    } else {
-      return view;
-    }
-  }
-
-  auto new_view = FetchNew(fetch);
-  if (!new_view) {
-    return nullptr;
-  }
-  new_view->hash = hash;
-  views_.push_back(new_view);
-  return new_view;
-}
-
-bool Texture::FillViewInfo(TextureView* view,
-                           const xenos::xe_gpu_texture_fetch_t& fetch) {
-  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
-  // a2xx_sq_surfaceformat
-
-  view->texture = this;
-  view->fetch = fetch;
-
-  view->dimensions = fetch.dimension;
-  switch (fetch.dimension) {
-  case DIMENSION_1D:
-    view->width = fetch.size_1d.width;
-    break;
-  case DIMENSION_2D:
-    view->width = fetch.size_2d.width;
-    view->height = fetch.size_2d.height;
-    break;
-  case DIMENSION_3D:
-    view->width = fetch.size_3d.width;
-    view->height = fetch.size_3d.height;
-    view->depth = fetch.size_3d.depth;
-    break;
-  case DIMENSION_CUBE:
-    view->width = fetch.size_stack.width;
-    view->height = fetch.size_stack.height;
-    view->depth = fetch.size_stack.depth;
-    break;
-  }
-  view->format = DXGI_FORMAT_UNKNOWN;
-  view->block_size = 0;
-  view->texel_pitch = 0;
-  view->is_compressed = false;
-  switch (fetch.format) {
-  case FMT_8:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_RRR1:
-      view->format = DXGI_FORMAT_R8_UNORM;
-      break;
-    case XE_GPU_SWIZZLE_000R:
-      view->format = DXGI_FORMAT_A8_UNORM;
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_8");
-      view->format = DXGI_FORMAT_A8_UNORM;
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 1;
-    break;
-  case FMT_1_5_5_5:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_BGRA:
-      view->format = DXGI_FORMAT_B5G5R5A1_UNORM;
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
-      view->format = DXGI_FORMAT_B5G5R5A1_UNORM;
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 2;
-    break;
-  case FMT_8_8_8_8:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_RGBA:
-      view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
-      break;
-    case XE_GPU_SWIZZLE_BGRA:
-      view->format = DXGI_FORMAT_B8G8R8A8_UNORM;
-      break;
-    case XE_GPU_SWIZZLE_RGB1:
-      view->format = DXGI_FORMAT_R8G8B8A8_UNORM; // ?
-      break;
-    case XE_GPU_SWIZZLE_BGR1:
-      view->format = DXGI_FORMAT_B8G8R8X8_UNORM;
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
-      view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 4;
-    break;
-  case FMT_4_4_4_4:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_BGRA:
-      view->format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
-      view->format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 2;
-    break;
-  case FMT_16_16_16_16_FLOAT:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_RGBA:
-      view->format = DXGI_FORMAT_R16G16B16A16_FLOAT;
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
-      view->format = DXGI_FORMAT_R16G16B16A16_FLOAT;
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 8;
-    break;
-  case FMT_32_FLOAT:
-    switch (fetch.swizzle) {
-    case XE_GPU_SWIZZLE_R111:
-      view->format = DXGI_FORMAT_R32_FLOAT;
-      break;
-    default:
-      XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
-      view->format = DXGI_FORMAT_R32_FLOAT;
-      break;
-    }
-    view->block_size = 1;
-    view->texel_pitch = 4;
-    break;
-  case FMT_DXT1:
-    view->format = DXGI_FORMAT_BC1_UNORM;
-    view->block_size = 4;
-    view->texel_pitch = 8;
-    view->is_compressed = true;
-    break;
-  case FMT_DXT2_3:
-  case FMT_DXT4_5:
-    view->format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM);
-    view->block_size = 4;
-    view->texel_pitch = 16;
-    view->is_compressed = true;
-    break;
-  case FMT_1_REVERSE:
-  case FMT_1:
-  case FMT_5_6_5:
-  case FMT_6_5_5:
-  case FMT_2_10_10_10:
-  case FMT_8_A:
-  case FMT_8_B:
-  case FMT_8_8:
-  case FMT_Cr_Y1_Cb_Y0:
-  case FMT_Y1_Cr_Y0_Cb:
-  case FMT_5_5_5_1:
-  case FMT_8_8_8_8_A:
-  case FMT_10_11_11:
-  case FMT_11_11_10:
-  case FMT_24_8:
-  case FMT_24_8_FLOAT:
-  case FMT_16:
-  case FMT_16_16:
-  case FMT_16_16_16_16:
-  case FMT_16_EXPAND:
-  case FMT_16_16_EXPAND:
-  case FMT_16_16_16_16_EXPAND:
-  case FMT_16_FLOAT:
-  case FMT_16_16_FLOAT:
-  case FMT_32:
-  case FMT_32_32:
-  case FMT_32_32_32_32:
-  case FMT_32_32_FLOAT:
-  case FMT_32_32_32_32_FLOAT:
-  case FMT_32_AS_8:
-  case FMT_32_AS_8_8:
-  case FMT_16_MPEG:
-  case FMT_16_16_MPEG:
-  case FMT_8_INTERLACED:
-  case FMT_32_AS_8_INTERLACED:
-  case FMT_32_AS_8_8_INTERLACED:
-  case FMT_16_INTERLACED:
-  case FMT_16_MPEG_INTERLACED:
-  case FMT_16_16_MPEG_INTERLACED:
-  case FMT_DXN:
-  case FMT_8_8_8_8_AS_16_16_16_16:
-  case FMT_DXT1_AS_16_16_16_16:
-  case FMT_DXT2_3_AS_16_16_16_16:
-  case FMT_DXT4_5_AS_16_16_16_16:
-  case FMT_2_10_10_10_AS_16_16_16_16:
-  case FMT_10_11_11_AS_16_16_16_16:
-  case FMT_11_11_10_AS_16_16_16_16:
-  case FMT_32_32_32_FLOAT:
-  case FMT_DXT3A:
-  case FMT_DXT5A:
-  case FMT_CTX1:
-  case FMT_DXT3A_AS_1_1_1_1:
-    view->format = DXGI_FORMAT_UNKNOWN;
-    break;
-  }
-
-  if (view->format == DXGI_FORMAT_UNKNOWN) {
-    return false;
-  }
-
-  switch (fetch.dimension) {
-  case DIMENSION_1D:
-    break;
-  case DIMENSION_2D:
-    view->sizes_2d = GetTextureSizes2D(view);
-    break;
-  case DIMENSION_3D:
-    break;
-  case DIMENSION_CUBE:
-    break;
-  }
-  return true;
-}
-
-const TextureSizes2D Texture::GetTextureSizes2D(TextureView* view) {
-  TextureSizes2D sizes;
-
-  sizes.logical_width = 1 + view->fetch.size_2d.width;
-  sizes.logical_height = 1 + view->fetch.size_2d.height;
-
-  sizes.block_width = sizes.logical_width / view->block_size;
-  sizes.block_height = sizes.logical_height / view->block_size;
-
-  if (!view->is_compressed) {
-    // must be 32x32, but also must have a pitch that is a multiple of 256 bytes
-    uint32_t bytes_per_block = view->block_size * view->block_size *
-                               view->texel_pitch;
-    uint32_t width_multiple = 32;
-    if (bytes_per_block) {
-      uint32_t minimum_multiple = 256 / bytes_per_block;
-      if (width_multiple < minimum_multiple) {
-        width_multiple = minimum_multiple;
-      }
-    }
-    sizes.input_width = XEROUNDUP(sizes.logical_width, width_multiple);
-    sizes.input_height = XEROUNDUP(sizes.logical_height, 32);
-    sizes.output_width = sizes.logical_width;
-    sizes.output_height = sizes.logical_height;
-  } else {
-    // must be 128x128
-    sizes.input_width = XEROUNDUP(sizes.logical_width, 128);
-    sizes.input_height = XEROUNDUP(sizes.logical_height, 128);
-    sizes.output_width = XENEXTPOW2(sizes.logical_width);
-    sizes.output_height = XENEXTPOW2(sizes.logical_height);
-  }
-
-  sizes.logical_pitch =
-      (sizes.logical_width / view->block_size) * view->texel_pitch;
-  sizes.input_pitch =
-      (sizes.input_width / view->block_size) * view->texel_pitch;
-
-  return sizes;
-}
-
-void Texture::TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
-                          XE_GPU_ENDIAN endianness) {
-  switch (endianness) {
-    case XE_GPU_ENDIAN_8IN16:
-      for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
-        *(uint16_t*)dest = XESWAP16(*(uint16_t*)src);
-      }
-      break;
-    case XE_GPU_ENDIAN_8IN32: // Swap bytes.
-      for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
-        *(uint32_t*)dest = XESWAP32(*(uint32_t*)src);
-      }
-      break;
-    case XE_GPU_ENDIAN_16IN32: // Swap half words.
-      for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
-        uint32_t value = *(uint32_t*)src;
-        *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
-      }
-      break;
-    default:
-    case XE_GPU_ENDIAN_NONE:
-      memcpy(dest, src, pitch);
-      break;
-  }
-}
-
-// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
-uint32_t Texture::TiledOffset2DOuter(uint32_t y, uint32_t width,
-                                     uint32_t log_bpp) {
-  uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
-  uint32_t micro = ((y & 6) << 2) << log_bpp;
-  return macro +
-         ((micro & ~15) << 1) +
-         (micro & 15) +
-         ((y & 8) << (3 + log_bpp)) +
-         ((y & 1) << 4);
-}
-
-uint32_t Texture::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
-                                     uint32_t base_offset) {
-  uint32_t macro = (x >> 5) << (bpp + 7);
-  uint32_t micro = (x & 7) << bpp;
-  uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
-  return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
-         ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
-}
diff --git a/src/xenia/gpu/texture.h b/src/xenia/gpu/texture.h
deleted file mode 100644
index 9b919a5d9..000000000
--- a/src/xenia/gpu/texture.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_TEXTURE_H_
-#define XENIA_GPU_TEXTURE_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-// TODO(benvanik): replace DXGI constants with xenia constants.
-#include <d3d11.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-class Texture;
-
-struct TextureSizes1D {};
-struct TextureSizes2D {
-  uint32_t logical_width;
-  uint32_t logical_height;
-  uint32_t block_width;
-  uint32_t block_height;
-  uint32_t input_width;
-  uint32_t input_height;
-  uint32_t output_width;
-  uint32_t output_height;
-  uint32_t logical_pitch;
-  uint32_t input_pitch;
-};
-struct TextureSizes3D {};
-struct TextureSizesCube {};
-
-struct TextureView {
-  Texture* texture;
-  xenos::xe_gpu_texture_fetch_t fetch;
-  uint64_t hash;
-
-  union {
-    TextureSizes1D sizes_1d;
-    TextureSizes2D sizes_2d;
-    TextureSizes3D sizes_3d;
-    TextureSizesCube sizes_cube;
-  };
-
-  int dimensions;
-  uint32_t width;
-  uint32_t height;
-  uint32_t depth;
-  uint32_t block_size;
-  uint32_t texel_pitch;
-  bool is_compressed;
-  DXGI_FORMAT format;
-
-  TextureView()
-    : texture(nullptr),
-      dimensions(0),
-      width(0), height(0), depth(0),
-      block_size(0), texel_pitch(0),
-      is_compressed(false), format(DXGI_FORMAT_UNKNOWN) {}
-};
-
-
-class Texture {
-public:
-  Texture(uint32_t address, const uint8_t* host_address);
-  virtual ~Texture();
-
-  TextureView* Fetch(
-      const xenos::xe_gpu_texture_fetch_t& fetch);
-
-protected:
-  bool FillViewInfo(TextureView* view,
-                    const xenos::xe_gpu_texture_fetch_t& fetch);
-
-  virtual TextureView* FetchNew(
-      const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
-  virtual bool FetchDirty(
-      TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
-
-  const TextureSizes2D GetTextureSizes2D(TextureView* view);
-
-  static void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
-                          xenos::XE_GPU_ENDIAN endianness);
-  static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
-                                     uint32_t log_bpp);
-  static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
-                                     uint32_t base_offset);
-
-  uint32_t address_;
-  const uint8_t* host_address_;
-
-  // TODO(benvanik): replace with LRU keyed list.
-  std::vector<TextureView*> views_;
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_TEXTURE_H_
diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc
deleted file mode 100644
index 1f0a4a5ac..000000000
--- a/src/xenia/gpu/texture_cache.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include <xenia/gpu/texture_cache.h>
-
-#include <xenia/gpu/xenos/ucode.h>
-
-
-using namespace xe;
-using namespace xe::gpu;
-using namespace xe::gpu::xenos;
-
-
-// https://github.com/ivmai/bdwgc/blob/master/os_dep.c
-
-TextureCache::TextureCache(Memory* memory)
-    : memory_(memory) {
-}
-
-TextureCache::~TextureCache() {
-  for (auto it = textures_.begin(); it != textures_.end(); ++it) {
-    auto texture = it->second;
-    delete texture;
-  }
-  textures_.clear();
-}
-
-TextureView* TextureCache::FetchTexture(
-    uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) {
-  auto it = textures_.find(address);
-  if (it == textures_.end()) {
-    // Texture not found.
-    const uint8_t* host_address = memory_->Translate(address);
-    auto texture = CreateTexture(address, host_address, fetch);
-    if (!texture) {
-      return nullptr;
-    }
-    textures_.insert({ address, texture });
-    return texture->Fetch(fetch);
-  } else {
-    // Texture found.
-    return it->second->Fetch(fetch);
-  }
-}
diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h
deleted file mode 100644
index 285ffe1d7..000000000
--- a/src/xenia/gpu/texture_cache.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_TEXTURE_CACHE_H_
-#define XENIA_GPU_TEXTURE_CACHE_H_
-
-#include <xenia/core.h>
-#include <xenia/gpu/texture.h>
-#include <xenia/gpu/xenos/xenos.h>
-
-
-namespace xe {
-namespace gpu {
-
-
-// TODO(benvanik): overlapping textures.
-// TODO(benvanik): multiple textures (differing formats/etc) per address.
-class TextureCache {
-public:
-  TextureCache(Memory* memory);
-  virtual ~TextureCache();
-  
-  Memory* memory() const { return memory_; }
-
-  TextureView* FetchTexture(
-      uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch);
-
-protected:
-  virtual Texture* CreateTexture(
-      uint32_t address, const uint8_t* host_address,
-      const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
-
-  Memory* memory_;
-
-  // Mapped by guest address.
-  std::unordered_map<uint32_t, Texture*> textures_;
-};
-
-
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_TEXTURE_CACHE_H_
diff --git a/src/xenia/gpu/texture_resource.cc b/src/xenia/gpu/texture_resource.cc
index 5875e76f3..d063acc56 100644
--- a/src/xenia/gpu/texture_resource.cc
+++ b/src/xenia/gpu/texture_resource.cc
@@ -9,9 +9,342 @@
 
 #include <xenia/gpu/texture_resource.h>
 
+#include <xenia/gpu/xenos/ucode.h>
+#include <xenia/gpu/xenos/xenos.h>
+
 
 using namespace std;
 using namespace xe;
 using namespace xe::gpu;
 using namespace xe::gpu::xenos;
 
+
+bool TextureResource::Info::Prepare(const xe_gpu_texture_fetch_t& fetch,
+                                    Info& info) {
+  // Fills |info| from |fetch|; returns false for unhandled formats/dimensions.
+  // Zero first: Equals() memcmp's the whole Info, so nothing may stay unset.
+  memset(&info, 0, sizeof(info));
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
+  // a2xx_sq_surfaceformat
+  info.dimension = (TextureDimension)fetch.dimension;
+  switch (info.dimension) {
+  case TEXTURE_DIMENSION_1D:
+    info.width = fetch.size_1d.width;
+    break;
+  case TEXTURE_DIMENSION_2D:
+    info.width = fetch.size_2d.width;
+    info.height = fetch.size_2d.height;
+    break;
+  case TEXTURE_DIMENSION_3D:
+  case TEXTURE_DIMENSION_CUBE:
+    info.width = fetch.size_3d.width;
+    info.height = fetch.size_3d.height;
+    info.depth = fetch.size_3d.depth;
+    break;
+  }
+  info.block_size = 0;
+  info.texel_pitch = 0;
+  info.endianness = (XE_GPU_ENDIAN)fetch.endianness;
+  info.is_tiled = fetch.tiled;
+  info.is_compressed = false;
+  info.input_length = 0;
+  info.format = DXGI_FORMAT_UNKNOWN;
+  switch (fetch.format) {
+  case FMT_8:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_RRR1:
+      info.format = DXGI_FORMAT_R8_UNORM;
+      break;
+    case XE_GPU_SWIZZLE_000R:
+      info.format = DXGI_FORMAT_A8_UNORM;
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_8");
+      info.format = DXGI_FORMAT_A8_UNORM;
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 1;
+    break;
+  case FMT_1_5_5_5:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_BGRA:
+      info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
+      info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 2;
+    break;
+  case FMT_8_8_8_8:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_RGBA:
+      info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      break;
+    case XE_GPU_SWIZZLE_BGRA:
+      info.format = DXGI_FORMAT_B8G8R8A8_UNORM;
+      break;
+    case XE_GPU_SWIZZLE_RGB1:
+      info.format = DXGI_FORMAT_R8G8B8A8_UNORM; // ?
+      break;
+    case XE_GPU_SWIZZLE_BGR1:
+      info.format = DXGI_FORMAT_B8G8R8X8_UNORM;
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
+      info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 4;
+    break;
+  case FMT_4_4_4_4:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_BGRA:
+      info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
+      info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 2;
+    break;
+  case FMT_16_16_16_16_FLOAT:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_RGBA:
+      info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
+      info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 8;
+    break;
+  case FMT_32_FLOAT:
+    switch (fetch.swizzle) {
+    case XE_GPU_SWIZZLE_R111:
+      info.format = DXGI_FORMAT_R32_FLOAT;
+      break;
+    default:
+      XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
+      info.format = DXGI_FORMAT_R32_FLOAT;
+      break;
+    }
+    info.block_size = 1;
+    info.texel_pitch = 4;
+    break;
+  case FMT_DXT1:
+    info.format = DXGI_FORMAT_BC1_UNORM;
+    info.block_size = 4;
+    info.texel_pitch = 8;
+    info.is_compressed = true;
+    break;
+  case FMT_DXT2_3:
+  case FMT_DXT4_5:
+    info.format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM);
+    info.block_size = 4;
+    info.texel_pitch = 16;
+    info.is_compressed = true;
+    break;
+  case FMT_1_REVERSE:
+  case FMT_1:
+  case FMT_5_6_5:
+  case FMT_6_5_5:
+  case FMT_2_10_10_10:
+  case FMT_8_A:
+  case FMT_8_B:
+  case FMT_8_8:
+  case FMT_Cr_Y1_Cb_Y0:
+  case FMT_Y1_Cr_Y0_Cb:
+  case FMT_5_5_5_1:
+  case FMT_8_8_8_8_A:
+  case FMT_10_11_11:
+  case FMT_11_11_10:
+  case FMT_24_8:
+  case FMT_24_8_FLOAT:
+  case FMT_16:
+  case FMT_16_16:
+  case FMT_16_16_16_16:
+  case FMT_16_EXPAND:
+  case FMT_16_16_EXPAND:
+  case FMT_16_16_16_16_EXPAND:
+  case FMT_16_FLOAT:
+  case FMT_16_16_FLOAT:
+  case FMT_32:
+  case FMT_32_32:
+  case FMT_32_32_32_32:
+  case FMT_32_32_FLOAT:
+  case FMT_32_32_32_32_FLOAT:
+  case FMT_32_AS_8:
+  case FMT_32_AS_8_8:
+  case FMT_16_MPEG:
+  case FMT_16_16_MPEG:
+  case FMT_8_INTERLACED:
+  case FMT_32_AS_8_INTERLACED:
+  case FMT_32_AS_8_8_INTERLACED:
+  case FMT_16_INTERLACED:
+  case FMT_16_MPEG_INTERLACED:
+  case FMT_16_16_MPEG_INTERLACED:
+  case FMT_DXN:
+  case FMT_8_8_8_8_AS_16_16_16_16:
+  case FMT_DXT1_AS_16_16_16_16:
+  case FMT_DXT2_3_AS_16_16_16_16:
+  case FMT_DXT4_5_AS_16_16_16_16:
+  case FMT_2_10_10_10_AS_16_16_16_16:
+  case FMT_10_11_11_AS_16_16_16_16:
+  case FMT_11_11_10_AS_16_16_16_16:
+  case FMT_32_32_32_FLOAT:
+  case FMT_DXT3A:
+  case FMT_DXT5A:
+  case FMT_CTX1:
+  case FMT_DXT3A_AS_1_1_1_1:
+    info.format = DXGI_FORMAT_UNKNOWN;
+    break;
+  }
+  if (info.format == DXGI_FORMAT_UNKNOWN) {
+    return false;
+  }
+  // Must be called here when we know the format.
+  switch (info.dimension) {
+  case TEXTURE_DIMENSION_1D:
+    info.CalculateTextureSizes1D(fetch);
+    break;
+  case TEXTURE_DIMENSION_2D:
+    info.CalculateTextureSizes2D(fetch);
+    break;
+  case TEXTURE_DIMENSION_3D:
+    // TODO(benvanik): calculate size.
+    return false;
+  case TEXTURE_DIMENSION_CUBE:
+    // TODO(benvanik): calculate size.
+    return false;
+  }
+  return true;
+}
+
+void TextureResource::Info::CalculateTextureSizes1D(
+    const xe_gpu_texture_fetch_t& fetch) {
+  // Stores the raw fetch width as-is (the 2D path adds 1) - TODO confirm units.
+  size_1d.width = fetch.size_1d.width;
+}
+
+void TextureResource::Info::CalculateTextureSizes2D(
+    const xe_gpu_texture_fetch_t& fetch) {
+  size_2d.logical_width = 1 + fetch.size_2d.width;
+  size_2d.logical_height = 1 + fetch.size_2d.height;
+  // Extent in blocks; block_size is nonzero for every format Prepare() accepts.
+  size_2d.block_width = size_2d.logical_width / block_size;
+  size_2d.block_height = size_2d.logical_height / block_size;
+  // input_* use XEROUNDUP; output_* are logical, or XENEXTPOW2 when compressed.
+  if (!is_compressed) {
+    // must be 32x32 but also must have a pitch that is a multiple of 256 bytes
+    uint32_t bytes_per_block = block_size * block_size * texel_pitch;
+    uint32_t width_multiple = 32;
+    if (bytes_per_block) {
+      uint32_t minimum_multiple = 256 / bytes_per_block;
+      if (width_multiple < minimum_multiple) {
+        width_multiple = minimum_multiple;
+      }
+    }
+    size_2d.input_width = XEROUNDUP(size_2d.logical_width, width_multiple);
+    size_2d.input_height = XEROUNDUP(size_2d.logical_height, 32);
+    size_2d.output_width = size_2d.logical_width;
+    size_2d.output_height = size_2d.logical_height;
+  } else {
+    // must be 128x128
+    size_2d.input_width = XEROUNDUP(size_2d.logical_width, 128);
+    size_2d.input_height = XEROUNDUP(size_2d.logical_height, 128);
+    size_2d.output_width = XENEXTPOW2(size_2d.logical_width);
+    size_2d.output_height = XENEXTPOW2(size_2d.logical_height);
+  }
+  // Pitches: blocks per row * texel_pitch (presumably bytes per block - confirm).
+  size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch;
+  size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch;
+  // NOTE(review): both branches are identical; the tiled length is unconfirmed.
+  if (!is_tiled) {
+    input_length = size_2d.block_height * size_2d.logical_pitch;
+  } else {
+    input_length = size_2d.block_height * size_2d.logical_pitch; // ?
+  }
+}
+
+TextureResource::TextureResource(const MemoryRange& memory_range,
+                                 const Info& info)
+    : PagedResource(memory_range),  // range is handed to the PagedResource base
+      info_(info) {                 // Info is stored by value in info_
+}
+
+TextureResource::~TextureResource() {  // empty: no cleanup is done at this level
+}
+
+int TextureResource::Prepare() {  // 0 when clean, 1 when no handle
+  if (!handle()) {
+    if (CreateHandle()) {  // a nonzero result is treated as failure
+      XELOGE("Unable to create texture handle");
+      return 1;
+    }
+  }
+  // Nothing further to do unless the resource has been flagged dirty.
+  if (!dirtied_) {
+    return 0;
+  }
+  dirtied_ = false;
+  // NOTE(review): the flag is already cleared if InvalidateRegion() fails.
+  // TODO: pass only the dirty regions? The full memory_range_ is used for now.
+  return InvalidateRegion(memory_range_);
+}
+
+void TextureResource::TextureSwap(uint8_t* dest, const uint8_t* src,
+                                  uint32_t pitch) const {  // pitch: byte count
+  // Endian-swapping copy; word loops assume pitch is a multiple of the word.
+  switch (info_.endianness) {  // TODO(benvanik): optimize swapping paths.
+    case XE_GPU_ENDIAN_8IN16:  // XESWAP16 on each 16-bit word.
+      for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
+        *(uint16_t*)dest = XESWAP16(*(uint16_t*)src);
+      }
+      break;
+    case XE_GPU_ENDIAN_8IN32: // XESWAP32 on each 32-bit word.
+      for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
+        *(uint32_t*)dest = XESWAP32(*(uint32_t*)src);
+      }
+      break;
+    case XE_GPU_ENDIAN_16IN32: // Swap the 16-bit halves of each 32-bit word.
+      for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
+        uint32_t value = *(uint32_t*)src;
+        *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
+      }
+      break;
+    default:  // Unrecognized modes are handled like NONE.
+    case XE_GPU_ENDIAN_NONE:
+      memcpy(dest, src, pitch);  // No swap: plain byte copy.
+      break;
+  }
+}
+
+// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
+uint32_t TextureResource::TiledOffset2DOuter(uint32_t y, uint32_t width,
+                                             uint32_t log_bpp) const {  // no x
+  uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
+  uint32_t micro = ((y & 6) << 2) << log_bpp;  // from y bits 1-2
+  return macro +                       // (y/32)*(width/32) << (log_bpp+7)
+         ((micro & ~15) << 1) +        // micro bits 4+ moved up one place
+         (micro & 15) +                // micro bits 0-3 unchanged
+         ((y & 8) << (3 + log_bpp)) +  // y bit 3
+         ((y & 1) << 4);               // y bit 0 lands on bit 4
+}
+
+uint32_t TextureResource::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
+                                             uint32_t base_offset) const {  // base_offset: presumably from TiledOffset2DOuter - confirm
+  uint32_t macro = (x >> 5) << (bpp + 7);  // x/32, shifted by bpp + 7
+  uint32_t micro = (x & 7) << bpp;  // bpp is a shift count here (cf. log_bpp)
+  uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
+  return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +  // spread offset bits
+         ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);  // mix in y bits 3-4 and x/8
+}
diff --git a/src/xenia/gpu/texture_resource.h b/src/xenia/gpu/texture_resource.h
index 35f83bcda..57dc63422 100644
--- a/src/xenia/gpu/texture_resource.h
+++ b/src/xenia/gpu/texture_resource.h
@@ -10,7 +10,7 @@
 #ifndef XENIA_GPU_TEXTURE_RESOURCE_H_
 #define XENIA_GPU_TEXTURE_RESOURCE_H_
 
-#include <xenia/core.h>
+#include <xenia/gpu/resource.h>
 #include <xenia/gpu/xenos/xenos.h>
 
 // TODO(benvanik): replace DXGI constants with xenia constants.
@@ -21,8 +21,85 @@ namespace xe {
 namespace gpu {
 
 
-class TextureResource : public Resource {
+enum TextureDimension {  // values are cast straight from fetch.dimension
+  TEXTURE_DIMENSION_1D = 0,
+  TEXTURE_DIMENSION_2D = 1,
+  TEXTURE_DIMENSION_3D = 2,
+  TEXTURE_DIMENSION_CUBE = 3,
+};
+
+
+class TextureResource : public PagedResource {
 public:
+  struct Info {  // compared with memcmp by Equals() below
+    TextureDimension dimension;
+    uint32_t width;   // raw fetch value, as copied by Prepare()
+    uint32_t height;  // raw fetch value; not written for 1D
+    uint32_t depth;   // raw fetch value; only written for 3D/cube
+    uint32_t block_size;   // 1, or 4 for the DXT formats
+    uint32_t texel_pitch;  // presumably bytes per block - TODO confirm
+    xenos::XE_GPU_ENDIAN endianness;
+    bool is_tiled;
+    bool is_compressed;
+    uint32_t input_length;  // set by CalculateTextureSizes2D; 0 otherwise
+
+    // TODO(benvanik): replace with our own constants.
+    DXGI_FORMAT format;
+    // Per-dimension sizes; Prepare() fills only the 1D or the 2D member.
+    union {
+      struct {
+        uint32_t width;
+      } size_1d;
+      struct {
+        uint32_t logical_width;
+        uint32_t logical_height;
+        uint32_t block_width;
+        uint32_t block_height;
+        uint32_t input_width;
+        uint32_t input_height;
+        uint32_t output_width;
+        uint32_t output_height;
+        uint32_t logical_pitch;
+        uint32_t input_pitch;
+      } size_2d;
+      struct {
+      } size_3d;
+      struct {
+      } size_cube;
+    };
+    // Fills |out_info| from |fetch|; false for unhandled formats/dimensions.
+    static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
+                        Info& out_info);
+
+  private:
+    void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
+    void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
+  };
+
+  TextureResource(const MemoryRange& memory_range,
+                  const Info& info);
+  ~TextureResource() override;
+
+  const Info& info() const { return info_; }
+  // Bytewise (memcmp) comparison against an Info passed as an opaque blob.
+  bool Equals(const void* info_ptr, size_t info_length) override {
+    return info_length == sizeof(Info) &&
+           memcmp(info_ptr, &info_, info_length) == 0;
+  }
+  // Creates the handle if missing, then calls InvalidateRegion() when dirtied.
+  virtual int Prepare();
+
+protected:
+  virtual int CreateHandle() = 0;  // Prepare() treats nonzero as failure
+  virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
+  // Helpers for subclasses: endian-swapping copy and tiled offset math.
+  void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch) const;
+  uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
+                              uint32_t log_bpp) const;
+  uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
+                              uint32_t base_offset) const;
+
+  Info info_;
 };
 
 
diff --git a/src/xenia/gpu/xenos/registers.h b/src/xenia/gpu/xenos/registers.h
deleted file mode 100644
index 39a0d43db..000000000
--- a/src/xenia/gpu/xenos/registers.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_GPU_XENOS_REGISTERS_H_
-#define XENIA_GPU_XENOS_REGISTERS_H_
-
-#include <xenia/core.h>
-
-
-namespace xe {
-namespace gpu {
-namespace xenos {
-
-
-static const uint32_t kXEGpuRegisterCount = 0x5003;
-
-
-enum Registers {
-#define XE_GPU_REGISTER(index, type, name) \
-    XE_GPU_REG_##name = index,
-#include <xenia/gpu/xenos/register_table.inc>
-#undef XE_GPU_REGISTER
-};
-
-
-const char* GetRegisterName(uint32_t index);
-
-
-union RegisterValue {
-  uint32_t  u32;
-  float     f32;
-};
-
-
-struct RegisterFile {
-  RegisterValue   values[kXEGpuRegisterCount];
-};
-
-
-}  // namespace xenos
-}  // namespace gpu
-}  // namespace xe
-
-
-#endif  // XENIA_GPU_XENOS_REGISTERS_H_
diff --git a/src/xenia/gpu/xenos/sources.gypi b/src/xenia/gpu/xenos/sources.gypi
index c1f677682..998444938 100644
--- a/src/xenia/gpu/xenos/sources.gypi
+++ b/src/xenia/gpu/xenos/sources.gypi
@@ -3,8 +3,6 @@
   'sources': [
     'packets.h',
     'register_table.inc',
-    'registers.cc',
-    'registers.h',
     'ucode.h',
     'ucode_disassembler.cc',
     'ucode_disassembler.h',