From 529a1478d894fdbfafa51c7fb3be8e33e4030fa0 Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Sat, 31 May 2014 16:34:05 -0700
Subject: [PATCH] Sampler state caching.

---
 src/xenia/gpu/d3d11/d3d11_graphics_driver.cc | 25 --------------------
 src/xenia/gpu/d3d11/d3d11_texture_cache.cc   | 25 ++++++++++++++++++++
 src/xenia/gpu/d3d11/d3d11_texture_cache.h    |  6 +++++
 src/xenia/types.h                            | 10 ++++++++
 4 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
index 6c466ccac..f29afd285 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
@@ -845,9 +845,6 @@ int D3D11GraphicsDriver::BindShaders() {
 
     // Setup input layout (as encoded in vertex shader).
     context_->IASetInputLayout(vs->input_layout());
-
-    //context_->VSSetSamplers
-    //context_->VSSetShaderResources
   } else {
     context_->VSSetShader(NULL, NULL, 0);
     context_->IASetInputLayout(NULL);
@@ -877,27 +874,6 @@ int D3D11GraphicsDriver::BindShaders() {
     };
     context_->PSSetConstantBuffers(
         0, XECOUNT(vs_constant_buffers), vs_constant_buffers);
-
-    // TODO(benvanik): set samplers for all inputs.
-    D3D11_SAMPLER_DESC sampler_desc;
-    xe_zero_struct(&sampler_desc, sizeof(sampler_desc));
-    //sampler_desc.Filter = ?
-    sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
-    sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
-    sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
-    sampler_desc.MipLODBias = 0;
-    sampler_desc.MaxAnisotropy = 1;
-    sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
-    //sampler_desc.BorderColor = ...;
-    sampler_desc.MinLOD = 0;
-    sampler_desc.MaxLOD = 0;
-    ID3D11SamplerState* sampler_state = NULL;
-    device_->CreateSamplerState(&sampler_desc, &sampler_state);
-    ID3D11SamplerState* sampler_states[] = { sampler_state };
-    context_->PSSetSamplers(0, XECOUNT(sampler_states), sampler_states);
-    sampler_state->Release();
-
-    //context_->PSSetShaderResources
   } else {
     context_->PSSetShader(NULL, NULL, 0);
     return 1;
@@ -1109,7 +1085,6 @@ int D3D11GraphicsDriver::PrepareTextureSampler(
   } else {
     context_->PSSetSamplers(desc.input_index, 1, &sampler_state);
   }
-  XESAFERELEASE(sampler_state);
 
   return 0;
 }
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc b/src/xenia/gpu/d3d11/d3d11_texture_cache.cc
index ca8a2f88d..ad8e4d09e 100644
--- a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc
+++ b/src/xenia/gpu/d3d11/d3d11_texture_cache.cc
@@ -27,6 +27,12 @@ D3D11TextureCache::D3D11TextureCache(
 }
 
 D3D11TextureCache::~D3D11TextureCache() {
+  for (auto it = samplers_.begin(); it != samplers_.end(); ++it) {
+    auto& cached_state = it->second;
+    XESAFERELEASE(cached_state.state);  // Release each cached sampler.
+  }
+  samplers_.clear();  // Entries now hold released pointers; drop them.
+
   XESAFERELEASE(device_);
   XESAFERELEASE(context_);
 }
@@ -103,11 +109,30 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
   sampler_desc.BorderColor[3];
   sampler_desc.MinLOD;
   sampler_desc.MaxLOD;
+
+  // TODO(benvanik): do this earlier without having to setup the whole struct?
+  size_t hash = hash_combine(
+      sampler_desc.Filter,
+      sampler_desc.AddressU,
+      sampler_desc.AddressV,
+      sampler_desc.AddressW);
+  auto range = samplers_.equal_range(hash);
+  for (auto it = range.first; it != range.second; ++it) {
+    const auto& cached_state = it->second;
+    // TODO(benvanik): faster compare?
+    if (memcmp(&sampler_desc, &cached_state.desc, sizeof(sampler_desc)) == 0) {
+      return cached_state.state;
+    }
+  }
+
   ID3D11SamplerState* sampler_state = NULL;
   HRESULT hr = device_->CreateSamplerState(&sampler_desc, &sampler_state);
   if (FAILED(hr)) {
     XELOGE("D3D11: unable to create sampler state");
     return nullptr;
   }
+
+  samplers_.insert({ hash, { sampler_desc, sampler_state } });
+
   return sampler_state;
 }
diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.h b/src/xenia/gpu/d3d11/d3d11_texture_cache.h
index 4405a331c..ce0fdc310 100644
--- a/src/xenia/gpu/d3d11/d3d11_texture_cache.h
+++ b/src/xenia/gpu/d3d11/d3d11_texture_cache.h
@@ -44,6 +44,12 @@ protected:
 private:
   ID3D11DeviceContext* context_;
   ID3D11Device* device_;
+
+  struct CachedSamplerState {  // One cache entry: descriptor + object.
+    D3D11_SAMPLER_DESC desc;  // Compared bytewise (memcmp) on lookup.
+    ID3D11SamplerState* state;  // Released in ~D3D11TextureCache().
+  };
+  std::unordered_multimap<size_t, CachedSamplerState> samplers_;  // By hash.
 };
 
 
diff --git a/src/xenia/types.h b/src/xenia/types.h
index 42d6aa658..4cd3f5daf 100644
--- a/src/xenia/types.h
+++ b/src/xenia/types.h
@@ -134,6 +134,16 @@ typedef XECACHEALIGN volatile void xe_aligned_void_t;
 #endif  // GNUC
 #endif  // !MIN
 
+XEFORCEINLINE size_t hash_combine(size_t seed) {  // Recursion terminator.
+  return seed;  // No values left to mix in.
+}
+template <typename T, typename... Ts>  // seed is mixed in raw, not hashed.
+size_t hash_combine(size_t seed, const T& v, const Ts&... vs) {
+  std::hash<T> hasher;  // T must have a std::hash specialization.
+  seed ^= hasher(v) + 0x9E3779B9 + (seed << 6) + (seed >> 2);  // Mix in v.
+  return hash_combine(seed, vs...);  // Fold the remaining values.
+}
+
 #if XE_PLATFORM_WIN32
 #define XESAFERELEASE(p)        if (p) { p->Release(); }
 #endif  // WIN32