From 3a8065b7b13a05b1eb87d2b2c5ad988dd1bb45c8 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 1 Jun 2014 09:42:07 -0700 Subject: [PATCH] Vertex buffer caching. Doesn't help, though, as buffers are weird. Need to rethink all of this. --- src/xenia/gpu/buffer.cc | 6 +- src/xenia/gpu/buffer.h | 21 +++++- src/xenia/gpu/buffer_cache.cc | 22 ++++++ src/xenia/gpu/buffer_cache.h | 8 +++ src/xenia/gpu/d3d11/d3d11_buffer.cc | 73 +++++++++++++++++++- src/xenia/gpu/d3d11/d3d11_buffer.h | 12 ++++ src/xenia/gpu/d3d11/d3d11_buffer_cache.cc | 6 ++ src/xenia/gpu/d3d11/d3d11_buffer_cache.h | 3 + src/xenia/gpu/d3d11/d3d11_graphics_driver.cc | 70 ++++++++----------- 9 files changed, 175 insertions(+), 46 deletions(-) diff --git a/src/xenia/gpu/buffer.cc b/src/xenia/gpu/buffer.cc index 0b7fe9ad6..499cb43a6 100644 --- a/src/xenia/gpu/buffer.cc +++ b/src/xenia/gpu/buffer.cc @@ -33,8 +33,10 @@ IndexBuffer::IndexBuffer(const IndexBufferInfo& info, IndexBuffer::~IndexBuffer() {} -VertexBuffer::VertexBuffer(const uint8_t* src_ptr, size_t length) - : Buffer(src_ptr, length) { +VertexBuffer::VertexBuffer(const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) + : Buffer(src_ptr, length), + info_(info) { } VertexBuffer::~VertexBuffer() {} diff --git a/src/xenia/gpu/buffer.h b/src/xenia/gpu/buffer.h index bc83ed20d..9c8e3c654 100644 --- a/src/xenia/gpu/buffer.h +++ b/src/xenia/gpu/buffer.h @@ -57,10 +57,29 @@ protected: }; +struct VertexBufferLayout { + uint32_t stride_words; + uint32_t element_count; + struct { + uint32_t format; + uint32_t offset_words; + uint32_t size_words; + } elements[16]; +}; + +struct VertexBufferInfo { + VertexBufferLayout layout; +}; + + class VertexBuffer : public Buffer { public: - VertexBuffer(const uint8_t* src_ptr, size_t length); + VertexBuffer(const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length); virtual ~VertexBuffer(); + +protected: + VertexBufferInfo info_; }; diff --git a/src/xenia/gpu/buffer_cache.cc b/src/xenia/gpu/buffer_cache.cc index 1f1d9ac00..cc963d817 100644 --- a/src/xenia/gpu/buffer_cache.cc +++ b/src/xenia/gpu/buffer_cache.cc @@ -47,6 +47,28 @@ IndexBuffer* BufferCache::FetchIndexBuffer( } } +VertexBuffer* BufferCache::FetchVertexBuffer( + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) { + size_t key = reinterpret_cast(src_ptr); + size_t hash = xe_hash64(src_ptr, length); + auto it = vertex_buffer_map_.find(key); + if (it != vertex_buffer_map_.end()) { + if (hash == it->second->hash()) { + return it->second; + } else { + return it->second->FetchDirty(hash) ? it->second : nullptr; + } + } else { + auto buffer = CreateVertexBuffer(info, src_ptr, length); + vertex_buffer_map_.insert({ key, buffer }); + if (!buffer->FetchNew(hash)) { + return nullptr; + } + return buffer; + } +} + void BufferCache::Clear() { for (auto it = index_buffer_map_.begin(); it != index_buffer_map_.end(); ++it) { diff --git a/src/xenia/gpu/buffer_cache.h b/src/xenia/gpu/buffer_cache.h index 21a057a0c..bcba6f9de 100644 --- a/src/xenia/gpu/buffer_cache.h +++ b/src/xenia/gpu/buffer_cache.h @@ -28,15 +28,23 @@ public: const IndexBufferInfo& info, const uint8_t* src_ptr, size_t length); + VertexBuffer* FetchVertexBuffer( + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length); + void Clear(); protected: virtual IndexBuffer* CreateIndexBuffer( const IndexBufferInfo& info, const uint8_t* src_ptr, size_t length) = 0; + virtual VertexBuffer* CreateVertexBuffer( + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) = 0; private: std::unordered_map index_buffer_map_; + std::unordered_map vertex_buffer_map_; }; diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.cc b/src/xenia/gpu/d3d11/d3d11_buffer.cc index 98ea3ba9b..84c0d901e 100644 --- a/src/xenia/gpu/d3d11/d3d11_buffer.cc +++ b/src/xenia/gpu/d3d11/d3d11_buffer.cc @@ -57,7 +57,13 @@ bool D3D11IndexBuffer::FetchDirty(uint64_t hash) { XEASSERT(info_.endianness == 0x2); D3D11_MAPPED_SUBRESOURCE res; - buffer_cache_->context()->Map(handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + HRESULT hr = buffer_cache_->context()->Map( + handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: unable to map index buffer"); + return false; + } + if (info_.index_32bit) { const uint32_t* src = reinterpret_cast(src_); uint32_t* dest = reinterpret_cast(res.pData); @@ -77,3 +83,68 @@ bool D3D11IndexBuffer::FetchDirty(uint64_t hash) { return true; } + + +D3D11VertexBuffer::D3D11VertexBuffer( + D3D11BufferCache* buffer_cache, + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) + : VertexBuffer(info, src_ptr, length), + buffer_cache_(buffer_cache), + handle_(nullptr) { +} + +D3D11VertexBuffer::~D3D11VertexBuffer() { + XESAFERELEASE(handle_); +} + +bool D3D11VertexBuffer::FetchNew(uint64_t hash) { + hash_ = hash; + + D3D11_BUFFER_DESC buffer_desc; + xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); + buffer_desc.ByteWidth = static_cast(length_); + buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_); + if (FAILED(hr)) { + XELOGW("D3D11: failed to create index buffer"); + return false; + } + + return FetchDirty(hash); +} + +bool D3D11VertexBuffer::FetchDirty(uint64_t hash) { + hash_ = hash; + + D3D11_MAPPED_SUBRESOURCE res; + HRESULT hr = buffer_cache_->context()->Map( + handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: unable to map vertex buffer"); + return false; + } + uint8_t* dest = reinterpret_cast(res.pData); + + // TODO(benvanik): rewrite to be faster/special case common/etc + uint32_t stride = info_.layout.stride_words; + size_t count = (length_ / 4) / stride; + for (size_t n = 0; n < info_.layout.element_count; n++) { + const auto& el = info_.layout.elements[n]; + const uint32_t* src_ptr = (const uint32_t*)(src_ + el.offset_words * 4); + uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4); + uint32_t o = 0; + for (uint32_t i = 0; i < count; i++) { + for (uint32_t j = 0; j < el.size_words; j++) { + dest_ptr[o + j] = XESWAP32(src_ptr[o + j]); + } + o += stride; + } + } + + + buffer_cache_->context()->Unmap(handle_, 0); + return true; +} diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.h b/src/xenia/gpu/d3d11/d3d11_buffer.h index 02160db2e..924fb3da4 100644 --- a/src/xenia/gpu/d3d11/d3d11_buffer.h +++ b/src/xenia/gpu/d3d11/d3d11_buffer.h @@ -45,7 +45,19 @@ private: class D3D11VertexBuffer : public VertexBuffer { public: + D3D11VertexBuffer(D3D11BufferCache* buffer_cache, + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length); + virtual ~D3D11VertexBuffer(); + + ID3D11Buffer* handle() const { return handle_; } + + bool FetchNew(uint64_t hash) override; + bool FetchDirty(uint64_t hash) override; + private: + D3D11BufferCache* buffer_cache_; + ID3D11Buffer* handle_; }; diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc b/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc index b6aac9d1b..48eb8fbf8 100644 --- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc +++ b/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc @@ -36,3 +36,9 @@ IndexBuffer* D3D11BufferCache::CreateIndexBuffer( const uint8_t* src_ptr, size_t length) { return new D3D11IndexBuffer(this, info, src_ptr, length); } + +VertexBuffer* D3D11BufferCache::CreateVertexBuffer( + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) { + return new D3D11VertexBuffer(this, info, src_ptr, length); +} diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h b/src/xenia/gpu/d3d11/d3d11_buffer_cache.h index eca2f5b55..284536ab7 100644 --- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h +++ b/src/xenia/gpu/d3d11/d3d11_buffer_cache.h @@ -35,6 +35,9 @@ protected: IndexBuffer* CreateIndexBuffer( const IndexBufferInfo& info, const uint8_t* src_ptr, size_t length) override; + VertexBuffer* CreateVertexBuffer( + const VertexBufferInfo& info, + const uint8_t* src_ptr, size_t length) override; protected: ID3D11DeviceContext* context_; diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index 209313091..886643e32 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -933,6 +933,11 @@ int D3D11GraphicsDriver::PrepareFetchers() { int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) { SCOPE_profile_cpu_f("gpu"); + D3D11VertexShader* vs = state_.vertex_shader; + if (!vs) { + return 1; + } + RegisterFile& rf = register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; @@ -953,56 +958,37 @@ int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) { XEASSERT(fetch->type == 0x3); XEASSERTNOTZERO(fetch->size); - ID3D11Buffer* buffer = 0; - D3D11_BUFFER_DESC buffer_desc; - xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); - buffer_desc.ByteWidth = fetch->size * 4; - buffer_desc.Usage = D3D11_USAGE_DYNAMIC; - buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; - buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - HRESULT hr = device_->CreateBuffer(&buffer_desc, NULL, &buffer); - if (FAILED(hr)) { + VertexBufferInfo info; + // TODO(benvanik): make these structs the same so we can share. + info.layout.stride_words = desc.stride_words; + info.layout.element_count = desc.element_count; + for (uint32_t i = 0; i < desc.element_count; ++i) { + const auto& src_el = desc.elements[i]; + auto& dest_el = info.layout.elements[i]; + dest_el.format = src_el.format; + dest_el.offset_words = src_el.offset_words; + dest_el.size_words = src_el.size_words; + } + + uint32_t address = (fetch->address << 2) + address_translation_; + const uint8_t* src = reinterpret_cast( + memory_->Translate(address)); + + VertexBuffer* vertex_buffer = buffer_cache_->FetchVertexBuffer( + info, src, fetch->size * 4); + if (!vertex_buffer) { XELOGE("D3D11: unable to create vertex fetch buffer"); return 1; } - D3D11_MAPPED_SUBRESOURCE res; - hr = context_->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); - if (FAILED(hr)) { - XELOGE("D3D11: unable to map vertex fetch buffer"); - XESAFERELEASE(buffer); - return 1; - } - uint32_t address = (fetch->address << 2) + address_translation_; - uint8_t* src = (uint8_t*)memory_->Translate(address); - uint8_t* dest = (uint8_t*)res.pData; - // TODO(benvanik): rewrite to be faster/special case common/etc - for (size_t n = 0; n < desc.element_count; n++) { - auto& el = desc.elements[n]; - uint32_t stride = desc.stride_words; - uint32_t count = fetch->size / stride; - uint32_t* src_ptr = (uint32_t*)(src + el.offset_words * 4); - uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4); - uint32_t o = 0; - for (uint32_t i = 0; i < count; i++) { - for (uint32_t j = 0; j < el.size_words; j++) { - dest_ptr[o + j] = XESWAP32(src_ptr[o + j]); - } - o += stride; - } - } - context_->Unmap(buffer, 0); + auto d3d_vb = static_cast(vertex_buffer); - D3D11VertexShader* vs = state_.vertex_shader; - if (!vs) { - return 1; - } // TODO(benvanik): always dword aligned? uint32_t stride = desc.stride_words * 4; uint32_t offset = 0; int vb_slot = desc.input_index; - context_->IASetVertexBuffers(vb_slot, 1, &buffer, &stride, &offset); - - buffer->Release(); + ID3D11Buffer* buffers[] = { d3d_vb->handle() }; + context_->IASetVertexBuffers(vb_slot, XECOUNT(buffers), buffers, + &stride, &offset); return 0; }