Vulkan: Add basic one-frame cache to BufferCache

This commit is contained in:
Dr. Chat 2017-01-29 00:24:31 -06:00
parent d9b52d1afa
commit 8c79051a94
5 changed files with 81 additions and 28 deletions

View File

@ -106,8 +106,12 @@ void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
} }
} }
void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr, void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count) {
size_t count) { return copy_and_swap_16_in_32_unaligned(dest, src, count);
}
void copy_and_swap_16_in_32_unaligned(void* dest_ptr, const void* src_ptr,
size_t count) {
auto dest = reinterpret_cast<uint64_t*>(dest_ptr); auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
auto src = reinterpret_cast<const uint64_t*>(src_ptr); auto src = reinterpret_cast<const uint64_t*>(src_ptr);
size_t i; size_t i;

View File

@ -130,6 +130,8 @@ void copy_and_swap_32_unaligned(void* dest, const void* src, size_t count);
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count); void copy_and_swap_64_aligned(void* dest, const void* src, size_t count);
void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count); void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count);
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count); void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count);
void copy_and_swap_16_in_32_unaligned(void* dest, const void* src,
size_t count);
template <typename T> template <typename T>
void copy_and_swap(T* dest, const T* src, size_t count) { void copy_and_swap(T* dest, const T* src, size_t count) {

View File

@ -25,9 +25,9 @@ using xe::ui::vulkan::CheckResult;
constexpr VkDeviceSize kConstantRegisterUniformRange = constexpr VkDeviceSize kConstantRegisterUniformRange =
512 * 4 * 4 + 8 * 4 + 32 * 4; 512 * 4 * 4 + 8 * 4 + 32 * 4;
BufferCache::BufferCache(RegisterFile* register_file, BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
ui::vulkan::VulkanDevice* device, size_t capacity) ui::vulkan::VulkanDevice* device, size_t capacity)
: register_file_(register_file), device_(*device) { : register_file_(register_file), memory_(memory), device_(*device) {
transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>( transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
device, device,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
@ -229,15 +229,22 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
const void* source_ptr, size_t source_length, IndexFormat format, uint32_t source_addr, uint32_t source_length, IndexFormat format,
VkFence fence) { VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset};
}
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
auto offset = AllocateTransientData(source_length, fence); offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
} }
const void* source_ptr = memory_->TranslatePhysical(source_addr);
// Copy data into the buffer. // Copy data into the buffer.
// TODO(benvanik): get min/max indices and pass back? // TODO(benvanik): get min/max indices and pass back?
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
@ -251,28 +258,41 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
source_ptr, source_length / 4); source_ptr, source_length / 4);
} }
CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
const void* source_ptr, size_t source_length, Endian endian, uint32_t source_addr, uint32_t source_length, Endian endian,
VkFence fence) { VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset};
}
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
auto offset = AllocateTransientData(source_length, fence); offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
} }
const void* source_ptr = memory_->TranslatePhysical(source_addr);
// Copy data into the buffer. // Copy data into the buffer.
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
assert_true(endian == Endian::k8in32);
if (endian == Endian::k8in32) { if (endian == Endian::k8in32) {
// Endian::k8in32, swap words. // Endian::k8in32, swap words.
xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4); source_ptr, source_length / 4);
} else if (endian == Endian::k16in32) {
xe::copy_and_swap_16_in_32_aligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4);
} else {
assert_always();
} }
CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
@ -304,6 +324,24 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
return VK_WHOLE_SIZE; return VK_WHOLE_SIZE;
} }
// Looks up the transient-buffer offset recorded for a guest memory range.
// The lookup key packs guest_length into the upper 32 bits and guest_address
// into the lower 32 bits, so only an exact (address, length) match hits.
// Returns the cached offset, or VK_WHOLE_SIZE when no entry exists.
// NOTE(review): the key carries no endian/index-format information, while
// UploadIndexBuffer and UploadVertexBuffer byte-swap differently before
// caching; presumably callers never request the same range with two
// different swap modes before the cache is cleared - confirm.
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
                                                  uint32_t guest_length) {
  const uint64_t cache_key = (static_cast<uint64_t>(guest_length) << 32) |
                             static_cast<uint64_t>(guest_address);
  const auto entry = transient_cache_.find(cache_key);
  if (entry == transient_cache_.end()) {
    // Nothing cached for this exact range.
    return VK_WHOLE_SIZE;
  }
  return entry->second;
}
// Records that the guest range (guest_address, guest_length) now lives at
// `offset` in the transient buffer. The key layout matches the one used by
// FindCachedTransientData: length in the upper 32 bits, address in the lower
// 32 bits. An existing entry for the same key is overwritten.
void BufferCache::CacheTransientData(uint32_t guest_address,
                                     uint32_t guest_length,
                                     VkDeviceSize offset) {
  const uint64_t cache_key = (static_cast<uint64_t>(guest_length) << 32) |
                             static_cast<uint64_t>(guest_address);
  transient_cache_[cache_key] = offset;
}
void BufferCache::Flush(VkCommandBuffer command_buffer) { void BufferCache::Flush(VkCommandBuffer command_buffer) {
// If we are flushing a big enough chunk queue up an event. // If we are flushing a big enough chunk queue up an event.
// We don't want to do this for everything but often enough so that we won't // We don't want to do this for everything but often enough so that we won't
@ -331,7 +369,10 @@ void BufferCache::InvalidateCache() {
void BufferCache::ClearCache() { transient_cache_.clear(); } void BufferCache::ClearCache() { transient_cache_.clear(); }
void BufferCache::Scavenge() { transient_buffer_->Scavenge(); } void BufferCache::Scavenge() {
transient_cache_.clear();
transient_buffer_->Scavenge();
}
} // namespace vulkan } // namespace vulkan
} // namespace gpu } // namespace gpu

View File

@ -13,11 +13,12 @@
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/circular_buffer.h" #include "xenia/ui/vulkan/circular_buffer.h"
#include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h" #include "xenia/ui/vulkan/vulkan_device.h"
#include <unordered_map> #include <map>
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -28,8 +29,8 @@ namespace vulkan {
// transient data like shader constants. // transient data like shader constants.
class BufferCache { class BufferCache {
public: public:
BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, BufferCache(RegisterFile* register_file, Memory* memory,
size_t capacity); ui::vulkan::VulkanDevice* device, size_t capacity);
~BufferCache(); ~BufferCache();
// Descriptor set containing the dynamic uniform buffer used for constant // Descriptor set containing the dynamic uniform buffer used for constant
@ -60,8 +61,8 @@ class BufferCache {
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr, std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
size_t source_length, uint32_t source_length,
IndexFormat format, IndexFormat format,
VkFence fence); VkFence fence);
@ -69,8 +70,8 @@ class BufferCache {
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr, std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
size_t source_length, uint32_t source_length,
Endian endian, Endian endian,
VkFence fence); VkFence fence);
@ -99,8 +100,16 @@ class BufferCache {
// Tries to allocate a block of memory in the transient buffer. // Tries to allocate a block of memory in the transient buffer.
// Returns VK_WHOLE_SIZE if requested amount of memory is not available. // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence); VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
// Finds a block of data in the transient buffer sourced from the specified
// guest address and length.
VkDeviceSize FindCachedTransientData(uint32_t guest_address,
uint32_t guest_length);
// Adds a block of data to the frame cache.
void CacheTransientData(uint32_t guest_address, uint32_t guest_length,
VkDeviceSize offset);
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
Memory* memory_ = nullptr;
VkDevice device_ = nullptr; VkDevice device_ = nullptr;
VkDeviceMemory gpu_memory_pool_ = nullptr; VkDeviceMemory gpu_memory_pool_ = nullptr;
@ -108,7 +117,7 @@ class BufferCache {
// Staging ringbuffer we cycle through fast. Used for data we don't // Staging ringbuffer we cycle through fast. Used for data we don't
// plan on keeping past the current frame. // plan on keeping past the current frame.
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr; std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
std::unordered_map<uint64_t, VkDeviceSize> transient_cache_; std::map<uint64_t, VkDeviceSize> transient_cache_;
VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr;
VkDescriptorSetLayout descriptor_set_layout_ = nullptr; VkDescriptorSetLayout descriptor_set_layout_ = nullptr;

View File

@ -73,8 +73,8 @@ bool VulkanCommandProcessor::SetupContext() {
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY); *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
// Initialize the state machine caches. // Initialize the state machine caches.
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_, buffer_cache_ = std::make_unique<BufferCache>(
kDefaultBufferCacheCapacity); register_file_, memory_, device_, kDefaultBufferCacheCapacity);
texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_, texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
&trace_writer_, device_); &trace_writer_, device_);
pipeline_cache_ = std::make_unique<PipelineCache>( pipeline_cache_ = std::make_unique<PipelineCache>(
@ -696,13 +696,12 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
trace_writer_.WriteMemoryRead(info.guest_base, info.length); trace_writer_.WriteMemoryRead(info.guest_base, info.length);
// Upload (or get a cached copy of) the buffer. // Upload (or get a cached copy of) the buffer.
const void* source_ptr = uint32_t source_addr = info.guest_base;
memory_->TranslatePhysical<const void*>(info.guest_base); uint32_t source_length =
size_t source_length =
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t)); : sizeof(uint16_t));
auto buffer_ref = buffer_cache_->UploadIndexBuffer( auto buffer_ref = buffer_cache_->UploadIndexBuffer(
source_ptr, source_length, info.format, current_batch_fence_); source_addr, source_length, info.format, current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
return false; return false;
@ -764,11 +763,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
trace_writer_.WriteMemoryRead(physical_address, valid_range); trace_writer_.WriteMemoryRead(physical_address, valid_range);
// Upload (or get a cached copy of) the buffer. // Upload (or get a cached copy of) the buffer.
const void* source_ptr = uint32_t source_length = uint32_t(valid_range);
memory_->TranslatePhysical<const void*>(physical_address);
size_t source_length = valid_range;
auto buffer_ref = buffer_cache_->UploadVertexBuffer( auto buffer_ref = buffer_cache_->UploadVertexBuffer(
source_ptr, source_length, static_cast<Endian>(fetch->endian), physical_address, source_length, static_cast<Endian>(fetch->endian),
current_batch_fence_); current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.