Vulkan: Add basic one-frame cache to BufferCache
This commit is contained in:
parent
d9b52d1afa
commit
8c79051a94
|
@ -106,8 +106,12 @@ void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr,
|
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count) {
|
||||||
size_t count) {
|
return copy_and_swap_16_in_32_unaligned(dest, src, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void copy_and_swap_16_in_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||||
|
size_t count) {
|
||||||
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
|
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
|
||||||
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
|
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
|
@ -130,6 +130,8 @@ void copy_and_swap_32_unaligned(void* dest, const void* src, size_t count);
|
||||||
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count);
|
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count);
|
||||||
void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count);
|
void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count);
|
||||||
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count);
|
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count);
|
||||||
|
void copy_and_swap_16_in_32_unaligned(void* dest, const void* src,
|
||||||
|
size_t count);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void copy_and_swap(T* dest, const T* src, size_t count) {
|
void copy_and_swap(T* dest, const T* src, size_t count) {
|
||||||
|
|
|
@ -25,9 +25,9 @@ using xe::ui::vulkan::CheckResult;
|
||||||
constexpr VkDeviceSize kConstantRegisterUniformRange =
|
constexpr VkDeviceSize kConstantRegisterUniformRange =
|
||||||
512 * 4 * 4 + 8 * 4 + 32 * 4;
|
512 * 4 * 4 + 8 * 4 + 32 * 4;
|
||||||
|
|
||||||
BufferCache::BufferCache(RegisterFile* register_file,
|
BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
|
||||||
ui::vulkan::VulkanDevice* device, size_t capacity)
|
ui::vulkan::VulkanDevice* device, size_t capacity)
|
||||||
: register_file_(register_file), device_(*device) {
|
: register_file_(register_file), memory_(memory), device_(*device) {
|
||||||
transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
|
transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
|
||||||
device,
|
device,
|
||||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||||
|
@ -229,15 +229,22 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||||
const void* source_ptr, size_t source_length, IndexFormat format,
|
uint32_t source_addr, uint32_t source_length, IndexFormat format,
|
||||||
VkFence fence) {
|
VkFence fence) {
|
||||||
|
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||||
|
if (offset != VK_WHOLE_SIZE) {
|
||||||
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
|
}
|
||||||
|
|
||||||
// Allocate space in the buffer for our data.
|
// Allocate space in the buffer for our data.
|
||||||
auto offset = AllocateTransientData(source_length, fence);
|
offset = AllocateTransientData(source_length, fence);
|
||||||
if (offset == VK_WHOLE_SIZE) {
|
if (offset == VK_WHOLE_SIZE) {
|
||||||
// OOM.
|
// OOM.
|
||||||
return {nullptr, VK_WHOLE_SIZE};
|
return {nullptr, VK_WHOLE_SIZE};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||||
|
|
||||||
// Copy data into the buffer.
|
// Copy data into the buffer.
|
||||||
// TODO(benvanik): get min/max indices and pass back?
|
// TODO(benvanik): get min/max indices and pass back?
|
||||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||||
|
@ -251,28 +258,41 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||||
source_ptr, source_length / 4);
|
source_ptr, source_length / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CacheTransientData(source_addr, source_length, offset);
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||||
const void* source_ptr, size_t source_length, Endian endian,
|
uint32_t source_addr, uint32_t source_length, Endian endian,
|
||||||
VkFence fence) {
|
VkFence fence) {
|
||||||
|
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||||
|
if (offset != VK_WHOLE_SIZE) {
|
||||||
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
|
}
|
||||||
|
|
||||||
// Allocate space in the buffer for our data.
|
// Allocate space in the buffer for our data.
|
||||||
auto offset = AllocateTransientData(source_length, fence);
|
offset = AllocateTransientData(source_length, fence);
|
||||||
if (offset == VK_WHOLE_SIZE) {
|
if (offset == VK_WHOLE_SIZE) {
|
||||||
// OOM.
|
// OOM.
|
||||||
return {nullptr, VK_WHOLE_SIZE};
|
return {nullptr, VK_WHOLE_SIZE};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||||
|
|
||||||
// Copy data into the buffer.
|
// Copy data into the buffer.
|
||||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||||
assert_true(endian == Endian::k8in32);
|
|
||||||
if (endian == Endian::k8in32) {
|
if (endian == Endian::k8in32) {
|
||||||
// Endian::k8in32, swap words.
|
// Endian::k8in32, swap words.
|
||||||
xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
|
xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
|
||||||
source_ptr, source_length / 4);
|
source_ptr, source_length / 4);
|
||||||
|
} else if (endian == Endian::k16in32) {
|
||||||
|
xe::copy_and_swap_16_in_32_aligned(transient_buffer_->host_base() + offset,
|
||||||
|
source_ptr, source_length / 4);
|
||||||
|
} else {
|
||||||
|
assert_always();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CacheTransientData(source_addr, source_length, offset);
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,6 +324,24 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
|
||||||
return VK_WHOLE_SIZE;
|
return VK_WHOLE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
|
||||||
|
uint32_t guest_length) {
|
||||||
|
uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
|
||||||
|
auto it = transient_cache_.find(key);
|
||||||
|
if (it != transient_cache_.end()) {
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_WHOLE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BufferCache::CacheTransientData(uint32_t guest_address,
|
||||||
|
uint32_t guest_length,
|
||||||
|
VkDeviceSize offset) {
|
||||||
|
uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
|
||||||
|
transient_cache_[key] = offset;
|
||||||
|
}
|
||||||
|
|
||||||
void BufferCache::Flush(VkCommandBuffer command_buffer) {
|
void BufferCache::Flush(VkCommandBuffer command_buffer) {
|
||||||
// If we are flushing a big enough chunk queue up an event.
|
// If we are flushing a big enough chunk queue up an event.
|
||||||
// We don't want to do this for everything but often enough so that we won't
|
// We don't want to do this for everything but often enough so that we won't
|
||||||
|
@ -331,7 +369,10 @@ void BufferCache::InvalidateCache() {
|
||||||
|
|
||||||
void BufferCache::ClearCache() { transient_cache_.clear(); }
|
void BufferCache::ClearCache() { transient_cache_.clear(); }
|
||||||
|
|
||||||
void BufferCache::Scavenge() { transient_buffer_->Scavenge(); }
|
void BufferCache::Scavenge() {
|
||||||
|
transient_cache_.clear();
|
||||||
|
transient_buffer_->Scavenge();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -13,11 +13,12 @@
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/shader.h"
|
#include "xenia/gpu/shader.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/memory.h"
|
||||||
#include "xenia/ui/vulkan/circular_buffer.h"
|
#include "xenia/ui/vulkan/circular_buffer.h"
|
||||||
#include "xenia/ui/vulkan/vulkan.h"
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
#include "xenia/ui/vulkan/vulkan_device.h"
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <map>
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -28,8 +29,8 @@ namespace vulkan {
|
||||||
// transient data like shader constants.
|
// transient data like shader constants.
|
||||||
class BufferCache {
|
class BufferCache {
|
||||||
public:
|
public:
|
||||||
BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
|
BufferCache(RegisterFile* register_file, Memory* memory,
|
||||||
size_t capacity);
|
ui::vulkan::VulkanDevice* device, size_t capacity);
|
||||||
~BufferCache();
|
~BufferCache();
|
||||||
|
|
||||||
// Descriptor set containing the dynamic uniform buffer used for constant
|
// Descriptor set containing the dynamic uniform buffer used for constant
|
||||||
|
@ -60,8 +61,8 @@ class BufferCache {
|
||||||
// recently uploaded data or cached copies.
|
// recently uploaded data or cached copies.
|
||||||
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
|
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
|
||||||
size_t source_length,
|
uint32_t source_length,
|
||||||
IndexFormat format,
|
IndexFormat format,
|
||||||
VkFence fence);
|
VkFence fence);
|
||||||
|
|
||||||
|
@ -69,8 +70,8 @@ class BufferCache {
|
||||||
// recently uploaded data or cached copies.
|
// recently uploaded data or cached copies.
|
||||||
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
|
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
|
||||||
size_t source_length,
|
uint32_t source_length,
|
||||||
Endian endian,
|
Endian endian,
|
||||||
VkFence fence);
|
VkFence fence);
|
||||||
|
|
||||||
|
@ -99,8 +100,16 @@ class BufferCache {
|
||||||
// Tries to allocate a block of memory in the transient buffer.
|
// Tries to allocate a block of memory in the transient buffer.
|
||||||
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||||
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
|
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
|
||||||
|
// Finds a block of data in the transient buffer sourced from the specified
|
||||||
|
// guest address and length.
|
||||||
|
VkDeviceSize FindCachedTransientData(uint32_t guest_address,
|
||||||
|
uint32_t guest_length);
|
||||||
|
// Adds a block of data to the frame cache.
|
||||||
|
void CacheTransientData(uint32_t guest_address, uint32_t guest_length,
|
||||||
|
VkDeviceSize offset);
|
||||||
|
|
||||||
RegisterFile* register_file_ = nullptr;
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
Memory* memory_ = nullptr;
|
||||||
VkDevice device_ = nullptr;
|
VkDevice device_ = nullptr;
|
||||||
|
|
||||||
VkDeviceMemory gpu_memory_pool_ = nullptr;
|
VkDeviceMemory gpu_memory_pool_ = nullptr;
|
||||||
|
@ -108,7 +117,7 @@ class BufferCache {
|
||||||
// Staging ringbuffer we cycle through fast. Used for data we don't
|
// Staging ringbuffer we cycle through fast. Used for data we don't
|
||||||
// plan on keeping past the current frame.
|
// plan on keeping past the current frame.
|
||||||
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
|
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
|
||||||
std::unordered_map<uint64_t, VkDeviceSize> transient_cache_;
|
std::map<uint64_t, VkDeviceSize> transient_cache_;
|
||||||
|
|
||||||
VkDescriptorPool descriptor_pool_ = nullptr;
|
VkDescriptorPool descriptor_pool_ = nullptr;
|
||||||
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
||||||
|
|
|
@ -73,8 +73,8 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
||||||
|
|
||||||
// Initialize the state machine caches.
|
// Initialize the state machine caches.
|
||||||
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
|
buffer_cache_ = std::make_unique<BufferCache>(
|
||||||
kDefaultBufferCacheCapacity);
|
register_file_, memory_, device_, kDefaultBufferCacheCapacity);
|
||||||
texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
|
texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
|
||||||
&trace_writer_, device_);
|
&trace_writer_, device_);
|
||||||
pipeline_cache_ = std::make_unique<PipelineCache>(
|
pipeline_cache_ = std::make_unique<PipelineCache>(
|
||||||
|
@ -696,13 +696,12 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
|
||||||
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
||||||
|
|
||||||
// Upload (or get a cached copy of) the buffer.
|
// Upload (or get a cached copy of) the buffer.
|
||||||
const void* source_ptr =
|
uint32_t source_addr = info.guest_base;
|
||||||
memory_->TranslatePhysical<const void*>(info.guest_base);
|
uint32_t source_length =
|
||||||
size_t source_length =
|
|
||||||
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
||||||
: sizeof(uint16_t));
|
: sizeof(uint16_t));
|
||||||
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
||||||
source_ptr, source_length, info.format, current_batch_fence_);
|
source_addr, source_length, info.format, current_batch_fence_);
|
||||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
// Failed to upload buffer.
|
// Failed to upload buffer.
|
||||||
return false;
|
return false;
|
||||||
|
@ -764,11 +763,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
|
||||||
trace_writer_.WriteMemoryRead(physical_address, valid_range);
|
trace_writer_.WriteMemoryRead(physical_address, valid_range);
|
||||||
|
|
||||||
// Upload (or get a cached copy of) the buffer.
|
// Upload (or get a cached copy of) the buffer.
|
||||||
const void* source_ptr =
|
uint32_t source_length = uint32_t(valid_range);
|
||||||
memory_->TranslatePhysical<const void*>(physical_address);
|
|
||||||
size_t source_length = valid_range;
|
|
||||||
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
||||||
source_ptr, source_length, static_cast<Endian>(fetch->endian),
|
physical_address, source_length, static_cast<Endian>(fetch->endian),
|
||||||
current_batch_fence_);
|
current_batch_fence_);
|
||||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
// Failed to upload buffer.
|
// Failed to upload buffer.
|
||||||
|
|
Loading…
Reference in New Issue