Vulkan: Add basic one-frame cache to BufferCache
This commit is contained in:
parent
d9b52d1afa
commit
8c79051a94
|
@ -106,8 +106,12 @@ void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr,
|
||||
size_t count) {
|
||||
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count) {
|
||||
return copy_and_swap_16_in_32_unaligned(dest, src, count);
|
||||
}
|
||||
|
||||
void copy_and_swap_16_in_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
size_t count) {
|
||||
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
|
||||
size_t i;
|
||||
|
|
|
@ -130,6 +130,8 @@ void copy_and_swap_32_unaligned(void* dest, const void* src, size_t count);
|
|||
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count);
|
||||
void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count);
|
||||
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count);
|
||||
void copy_and_swap_16_in_32_unaligned(void* dest, const void* src,
|
||||
size_t count);
|
||||
|
||||
template <typename T>
|
||||
void copy_and_swap(T* dest, const T* src, size_t count) {
|
||||
|
|
|
@ -25,9 +25,9 @@ using xe::ui::vulkan::CheckResult;
|
|||
constexpr VkDeviceSize kConstantRegisterUniformRange =
|
||||
512 * 4 * 4 + 8 * 4 + 32 * 4;
|
||||
|
||||
BufferCache::BufferCache(RegisterFile* register_file,
|
||||
BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
|
||||
ui::vulkan::VulkanDevice* device, size_t capacity)
|
||||
: register_file_(register_file), device_(*device) {
|
||||
: register_file_(register_file), memory_(memory), device_(*device) {
|
||||
transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
|
||||
device,
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
|
@ -229,15 +229,22 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
|||
}
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||
const void* source_ptr, size_t source_length, IndexFormat format,
|
||||
uint32_t source_addr, uint32_t source_length, IndexFormat format,
|
||||
VkFence fence) {
|
||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||
if (offset != VK_WHOLE_SIZE) {
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
// Allocate space in the buffer for our data.
|
||||
auto offset = AllocateTransientData(source_length, fence);
|
||||
offset = AllocateTransientData(source_length, fence);
|
||||
if (offset == VK_WHOLE_SIZE) {
|
||||
// OOM.
|
||||
return {nullptr, VK_WHOLE_SIZE};
|
||||
}
|
||||
|
||||
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||
|
||||
// Copy data into the buffer.
|
||||
// TODO(benvanik): get min/max indices and pass back?
|
||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||
|
@ -251,28 +258,41 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
|||
source_ptr, source_length / 4);
|
||||
}
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||
const void* source_ptr, size_t source_length, Endian endian,
|
||||
uint32_t source_addr, uint32_t source_length, Endian endian,
|
||||
VkFence fence) {
|
||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||
if (offset != VK_WHOLE_SIZE) {
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
// Allocate space in the buffer for our data.
|
||||
auto offset = AllocateTransientData(source_length, fence);
|
||||
offset = AllocateTransientData(source_length, fence);
|
||||
if (offset == VK_WHOLE_SIZE) {
|
||||
// OOM.
|
||||
return {nullptr, VK_WHOLE_SIZE};
|
||||
}
|
||||
|
||||
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||
|
||||
// Copy data into the buffer.
|
||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||
assert_true(endian == Endian::k8in32);
|
||||
if (endian == Endian::k8in32) {
|
||||
// Endian::k8in32, swap words.
|
||||
xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 4);
|
||||
} else if (endian == Endian::k16in32) {
|
||||
xe::copy_and_swap_16_in_32_aligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 4);
|
||||
} else {
|
||||
assert_always();
|
||||
}
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
|
@ -304,6 +324,24 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
|
|||
return VK_WHOLE_SIZE;
|
||||
}
|
||||
|
||||
// Looks up a previously recorded upload in transient_cache_.
// The key packs guest_length into the upper 32 bits and guest_address into
// the lower 32 bits, so only an exact (address, length) match is a hit.
// Returns the recorded transient buffer offset, or VK_WHOLE_SIZE on a miss.
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
                                                  uint32_t guest_length) {
  const uint64_t lookup_key = (static_cast<uint64_t>(guest_length) << 32) |
                              static_cast<uint64_t>(guest_address);
  const auto entry = transient_cache_.find(lookup_key);
  if (entry == transient_cache_.end()) {
    // Nothing cached for this exact range.
    return VK_WHOLE_SIZE;
  }
  return entry->second;
}
|
||||
|
||||
void BufferCache::CacheTransientData(uint32_t guest_address,
|
||||
uint32_t guest_length,
|
||||
VkDeviceSize offset) {
|
||||
uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
|
||||
transient_cache_[key] = offset;
|
||||
}
|
||||
|
||||
void BufferCache::Flush(VkCommandBuffer command_buffer) {
|
||||
// If we are flushing a big enough chunk queue up an event.
|
||||
// We don't want to do this for everything but often enough so that we won't
|
||||
|
@ -331,7 +369,10 @@ void BufferCache::InvalidateCache() {
|
|||
|
||||
// Forgets every (guest range -> transient offset) entry recorded so far.
void BufferCache::ClearCache() {
  transient_cache_.clear();
}
|
||||
|
||||
void BufferCache::Scavenge() { transient_buffer_->Scavenge(); }
|
||||
void BufferCache::Scavenge() {
|
||||
transient_cache_.clear();
|
||||
transient_buffer_->Scavenge();
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
|
|
|
@ -13,11 +13,12 @@
|
|||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/vulkan/circular_buffer.h"
|
||||
#include "xenia/ui/vulkan/vulkan.h"
|
||||
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -28,8 +29,8 @@ namespace vulkan {
|
|||
// transient data like shader constants.
|
||||
class BufferCache {
|
||||
public:
|
||||
BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
|
||||
size_t capacity);
|
||||
BufferCache(RegisterFile* register_file, Memory* memory,
|
||||
ui::vulkan::VulkanDevice* device, size_t capacity);
|
||||
~BufferCache();
|
||||
|
||||
// Descriptor set containing the dynamic uniform buffer used for constant
|
||||
|
@ -60,8 +61,8 @@ class BufferCache {
|
|||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||
// The returned offset will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
|
||||
size_t source_length,
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
|
||||
uint32_t source_length,
|
||||
IndexFormat format,
|
||||
VkFence fence);
|
||||
|
||||
|
@ -69,8 +70,8 @@ class BufferCache {
|
|||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||
// The returned offset will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
|
||||
size_t source_length,
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
|
||||
uint32_t source_length,
|
||||
Endian endian,
|
||||
VkFence fence);
|
||||
|
||||
|
@ -99,8 +100,16 @@ class BufferCache {
|
|||
// Tries to allocate a block of memory in the transient buffer.
|
||||
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
|
||||
// Finds a block of data in the transient buffer sourced from the specified
|
||||
// guest address and length.
|
||||
VkDeviceSize FindCachedTransientData(uint32_t guest_address,
|
||||
uint32_t guest_length);
|
||||
// Adds a block of data to the frame cache.
|
||||
void CacheTransientData(uint32_t guest_address, uint32_t guest_length,
|
||||
VkDeviceSize offset);
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
Memory* memory_ = nullptr;
|
||||
VkDevice device_ = nullptr;
|
||||
|
||||
VkDeviceMemory gpu_memory_pool_ = nullptr;
|
||||
|
@ -108,7 +117,7 @@ class BufferCache {
|
|||
// Staging ringbuffer we cycle through fast. Used for data we don't
|
||||
// plan on keeping past the current frame.
|
||||
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
|
||||
std::unordered_map<uint64_t, VkDeviceSize> transient_cache_;
|
||||
std::map<uint64_t, VkDeviceSize> transient_cache_;
|
||||
|
||||
VkDescriptorPool descriptor_pool_ = nullptr;
|
||||
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
||||
|
|
|
@ -73,8 +73,8 @@ bool VulkanCommandProcessor::SetupContext() {
|
|||
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
||||
|
||||
// Initialize the state machine caches.
|
||||
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
|
||||
kDefaultBufferCacheCapacity);
|
||||
buffer_cache_ = std::make_unique<BufferCache>(
|
||||
register_file_, memory_, device_, kDefaultBufferCacheCapacity);
|
||||
texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
|
||||
&trace_writer_, device_);
|
||||
pipeline_cache_ = std::make_unique<PipelineCache>(
|
||||
|
@ -696,13 +696,12 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
|
|||
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
||||
|
||||
// Upload (or get a cached copy of) the buffer.
|
||||
const void* source_ptr =
|
||||
memory_->TranslatePhysical<const void*>(info.guest_base);
|
||||
size_t source_length =
|
||||
uint32_t source_addr = info.guest_base;
|
||||
uint32_t source_length =
|
||||
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
||||
: sizeof(uint16_t));
|
||||
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
||||
source_ptr, source_length, info.format, current_batch_fence_);
|
||||
source_addr, source_length, info.format, current_batch_fence_);
|
||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||
// Failed to upload buffer.
|
||||
return false;
|
||||
|
@ -764,11 +763,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
|
|||
trace_writer_.WriteMemoryRead(physical_address, valid_range);
|
||||
|
||||
// Upload (or get a cached copy of) the buffer.
|
||||
const void* source_ptr =
|
||||
memory_->TranslatePhysical<const void*>(physical_address);
|
||||
size_t source_length = valid_range;
|
||||
uint32_t source_length = uint32_t(valid_range);
|
||||
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
||||
source_ptr, source_length, static_cast<Endian>(fetch->endian),
|
||||
physical_address, source_length, static_cast<Endian>(fetch->endian),
|
||||
current_batch_fence_);
|
||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||
// Failed to upload buffer.
|
||||
|
|
Loading…
Reference in New Issue