[Vulkan] Upload entire vertex allocation into the transient cache (for now)

This commit is contained in:
DrChat 2018-02-11 20:43:32 -06:00
parent d0460122f4
commit c9a5553fe9
1 changed file with 32 additions and 16 deletions

View File

@@ -34,7 +34,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
device_, device_,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
capacity); capacity, 4096);
} }
BufferCache::~BufferCache() { Shutdown(); } BufferCache::~BufferCache() { Shutdown(); }
@@ -284,13 +284,15 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr, VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, IndexFormat format, VkFence fence) { uint32_t source_length, IndexFormat format, VkFence fence) {
/*
auto offset = FindCachedTransientData(source_addr, source_length); auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) { if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
*/
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
offset = AllocateTransientData(source_length, fence); auto offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
@@ -329,7 +331,7 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr); &barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset); // CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
@@ -341,29 +343,43 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
// Slow path :)
// Expand the region up to the allocation boundary
auto physical_heap = memory_->GetPhysicalHeap();
uint32_t upload_base = source_addr;
uint32_t upload_size = source_length;
// Ping the memory subsystem for allocation size.
physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
assert(upload_base <= source_addr);
uint32_t source_offset = source_addr - upload_base;
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
offset = AllocateTransientData(source_length, fence); offset = AllocateTransientData(upload_size, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
} }
const void* source_ptr = memory_->TranslatePhysical(source_addr); const void* upload_ptr = memory_->TranslatePhysical(upload_base);
// Copy data into the buffer. // Copy data into the buffer.
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
if (endian == Endian::k8in32) { if (endian == Endian::k8in32) {
// Endian::k8in32, swap words. // Endian::k8in32, swap words.
xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4); upload_ptr, upload_size / 4);
} else if (endian == Endian::k16in32) { } else if (endian == Endian::k16in32) {
// TODO(DrChat): Investigate what 16-in-32 actually does.
assert_always();
xe::copy_and_swap_16_in_32_aligned(transient_buffer_->host_base() + offset, xe::copy_and_swap_16_in_32_aligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4); upload_ptr, upload_size / 4);
} else { } else {
assert_always(); assert_always();
} }
transient_buffer_->Flush(offset, source_length); transient_buffer_->Flush(offset, upload_size);
// Append a barrier to the command buffer. // Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = { VkBufferMemoryBarrier barrier = {
@@ -375,14 +391,14 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(), transient_buffer_->gpu_buffer(),
offset, offset,
source_length, upload_size,
}; };
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr); &barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset); CacheTransientData(upload_base, upload_size, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset + source_offset};
} }
VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length, VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length,
@@ -423,13 +439,13 @@ VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
// Find the first element > guest_address // Find the first element > guest_address
auto it = transient_cache_.upper_bound(guest_address); auto it = transient_cache_.upper_bound(guest_address);
if (it != transient_cache_.begin()) { if (it != transient_cache_.begin()) {
// it = first element < guest_address // it = first element <= guest_address
--it; --it;
if (it->first <= guest_address && if ((it->first + it->second.first) >= (guest_address + guest_length)) {
(it->first + it->second.first) >= (guest_address + guest_length)) { // This data is contained within some existing transient data.
// This element is contained within some existing transient data. auto source_offset = static_cast<VkDeviceSize>(guest_address - it->first);
return it->second.second + (guest_address - it->first); return it->second.second + source_offset;
} }
} }