From 44e7c3712acd6f3d9b52af4f77fd49f628f06767 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Mon, 13 Mar 2017 23:43:34 -0500 Subject: [PATCH] Vulkan: Fix register constants on AMD --- src/xenia/gpu/spirv_shader_translator.cc | 17 +++-- src/xenia/gpu/vulkan/buffer_cache.cc | 64 +++++++++++++++++-- src/xenia/gpu/vulkan/buffer_cache.h | 15 ++--- .../gpu/vulkan/vulkan_command_processor.cc | 9 +-- src/xenia/ui/vulkan/circular_buffer.cc | 10 +++ src/xenia/ui/vulkan/circular_buffer.h | 1 + 6 files changed, 91 insertions(+), 25 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 69af65c35..fc35c4a19 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -117,6 +117,14 @@ void SpirvShaderTranslator::StartTranslation() { Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1); Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1); + // Strides + b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride, + 4 * sizeof(float)); + b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + Id consts_struct_type = b.makeStructType( {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); @@ -124,25 +132,16 @@ void SpirvShaderTranslator::StartTranslation() { // Constants member decorations. b.addMemberDecoration(consts_struct_type, 0, spv::Decoration::DecorationOffset, 0); - b.addMemberDecoration(consts_struct_type, 0, - spv::Decoration::DecorationArrayStride, - 4 * sizeof(float)); b.addMemberName(consts_struct_type, 0, "float_consts"); b.addMemberDecoration(consts_struct_type, 1, spv::Decoration::DecorationOffset, 512 * 4 * sizeof(float)); - b.addMemberDecoration(consts_struct_type, 1, - spv::Decoration::DecorationArrayStride, - sizeof(uint32_t)); b.addMemberName(consts_struct_type, 1, "loop_consts"); b.addMemberDecoration(consts_struct_type, 2, spv::Decoration::DecorationOffset, 512 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); - b.addMemberDecoration(consts_struct_type, 2, - spv::Decoration::DecorationArrayStride, - sizeof(uint32_t)); b.addMemberName(consts_struct_type, 2, "bool_consts"); consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index bcd2e98c9..43d4a4ce8 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -110,6 +110,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory, buffer_info.buffer = transient_buffer_->gpu_buffer(); buffer_info.offset = 0; buffer_info.range = kConstantRegisterUniformRange; + VkWriteDescriptorSet descriptor_writes[2]; auto& vertex_uniform_binding_write = descriptor_writes[0]; vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -147,6 +148,7 @@ BufferCache::~BufferCache() { } std::pair BufferCache::UploadConstantRegisters( + VkCommandBuffer command_buffer, const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& pixel_constant_register_map, VkFence fence) { @@ -175,6 +177,24 @@ std::pair BufferCache::UploadConstantRegisters( 32 * 4); dest_ptr += 32 * 4; + transient_buffer_->Flush(offset, kConstantRegisterUniformRange); + + // Append a barrier to the command buffer. + VkBufferMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + transient_buffer_->gpu_buffer(), + offset, + kConstantRegisterUniformRange, + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + return {offset, offset}; // Packed upload code. @@ -229,8 +249,8 @@ std::pair BufferCache::UploadConstantRegisters( } std::pair BufferCache::UploadIndexBuffer( - uint32_t source_addr, uint32_t source_length, IndexFormat format, - VkFence fence) { + VkCommandBuffer command_buffer, uint32_t source_addr, + uint32_t source_length, IndexFormat format, VkFence fence) { auto offset = FindCachedTransientData(source_addr, source_length); if (offset != VK_WHOLE_SIZE) { return {transient_buffer_->gpu_buffer(), offset}; @@ -258,13 +278,31 @@ std::pair BufferCache::UploadIndexBuffer( source_ptr, source_length / 4); } + transient_buffer_->Flush(offset, source_length); + + // Append a barrier to the command buffer. + VkBufferMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_INDEX_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + transient_buffer_->gpu_buffer(), + offset, + source_length, + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + CacheTransientData(source_addr, source_length, offset); return {transient_buffer_->gpu_buffer(), offset}; } std::pair BufferCache::UploadVertexBuffer( - uint32_t source_addr, uint32_t source_length, Endian endian, - VkFence fence) { + VkCommandBuffer command_buffer, uint32_t source_addr, + uint32_t source_length, Endian endian, VkFence fence) { auto offset = FindCachedTransientData(source_addr, source_length); if (offset != VK_WHOLE_SIZE) { return {transient_buffer_->gpu_buffer(), offset}; @@ -292,6 +330,24 @@ std::pair BufferCache::UploadVertexBuffer( assert_always(); } + transient_buffer_->Flush(offset, source_length); + + // Append a barrier to the command buffer. + VkBufferMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + transient_buffer_->gpu_buffer(), + offset, + source_length, + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + CacheTransientData(source_addr, source_length, offset); return {transient_buffer_->gpu_buffer(), offset}; } diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index 8740c45f6..d8e2489de 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -53,6 +53,7 @@ class BufferCache { // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). // The returned offsets may alias. std::pair UploadConstantRegisters( + VkCommandBuffer command_buffer, const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& pixel_constant_register_map, VkFence fence); @@ -61,19 +62,17 @@ class BufferCache { // recently uploaded data or cached copies. // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadIndexBuffer(uint32_t source_addr, - uint32_t source_length, - IndexFormat format, - VkFence fence); + std::pair UploadIndexBuffer( + VkCommandBuffer command_buffer, uint32_t source_addr, + uint32_t source_length, IndexFormat format, VkFence fence); // Uploads vertex buffer data from guest memory, possibly eliding with // recently uploaded data or cached copies. // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadVertexBuffer(uint32_t source_addr, - uint32_t source_length, - Endian endian, - VkFence fence); + std::pair UploadVertexBuffer( + VkCommandBuffer command_buffer, uint32_t source_addr, + uint32_t source_length, Endian endian, VkFence fence); // Flushes all pending data to the GPU. // Until this is called the GPU is not guaranteed to see any data. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9b4b5c5c2..bc303f989 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -626,7 +626,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, // Upload the constants the shaders require. // These are optional, and if none are defined 0 will be returned. auto constant_offsets = buffer_cache_->UploadConstantRegisters( - vertex_shader->constant_register_map(), + current_setup_buffer_, vertex_shader->constant_register_map(), pixel_shader ? pixel_shader->constant_register_map() : dummy_map, current_batch_fence_); if (constant_offsets.first == VK_WHOLE_SIZE || @@ -681,7 +681,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer( info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t)); auto buffer_ref = buffer_cache_->UploadIndexBuffer( - source_addr, source_length, info.format, current_batch_fence_); + current_setup_buffer_, source_addr, source_length, info.format, + current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -745,8 +746,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( // Upload (or get a cached copy of) the buffer. uint32_t source_length = uint32_t(valid_range); auto buffer_ref = buffer_cache_->UploadVertexBuffer( - physical_address, source_length, static_cast(fetch->endian), - current_batch_fence_); + current_setup_buffer_, physical_address, source_length, + static_cast(fetch->endian), current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 5812b0baa..7b9b4bae2 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -232,6 +232,16 @@ void CircularBuffer::Flush(Allocation* allocation) { vkFlushMappedMemoryRanges(*device_, 1, &range); } +void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) { + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = nullptr; + range.memory = gpu_memory_; + range.offset = gpu_base_ + offset; + range.size = length; + vkFlushMappedMemoryRanges(*device_, 1, &range); +} + void CircularBuffer::Clear() { for (auto alloc : allocations_) { delete alloc; diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h index 36a660903..d7504f5e8 100644 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -61,6 +61,7 @@ class CircularBuffer { // reaches the signaled state. Allocation* Acquire(VkDeviceSize length, VkFence fence); void Flush(Allocation* allocation); + void Flush(VkDeviceSize offset, VkDeviceSize length); // Clears all allocations, regardless of whether they've been consumed or not. void Clear();