From 4c4a641096f0012731dc225c9ff9e0f7ca98c289 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:43:17 -0800 Subject: [PATCH] WIP rough sketch of vulkan backend structure. --- src/xenia/base/memory.cc | 4 + src/xenia/base/memory.h | 2 + src/xenia/gpu/vulkan/buffer_cache.cc | 330 ++++++++++ src/xenia/gpu/vulkan/buffer_cache.h | 111 ++++ src/xenia/gpu/vulkan/pipeline_cache.cc | 335 ++++++++++ src/xenia/gpu/vulkan/pipeline_cache.h | 167 +++++ src/xenia/gpu/vulkan/render_cache.cc | 45 ++ src/xenia/gpu/vulkan/render_cache.h | 46 ++ src/xenia/gpu/vulkan/texture_cache.cc | 37 ++ src/xenia/gpu/vulkan/texture_cache.h | 47 ++ .../gpu/vulkan/vulkan_command_processor.cc | 601 ++++++++---------- .../gpu/vulkan/vulkan_command_processor.h | 121 +--- src/xenia/ui/vulkan/fenced_pools.cc | 81 +++ src/xenia/ui/vulkan/fenced_pools.h | 200 ++++++ 14 files changed, 1691 insertions(+), 436 deletions(-) create mode 100644 src/xenia/gpu/vulkan/buffer_cache.cc create mode 100644 src/xenia/gpu/vulkan/buffer_cache.h create mode 100644 src/xenia/gpu/vulkan/pipeline_cache.cc create mode 100644 src/xenia/gpu/vulkan/pipeline_cache.h create mode 100644 src/xenia/gpu/vulkan/render_cache.cc create mode 100644 src/xenia/gpu/vulkan/render_cache.h create mode 100644 src/xenia/gpu/vulkan/texture_cache.cc create mode 100644 src/xenia/gpu/vulkan/texture_cache.h create mode 100644 src/xenia/ui/vulkan/fenced_pools.cc create mode 100644 src/xenia/ui/vulkan/fenced_pools.h diff --git a/src/xenia/base/memory.cc b/src/xenia/base/memory.cc index 5656b9798..f83b01d72 100644 --- a/src/xenia/base/memory.cc +++ b/src/xenia/base/memory.cc @@ -18,6 +18,10 @@ namespace xe { // http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h // http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h +void copy_128_aligned(void* dest, const void* src, size_t count) { + std::memcpy(dest, src, count * 16); +} + void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, size_t count) { return copy_and_swap_16_unaligned(dest, src, count); diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 05517a8e8..183843416 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -121,6 +121,8 @@ inline void* low_address(void* address) { return reinterpret_cast(uint64_t(address) & 0xFFFFFFFF); } +void copy_128_aligned(void* dest, const void* src, size_t count); + void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, size_t count); void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src, diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc new file mode 100644 index 000000000..4cace24ba --- /dev/null +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -0,0 +1,330 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/buffer_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +BufferCache::BufferCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device, size_t capacity) + : register_file_(register_file), + device_(*device), + transient_capacity_(capacity) { + // Uniform buffer. + VkBufferCreateInfo uniform_buffer_info; + uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + uniform_buffer_info.pNext = nullptr; + uniform_buffer_info.flags = 0; + uniform_buffer_info.size = transient_capacity_; + uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + uniform_buffer_info.queueFamilyIndexCount = 0; + uniform_buffer_info.pQueueFamilyIndices = nullptr; + auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr, + &transient_uniform_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Index buffer. + VkBufferCreateInfo index_buffer_info; + index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + index_buffer_info.pNext = nullptr; + index_buffer_info.flags = 0; + index_buffer_info.size = transient_capacity_; + index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + index_buffer_info.queueFamilyIndexCount = 0; + index_buffer_info.pQueueFamilyIndices = nullptr; + err = vkCreateBuffer(device_, &index_buffer_info, nullptr, + &transient_index_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Vertex buffer. + VkBufferCreateInfo vertex_buffer_info; + vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vertex_buffer_info.pNext = nullptr; + vertex_buffer_info.flags = 0; + vertex_buffer_info.size = transient_capacity_; + vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + vertex_buffer_info.queueFamilyIndexCount = 0; + vertex_buffer_info.pQueueFamilyIndices = nullptr; + err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr, + &transient_vertex_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Allocate the underlying buffer we use for all storage. + // We query all types and take the max alignment. 
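+  // All three transient buffers alias a single VkDeviceMemory allocation, so
+  // the allocation must satisfy every buffer at once: the max of the
+  // reported alignments, and only memory types that all three buffers accept.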
+  VkMemoryRequirements uniform_buffer_requirements;
+  VkMemoryRequirements index_buffer_requirements;
+  VkMemoryRequirements vertex_buffer_requirements;
+  vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
+                                &uniform_buffer_requirements);
+  vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
+                                &index_buffer_requirements);
+  vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
+                                &vertex_buffer_requirements);
+  uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
+  index_buffer_alignment_ = index_buffer_requirements.alignment;
+  vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
+  VkMemoryRequirements buffer_requirements;
+  buffer_requirements.size = transient_capacity_;
+  buffer_requirements.alignment =
+      std::max(uniform_buffer_requirements.alignment,
+               std::max(index_buffer_requirements.alignment,
+                        vertex_buffer_requirements.alignment));
+  // Intersect (not union) the masks so the chosen memory type is valid for
+  // all three buffers.
+  buffer_requirements.memoryTypeBits =
+      uniform_buffer_requirements.memoryTypeBits &
+      index_buffer_requirements.memoryTypeBits &
+      vertex_buffer_requirements.memoryTypeBits;
+  transient_buffer_memory_ = device->AllocateMemory(
+      buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+
+  // Alias all buffers to our memory.
+  vkBindBufferMemory(device_, transient_uniform_buffer_,
+                     transient_buffer_memory_, 0);
+  vkBindBufferMemory(device_, transient_index_buffer_,
+                     transient_buffer_memory_, 0);
+  vkBindBufferMemory(device_, transient_vertex_buffer_,
+                     transient_buffer_memory_, 0);
+
+  // Map memory and keep it mapped while we use it.
+  err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
+                    &transient_buffer_data_);
+  CheckResult(err, "vkMapMemory");
+
+  // Descriptor pool used for all of our cached descriptors.
+  // In the steady state we don't allocate anything, so these are all manually
+  // managed.
+  VkDescriptorPoolCreateInfo descriptor_pool_info;
+  descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+  descriptor_pool_info.pNext = nullptr;
+  descriptor_pool_info.flags =
+      VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+  descriptor_pool_info.maxSets = 1;
+  VkDescriptorPoolSize pool_sizes[1];
+  pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+  pool_sizes[0].descriptorCount = 2;
+  descriptor_pool_info.poolSizeCount = 1;
+  descriptor_pool_info.pPoolSizes = pool_sizes;
+  err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
+                               &descriptor_pool_);
+  CheckResult(err, "vkCreateDescriptorPool");
+
+  // Create the descriptor set layout used for our uniform buffer.
+  // As it is a static binding that uses dynamic offsets during draws we can
+  // create this once and reuse it forever.
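+  // (The intent is for pipeline layouts to include this set layout as set 0;
+  // IssueDraw binds the set at index 0 with two dynamic offsets.)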
+  VkDescriptorSetLayoutBinding vertex_uniform_binding;
+  vertex_uniform_binding.binding = 0;
+  vertex_uniform_binding.descriptorType =
+      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+  vertex_uniform_binding.descriptorCount = 1;
+  vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
+  vertex_uniform_binding.pImmutableSamplers = nullptr;
+  VkDescriptorSetLayoutBinding fragment_uniform_binding;
+  fragment_uniform_binding.binding = 1;
+  fragment_uniform_binding.descriptorType =
+      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+  fragment_uniform_binding.descriptorCount = 1;
+  fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+  fragment_uniform_binding.pImmutableSamplers = nullptr;
+  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info;
+  descriptor_set_layout_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  descriptor_set_layout_info.pNext = nullptr;
+  descriptor_set_layout_info.flags = 0;
+  VkDescriptorSetLayoutBinding uniform_bindings[] = {
+      vertex_uniform_binding, fragment_uniform_binding,
+  };
+  descriptor_set_layout_info.bindingCount =
+      static_cast<uint32_t>(xe::countof(uniform_bindings));
+  descriptor_set_layout_info.pBindings = uniform_bindings;
+  err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info,
+                                    nullptr, &descriptor_set_layout_);
+  CheckResult(err, "vkCreateDescriptorSetLayout");
+
+  // Create the descriptor we'll use for the uniform buffer.
+  // This is what we hand out to everyone (who then also needs to use our
+  // offsets).
+  VkDescriptorSetAllocateInfo set_alloc_info;
+  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+  set_alloc_info.pNext = nullptr;
+  set_alloc_info.descriptorPool = descriptor_pool_;
+  set_alloc_info.descriptorSetCount = 1;
+  set_alloc_info.pSetLayouts = &descriptor_set_layout_;
+  err = vkAllocateDescriptorSets(device_, &set_alloc_info,
+                                 &transient_descriptor_set_);
+  CheckResult(err, "vkAllocateDescriptorSets");
+}
+
+BufferCache::~BufferCache() {
+  vkFreeDescriptorSets(device_, descriptor_pool_, 1,
+                       &transient_descriptor_set_);
+  vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
+  vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
+  vkUnmapMemory(device_, transient_buffer_memory_);
+  vkFreeMemory(device_, transient_buffer_memory_, nullptr);
+  vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
+  vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
+  vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
+}
+
+VkDeviceSize BufferCache::UploadConstantRegisters(
+    const Shader::ConstantRegisterMap& constant_register_map) {
+  // Allocate space in the buffer for our data.
+  auto offset = AllocateTransientData(
+      uniform_buffer_alignment_, constant_register_map.packed_byte_length);
+  if (offset == VK_WHOLE_SIZE) {
+    // OOM.
+    return VK_WHOLE_SIZE;
+  }
+
+  // Run through registers and copy them into the buffer.
+  // TODO(benvanik): optimize this - it's hit twice every call.
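+  // float_bitmap[] is 4x64 bits covering the 256 float4 constants; each set
+  // bit appends one 16-byte register. Loop (int) and bool constants follow
+  // as packed dwords, matching the [floats, ints, bools] layout described in
+  // the header.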
+  const auto& values = register_file_->values;
+  uint8_t* dest_ptr =
+      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
+  for (int i = 0; i < 4; ++i) {
+    auto piece = constant_register_map.float_bitmap[i];
+    if (!piece) {
+      continue;
+    }
+    for (int j = 0; j < 64; ++j) {
+      if (piece & (1ull << j)) {
+        xe::copy_128_aligned(
+            dest_ptr,
+            &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
+        dest_ptr += 16;
+      }
+    }
+  }
+  for (int i = 0; i < 32; ++i) {
+    if (constant_register_map.int_bitmap & (1 << i)) {
+      xe::store(dest_ptr,
+                values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
+      dest_ptr += 4;
+    }
+  }
+  for (int i = 0; i < 8; ++i) {
+    if (constant_register_map.bool_bitmap[i]) {
+      xe::store(
+          dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
+      dest_ptr += 4;
+    }
+  }
+
+  return offset;
+}
+
+std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
+    const void* source_ptr, size_t source_length, IndexFormat format) {
+  // TODO(benvanik): check cache.
+
+  // Allocate space in the buffer for our data.
+  auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
+  if (offset == VK_WHOLE_SIZE) {
+    // OOM.
+    return {nullptr, VK_WHOLE_SIZE};
+  }
+
+  // Copy data into the buffer. Note that offset is a byte offset into the
+  // mapped ring.
+  // TODO(benvanik): get min/max indices and pass back?
+  // TODO(benvanik): memcpy then use compute shaders to swap?
+  if (format == IndexFormat::kInt16) {
+    // Endian::k8in16, swap half-words.
+    xe::copy_and_swap_16_aligned(
+        reinterpret_cast<uint16_t*>(
+            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
+        reinterpret_cast<const uint16_t*>(source_ptr), source_length / 2);
+  } else if (format == IndexFormat::kInt32) {
+    // Endian::k8in32, swap words.
+    xe::copy_and_swap_32_aligned(
+        reinterpret_cast<uint32_t*>(
+            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
+        reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
+  }
+
+  return {transient_index_buffer_, offset};
+}
+
+std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
+    const void* source_ptr, size_t source_length) {
+  // TODO(benvanik): check cache.
+
+  // Allocate space in the buffer for our data.
+  auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
+  if (offset == VK_WHOLE_SIZE) {
+    // OOM.
+    return {nullptr, VK_WHOLE_SIZE};
+  }
+
+  // Copy data into the buffer.
+  // TODO(benvanik): memcpy then use compute shaders to swap?
+  // Endian::k8in32, swap words.
+  xe::copy_and_swap_32_aligned(
+      reinterpret_cast<uint32_t*>(
+          reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
+      reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
+
+  return {transient_vertex_buffer_, offset};
+}
+
+VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
+                                                size_t length) {
+  // Try to add to end, wrapping if required.
+
+  // Check to ensure there is space.
+  if (false) {
+    // Consume all fences.
+  }
+
+  // Slice off our bit.
+
+  // TODO(benvanik): actual ring allocation; returning VK_WHOLE_SIZE reports
+  // OOM to callers, so all uploads currently fail gracefully.
+  return VK_WHOLE_SIZE;
+}
+
+void BufferCache::Flush(VkCommandBuffer command_buffer) {
+  // If we are flushing a big enough chunk queue up an event.
+  // We don't want to do this for everything but often enough so that we won't
+  // run out of space.
+  if (true) {
+    // VkEvent finish_event;
+    // vkCmdSetEvent(cmd_buffer, finish_event,
+    //               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+  }
+
+  // Flush memory.
+  // TODO(benvanik): subrange.
+  VkMappedMemoryRange dirty_range;
+  dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+  dirty_range.pNext = nullptr;
+  dirty_range.memory = transient_buffer_memory_;
+  dirty_range.offset = 0;
+  dirty_range.size = transient_capacity_;
+  vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
+}
+
+void BufferCache::InvalidateCache() {
+  // TODO(benvanik): caching.
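+  // A rough sketch of the likely shape; transient_generation_ and
+  // transient_lookup_ are hypothetical members that don't exist yet:
+#if 0
+  // Guest memory may have changed; stop eliding against anything uploaded
+  // before this point, but keep the ring storage itself alive.
+  ++transient_generation_;    // hypothetical generation counter
+  transient_lookup_.clear();  // hypothetical dedup map
+#endif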
+}
+
+void BufferCache::ClearCache() {
+  // TODO(benvanik): caching.
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h
new file mode 100644
index 000000000..661e30aa7
--- /dev/null
+++ b/src/xenia/gpu/vulkan/buffer_cache.h
@@ -0,0 +1,111 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
+#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
+
+#include <utility>
+
+#include "xenia/gpu/register_file.h"
+#include "xenia/gpu/shader.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/vulkan.h"
+#include "xenia/ui/vulkan/vulkan_device.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+// Efficiently manages buffers of various kinds.
+// Used primarily for uploading index and vertex data from guest memory and
+// transient data like shader constants.
+class BufferCache {
+ public:
+  BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
+              size_t capacity);
+  ~BufferCache();
+
+  // Descriptor set containing the dynamic uniform buffer used for constant
+  // uploads. Used in conjunction with a dynamic offset returned by
+  // UploadConstantRegisters.
+  // The set contains two bindings:
+  //   binding = 0: for use in vertex shaders
+  //   binding = 1: for use in fragment shaders
+  VkDescriptorSet constant_descriptor_set() const {
+    return transient_descriptor_set_;
+  }
+
+  // Uploads the constants specified in the register maps to the transient
+  // uniform storage buffer.
+  // The registers are tightly packed in order as [floats, ints, bools].
+  // Returns an offset that can be used with the transient_descriptor_set or
+  // VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
+  VkDeviceSize UploadConstantRegisters(
+      const Shader::ConstantRegisterMap& constant_register_map);
+
+  // Uploads index buffer data from guest memory, possibly eliding with
+  // recently uploaded data or cached copies.
+  // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
+  // The offset will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
+  std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
+                                                      size_t source_length,
+                                                      IndexFormat format);
+
+  // Uploads vertex buffer data from guest memory, possibly eliding with
+  // recently uploaded data or cached copies.
+  // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
+  // The offset will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
+  std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
+                                                       size_t source_length);
+
+  // Flushes all pending data to the GPU.
+  // Until this is called the GPU is not guaranteed to see any data.
+  // The given command buffer will be used to queue up events so that the
+  // cache can determine when data has been consumed.
+  void Flush(VkCommandBuffer command_buffer);
+
+  // Marks the cache as potentially invalid.
+  // This is not as strong as ClearCache and is a hint that any and all data
+  // should be verified before being reused.
+  void InvalidateCache();
+
+  // Clears all cached content and prevents future elision with pending data.
+  void ClearCache();
+
+ private:
+  // Allocates a block of memory in the transient buffer.
+  // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
+  VkDeviceSize AllocateTransientData(size_t alignment, size_t length);
+
+  RegisterFile* register_file_ = nullptr;
+  VkDevice device_ = nullptr;
+
+  // Staging ringbuffer we cycle through fast. Used for data we don't
+  // plan on keeping past the current frame.
+  size_t transient_capacity_ = 0;
+  VkBuffer transient_uniform_buffer_ = nullptr;
+  VkBuffer transient_index_buffer_ = nullptr;
+  VkBuffer transient_vertex_buffer_ = nullptr;
+  VkDeviceMemory transient_buffer_memory_ = nullptr;
+  void* transient_buffer_data_ = nullptr;
+
+  // Required alignments for our various types.
+  // All allocations must start at the appropriate alignment.
+  VkDeviceSize uniform_buffer_alignment_ = 0;
+  VkDeviceSize index_buffer_alignment_ = 0;
+  VkDeviceSize vertex_buffer_alignment_ = 0;
+
+  VkDescriptorPool descriptor_pool_ = nullptr;
+  VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
+  VkDescriptorSet transient_descriptor_set_ = nullptr;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc
new file mode 100644
index 000000000..e09931833
--- /dev/null
+++ b/src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -0,0 +1,335 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/vulkan/pipeline_cache.h"
+
+#include "third_party/xxhash/xxhash.h"
+#include "xenia/base/logging.h"
+#include "xenia/base/math.h"
+#include "xenia/base/memory.h"
+#include "xenia/base/profiling.h"
+#include "xenia/gpu/gpu_flags.h"
+#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+using xe::ui::vulkan::CheckResult;
+
+PipelineCache::PipelineCache(RegisterFile* register_file,
+                             ui::vulkan::VulkanDevice* device)
+    : register_file_(register_file), device_(*device) {}
+
+PipelineCache::~PipelineCache() {
+  // Destroy all shaders.
+  for (auto it : shader_map_) {
+    delete it.second;
+  }
+  shader_map_.clear();
+}
+
+VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
+                                        uint32_t guest_address,
+                                        const uint32_t* host_address,
+                                        uint32_t dword_count) {
+  // Hash the input memory and lookup the shader.
+  uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
+  auto it = shader_map_.find(data_hash);
+  if (it != shader_map_.end()) {
+    // Shader has been previously loaded.
+    return it->second;
+  }
+
+  // Always create the shader and stash it away.
+  // We need to track it even if it fails translation so we know not to try
+  // again.
+  VulkanShader* shader =
+      new VulkanShader(shader_type, data_hash, host_address, dword_count);
+  shader_map_.insert({data_hash, shader});
+
+  // Perform translation.
+  // If this fails the shader will be marked as invalid and ignored later.
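+  // (The failed shader object stays in shader_map_, so later lookups return
+  // the same invalid instance instead of re-running translation.)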
+ if (!shader_translator_.Translate(shader)) { + XELOGE("Shader translation failed; marking shader as ignored"); + return shader; + } + + // Prepare the shader for use (creates our VkShaderModule). + // It could still fail at this point. + if (!shader->Prepare()) { + XELOGE("Shader preparation failed; marking shader as ignored"); + return shader; + } + + if (shader->is_valid()) { + XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s", + shader_type == ShaderType::kVertex ? "vertex" : "pixel", + guest_address, dword_count * 4, + shader->ucode_disassembly().c_str()); + } + + // Dump shader files if desired. + if (!FLAGS_dump_shaders.empty()) { + shader->Dump(FLAGS_dump_shaders, "vk"); + } + + return shader; +} + +bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, + VkRenderPass render_pass, + PrimitiveType primitive_type) { + return false; +} + +void PipelineCache::ClearCache() { + // TODO(benvanik): caching. +} + +bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateShaders( + PrimitiveType prim_type) { + auto& regs = update_shaders_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. + assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + // dirty |= regs.vertex_shader != active_vertex_shader_; + // dirty |= regs.pixel_shader != active_pixel_shader_; + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + // regs.vertex_shader = static_cast(active_vertex_shader_); + // regs.pixel_shader = static_cast(active_pixel_shader_); + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { + auto& regs = update_render_targets_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + dirty |= SetShadowRegister(®s.rb_depth_info, 
XE_GPU_REG_RB_DEPTH_INFO); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateState( + PrimitiveType prim_type) { + bool mismatch = false; + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateViewportState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizerState(prim_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); + status = UpdateBlendState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() { + auto& regs = update_viewport_state_regs_; + + bool dirty = false; + // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, + // XE_GPU_REG_PA_CL_CLIP_CNTL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_window_offset, + XE_GPU_REG_PA_SC_WINDOW_OFFSET); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); + + // Much of this state machine is extracted from: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); + + // Done in VS, no need to flush state. 
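+  // (The window scalars in the carried-over GL-era code below are kept for
+  // reference only; the Vulkan path expects the vertex shader to apply the
+  // equivalent scaling, hence no state to flush here.)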
+ // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { + // draw_batcher_.set_window_scalar(1.0f, 1.0f); + //} else { + // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); + //} + + if (!dirty) { + return UpdateStatus::kCompatible; + } + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( + PrimitiveType prim_type) { + auto& regs = update_rasterizer_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); + dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, + XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { + auto& reg_file = *register_file_; + auto& regs = update_blend_state_regs_; + + // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE + // Deprecated in GL, implemented in shader. + // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; + // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32; + // draw_batcher_.set_alpha_test((color_control & 0x4) != 0, // + // ALPAHTESTENABLE + // color_control & 0x7, // ALPHAFUNC + // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); + + bool dirty = false; + dirty |= + SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + dirty |= SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); + dirty |= SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); + dirty |= SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); + dirty |= SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h new file mode 100644 index 000000000..56727e67a --- /dev/null +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -0,0 +1,167 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ +#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ + +#include + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/spirv/spirv_disassembler.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. +class PipelineCache { + public: + PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~PipelineCache(); + + // Loads a shader from the cache, possibly translating it. + VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address, + const uint32_t* host_address, uint32_t dword_count); + + // Configures a pipeline using the current render state and the given render + // pass. If a previously available pipeline is available it will be used, + // otherwise a new one may be created. Any state that can be set dynamically + // in the command buffer is issued at this time. + // Returns whether the pipeline could be successfully created. + bool ConfigurePipeline(VkCommandBuffer command_buffer, + VkRenderPass render_pass, + PrimitiveType primitive_type); + + // Currently configured pipeline layout, if any. + VkPipelineLayout current_pipeline_layout() const { return nullptr; } + // Currently configured vertex shader, if any. + VulkanShader* current_vertex_shader() const { return nullptr; } + // Currently configured pixel shader, if any. + VulkanShader* current_pixel_shader() const { return nullptr; } + + // Clears all cached content. + void ClearCache(); + + private: + // TODO(benvanik): geometry shader cache. + // TODO(benvanik): translated shader cache. + // TODO(benvanik): pipeline layouts. + // TODO(benvanik): pipeline cache. + + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; + + SpirvShaderTranslator shader_translator_; + xe::ui::spirv::SpirvDisassembler disassembler_; + // All loaded shaders mapped by their guest hash key. 
+  std::unordered_map<uint64_t, VulkanShader*> shader_map_;
+
+ private:
+  enum class UpdateStatus {
+    kCompatible,
+    kMismatch,
+    kError,
+  };
+
+  UpdateStatus UpdateShaders(PrimitiveType prim_type);
+  UpdateStatus UpdateRenderTargets();
+  UpdateStatus UpdateState(PrimitiveType prim_type);
+  UpdateStatus UpdateViewportState();
+  UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
+  UpdateStatus UpdateBlendState();
+  UpdateStatus UpdateDepthStencilState();
+
+  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
+  bool SetShadowRegister(float* dest, uint32_t register_name);
+
+  struct UpdateRenderTargetsRegisters {
+    uint32_t rb_modecontrol;
+    uint32_t rb_surface_info;
+    uint32_t rb_color_info;
+    uint32_t rb_color1_info;
+    uint32_t rb_color2_info;
+    uint32_t rb_color3_info;
+    uint32_t rb_color_mask;
+    uint32_t rb_depthcontrol;
+    uint32_t rb_stencilrefmask;
+    uint32_t rb_depth_info;
+
+    UpdateRenderTargetsRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } update_render_targets_regs_;
+  struct UpdateViewportStateRegisters {
+    // uint32_t pa_cl_clip_cntl;
+    uint32_t rb_surface_info;
+    uint32_t pa_cl_vte_cntl;
+    uint32_t pa_su_sc_mode_cntl;
+    uint32_t pa_sc_window_offset;
+    uint32_t pa_sc_window_scissor_tl;
+    uint32_t pa_sc_window_scissor_br;
+    float pa_cl_vport_xoffset;
+    float pa_cl_vport_yoffset;
+    float pa_cl_vport_zoffset;
+    float pa_cl_vport_xscale;
+    float pa_cl_vport_yscale;
+    float pa_cl_vport_zscale;
+
+    UpdateViewportStateRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } update_viewport_state_regs_;
+  struct UpdateRasterizerStateRegisters {
+    uint32_t pa_su_sc_mode_cntl;
+    uint32_t pa_sc_screen_scissor_tl;
+    uint32_t pa_sc_screen_scissor_br;
+    uint32_t multi_prim_ib_reset_index;
+    PrimitiveType prim_type;
+
+    UpdateRasterizerStateRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } update_rasterizer_state_regs_;
+  struct UpdateBlendStateRegisters {
+    uint32_t rb_blendcontrol[4];
+    float rb_blend_rgba[4];
+
+    UpdateBlendStateRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } update_blend_state_regs_;
+  struct UpdateDepthStencilStateRegisters {
+    uint32_t rb_depthcontrol;
+    uint32_t rb_stencilrefmask;
+
+    UpdateDepthStencilStateRegisters() { Reset(); }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+  } update_depth_stencil_state_regs_;
+  struct UpdateShadersRegisters {
+    PrimitiveType prim_type;
+    uint32_t pa_su_sc_mode_cntl;
+    uint32_t sq_program_cntl;
+    uint32_t sq_context_misc;
+    VulkanShader* vertex_shader;
+    VulkanShader* pixel_shader;
+
+    UpdateShadersRegisters() { Reset(); }
+    void Reset() {
+      sq_program_cntl = 0;
+      vertex_shader = pixel_shader = nullptr;
+    }
+  } update_shaders_regs_;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc
new file mode 100644
index 000000000..fef05f11f
--- /dev/null
+++ b/src/xenia/gpu/vulkan/render_cache.cc
@@ -0,0 +1,45 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details.
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/render_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +RenderCache::RenderCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device) + : register_file_(register_file), device_(*device) {} + +RenderCache::~RenderCache() = default; + +VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) { + return nullptr; +} + +void RenderCache::EndRenderPass() { + // +} + +void RenderCache::ClearCache() { + // TODO(benvanik): caching. +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h new file mode 100644 index 000000000..fb7c84e6a --- /dev/null +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -0,0 +1,46 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_ +#define XENIA_GPU_VULKAN_RENDER_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. +class RenderCache { + public: + RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~RenderCache(); + + VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer); + void EndRenderPass(); + + // Clears all cached content. + void ClearCache(); + + private: + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc new file mode 100644 index 000000000..bf95ef6a4 --- /dev/null +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -0,0 +1,37 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/texture_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +TextureCache::TextureCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device) + : register_file_(register_file), device_(*device) {} + +TextureCache::~TextureCache() = default; + +void TextureCache::ClearCache() { + // TODO(benvanik): caching. +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h new file mode 100644 index 000000000..3545fb72d --- /dev/null +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -0,0 +1,47 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ +#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. +class TextureCache { + public: + TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~TextureCache(); + + // TODO(benvanik): UploadTexture. + // TODO(benvanik): Resolve. + // TODO(benvanik): ReadTexture. + + // Clears all cached content. 
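+  // (No-op until texture caching is implemented.)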
+  void ClearCache();
+
+ private:
+  RegisterFile* register_file_ = nullptr;
+  VkDevice device_ = nullptr;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index 3320d2927..6490de44a 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -20,12 +20,16 @@
 #include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
 #include "xenia/gpu/vulkan/vulkan_graphics_system.h"
 #include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/vulkan_util.h"
 
 namespace xe {
 namespace gpu {
 namespace vulkan {
 
 using namespace xe::gpu::xenos;
+using xe::ui::vulkan::CheckResult;
+
+constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024;
 
 VulkanCommandProcessor::VulkanCommandProcessor(
     VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
@@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor(
 
 VulkanCommandProcessor::~VulkanCommandProcessor() = default;
 
-void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); }
+void VulkanCommandProcessor::ClearCaches() {
+  CommandProcessor::ClearCaches();
+
+  buffer_cache_->ClearCache();
+  pipeline_cache_->ClearCache();
+  render_cache_->ClearCache();
+  texture_cache_->ClearCache();
+}
 
 bool VulkanCommandProcessor::SetupContext() {
   if (!CommandProcessor::SetupContext()) {
@@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() {
     return false;
   }
 
+  // Acquire our device and queue.
+  auto context = static_cast<xe::ui::vulkan::VulkanContext*>(context_.get());
+  device_ = context->device();
+  queue_ = device_->AcquireQueue();
+  if (!queue_) {
+    // Need to reuse primary queue (with locks).
+    queue_ = device_->primary_queue();
+    queue_mutex_ = &device_->primary_queue_mutex();
+  }
+
+  // Setup fenced pools used for all our per-frame/per-draw resources.
+  command_buffer_pool_ = std::make_unique<ui::vulkan::CommandBufferPool>(
+      *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+  // Initialize the state machine caches.
+  buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
+                                                kDefaultBufferCacheCapacity);
+  pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, device_);
+  render_cache_ = std::make_unique<RenderCache>(register_file_, device_);
+  texture_cache_ = std::make_unique<TextureCache>(register_file_, device_);
+
   return true;
 }
 
 void VulkanCommandProcessor::ShutdownContext() {
+  // TODO(benvanik): wait until idle.
+
+  buffer_cache_.reset();
+  pipeline_cache_.reset();
+  render_cache_.reset();
+  texture_cache_.reset();
+
+  // Free all pools. This must come after all of our caches clean up.
+  command_buffer_pool_.reset();
+
+  // Release the queue, if we were using an acquired one.
+  if (!queue_mutex_) {
+    device_->ReleaseQueue(queue_);
+    queue_ = nullptr;
+  }
+
   CommandProcessor::ShutdownContext();
 }
 
@@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() {
   CommandProcessor::MakeCoherent();
 
   if (status_host & 0x80000000ul) {
-    // scratch_buffer_.ClearCache();
+    // TODO(benvanik): less-fine-grained clearing.
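+    // Ideally only the guest range named by the coherency registers would be
+    // invalidated instead of the entire transient cache.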
+ buffer_cache_->InvalidateCache(); } } @@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - // return shader_cache_.LookupOrInsertShader(shader_type, host_address, - // dword_count); - return nullptr; + return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, + dword_count); } -bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type, +bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { + auto& regs = *register_file_; + #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - // Skip all drawing for now - what did you expect? :) - return true; - - bool draw_valid = false; - // if (index_buffer_info) { - // draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count, - // index_buffer_info->format); - //} else { - // draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); - //} - if (!draw_valid) { - return false; - } - - auto& regs = *register_file_; - auto enable_mode = static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); if (enable_mode == ModeControl::kIgnore) { // Ignored. - // draw_batcher_.DiscardDraw(); return true; } else if (enable_mode == ModeControl::kCopy) { // Special copy handling. - // draw_batcher_.DiscardDraw(); return IssueCopy(); } -#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - /*draw_batcher_.DiscardDraw(); */ \ - return false; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } + // TODO(benvanik): bigger batches. + command_buffer_pool_->BeginBatch(); + VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); + CheckResult(err, "vkBeginCommandBuffer"); - UpdateStatus status; - bool mismatch = false; - status = UpdateShaders(prim_type); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders"); - status = UpdateRenderTargets(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets"); - // if (!active_framebuffer_) { - // // No framebuffer, so nothing we do will actually have an effect. - // // Treat it as a no-op. - // // TODO(benvanik): if we have a vs export, still allow it to go. - // draw_batcher_.DiscardDraw(); - // return true; - //} - - status = UpdateState(prim_type); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state"); - status = PopulateSamplers(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, - "Unable to prepare draw samplers"); - - status = PopulateIndexBuffer(index_buffer_info); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer"); - status = PopulateVertexBuffers(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers"); - - // if (!draw_batcher_.CommitDraw()) { - // return false; - //} - - // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); - if (context_->WasLost()) { - // This draw lost us the context. 
This typically isn't hit. - assert_always(); + // Begin the render pass. + // This will setup our framebuffer and begin the pass in the command buffer. + VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer); + if (!render_pass) { return false; } + // Configure the pipeline for drawing. + // This encodes all render state (blend, depth, etc), our shader stages, + // and our vertex input layout. + if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass, + primitive_type)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload the constants the shaders require. + auto vertex_shader = pipeline_cache_->current_vertex_shader(); + auto pixel_shader = pipeline_cache_->current_pixel_shader(); + auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters( + vertex_shader->constant_register_map()); + auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + pixel_shader->constant_register_map()); + if (vertex_constant_offset == VK_WHOLE_SIZE || + pixel_constant_offset == VK_WHOLE_SIZE) { + render_cache_->EndRenderPass(); + return false; + } + + // Configure constant uniform access to point at our offsets. + auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); + auto pipeline_layout = pipeline_cache_->current_pipeline_layout(); + uint32_t constant_offsets[2] = {static_cast(vertex_constant_offset), + static_cast(pixel_constant_offset)}; + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, 1, &constant_descriptor_set, + static_cast(xe::countof(constant_offsets)), + constant_offsets); + + // Upload and bind index buffer data (if we have any). + if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload and bind all vertex buffer data. + if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload and set descriptors for all textures. + if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) { + render_cache_->EndRenderPass(); + return false; + } + +#if 0 + // Actually issue the draw. + if (!index_buffer_info) { + // Auto-indexed draw. + uint32_t instance_count = 1; + uint32_t first_vertex = 0; + uint32_t first_instance = 0; + vkCmdDraw(command_buffer, index_count, instance_count, first_vertex, + first_instance); + } else { + // Index buffer draw. + uint32_t instance_count = 1; + uint32_t first_index = + register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; + uint32_t vertex_offset = 0; + uint32_t first_instance = 0; + vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index, + vertex_offset, first_instance); + } +#endif + + // End the rendering pass. + render_cache_->EndRenderPass(); + + // TODO(benvanik): bigger batches. 
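+  // Every draw currently records, submits, and (below) waits idle on its own
+  // command buffer and fence; batching would amortize one submit and one
+  // fence across all draws in a frame, roughly:
+#if 0
+  // (sketch only; frame_open_ is a hypothetical member that doesn't exist)
+  if (!frame_open_) {
+    command_buffer_pool_->BeginBatch();
+    frame_open_ = true;
+  }
+  // ... record every draw into one shared command buffer, then at frame end
+  // call vkEndCommandBuffer and issue a single vkQueueSubmit with one fence.
+#endif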
+ err = vkEndCommandBuffer(command_buffer); + CheckResult(err, "vkEndCommandBuffer"); + VkFence fence; + VkFenceCreateInfo fence_info; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.pNext = nullptr; + fence_info.flags = 0; + vkCreateFence(*device_, &fence_info, nullptr, &fence); + command_buffer_pool_->EndBatch(fence); + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = nullptr; + if (queue_mutex_) { + queue_mutex_->lock(); + } + err = vkQueueSubmit(queue_, 1, &submit_info, fence); + if (queue_mutex_) { + queue_mutex_->unlock(); + } + CheckResult(err, "vkQueueSubmit"); + if (queue_mutex_) { + queue_mutex_->lock(); + } + vkQueueWaitIdle(queue_); + if (queue_mutex_) { + queue_mutex_->unlock(); + } + command_buffer_pool_->Scavenge(); + vkDestroyFence(*device_, fence, nullptr); + return true; } -bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest, - uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool VulkanCommandProcessor::SetShadowRegister(float* dest, - uint32_t register_name) { - float value = register_file_->values[register_name].f32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders( - PrimitiveType prim_type) { - auto& regs = update_shaders_regs_; - - // These are the constant base addresses/ranges for shaders. - // We have these hardcoded right now cause nothing seems to differ. 
- assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == - 0x000FF000 || - register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == - 0x000FF100 || - register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - dirty |= regs.vertex_shader != active_vertex_shader_; - dirty |= regs.pixel_shader != active_pixel_shader_; - dirty |= regs.prim_type != prim_type; - if (!dirty) { - return UpdateStatus::kCompatible; - } - regs.vertex_shader = static_cast(active_vertex_shader_); - regs.pixel_shader = static_cast(active_pixel_shader_); - regs.prim_type = prim_type; - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateRenderTargets() { - auto& regs = update_render_targets_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); - dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState( - PrimitiveType prim_type) { - bool mismatch = false; - -#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - return status; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } - - UpdateStatus status; - status = UpdateViewportState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); - status = UpdateRasterizerState(prim_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); - status = UpdateBlendState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); - status = UpdateDepthStencilState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); - - return mismatch ? 
-}
-
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::UpdateViewportState() {
-  auto& regs = update_viewport_state_regs_;
-
-  bool dirty = false;
-  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
-  //                            XE_GPU_REG_PA_CL_CLIP_CNTL);
-  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
-  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
-  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
-  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
-                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
-  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
-                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
-  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
-                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
-                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
-                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
-                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
-                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
-                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
-  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
-                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);
-
-  // Much of this state machine is extracted from:
-  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
-  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
-  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
-
-  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
-  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
-  //            = false: multiply the X, Y coordinates by 1/W0.
-  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
-  //           = false: multiply the Z coordinate by 1/W0.
-  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
-  //              get 1/W0.
-  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
-  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
-  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);
-
-  // Done in VS, no need to flush state.
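The VTX_*_FMT notes above boil down to three bit tests on PA_CL_VTE_CNTL. A hedged sketch of the decode (bit positions taken from the commented-out set_vtx_fmt call, not re-verified against the register docs):

    uint32_t vte_cntl = regs.pa_cl_vte_cntl;
    bool vtx_xy_fmt = (vte_cntl >> 8) & 0x1;   // X/Y arrive pre-divided by W0.
    bool vtx_z_fmt = (vte_cntl >> 9) & 0x1;    // Z arrives pre-divided by W0.
    bool vtx_w0_fmt = (vte_cntl >> 10) & 0x1;  // W0 needs a reciprocal first.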
-  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
-  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
-  //} else {
-  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
-  //}
-
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  return UpdateStatus::kMismatch;
-}
-
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) {
-  auto& regs = update_rasterizer_state_regs_;
-
-  bool dirty = false;
-  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
-  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
-                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
-  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
-                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
-  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
-                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
-  dirty |= regs.prim_type != prim_type;
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  regs.prim_type = prim_type;
-
-  SCOPE_profile_cpu_f("gpu");
-
-  return UpdateStatus::kMismatch;
-}
-
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::UpdateBlendState() {
-  auto& reg_file = *register_file_;
-  auto& regs = update_blend_state_regs_;
-
-  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
-  // Deprecated in GL, implemented in shader.
-  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
-  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
-  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  //
-  //                              ALPAHTESTENABLE
-  //                              color_control & 0x7,  // ALPHAFUNC
-  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);
-
-  bool dirty = false;
-  dirty |=
-      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
-  dirty |=
-      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
-  dirty |=
-      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
-  dirty |=
-      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
-  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
-  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
-  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
-  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  SCOPE_profile_cpu_f("gpu");
-
-  return UpdateStatus::kMismatch;
-}
-
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::UpdateDepthStencilState() {
-  auto& regs = update_depth_stencil_state_regs_;
-
-  bool dirty = false;
-  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
-  dirty |=
-      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  SCOPE_profile_cpu_f("gpu");
-
-  return UpdateStatus::kMismatch;
-}
-
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::PopulateIndexBuffer(
-    IndexBufferInfo* index_buffer_info) {
+bool VulkanCommandProcessor::PopulateIndexBuffer(
+    VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) {
   auto& regs = *register_file_;
   if (!index_buffer_info || !index_buffer_info->guest_base) {
     // No index buffer or auto draw.
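A zero guest_base means an "auto" draw: vertices are fetched sequentially and no index buffer is bound. Sketch of how the caller is expected to branch on this (assumed IssueDraw logic; the actual body lives outside this hunk):

    if (index_buffer_info) {
      // PopulateIndexBuffer bound the translated indices.
      vkCmdDrawIndexed(command_buffer, index_count, 1, 0, 0, 0);
    } else {
      // Auto draw: no index buffer needed.
      vkCmdDraw(command_buffer, index_count, 1, 0, 0);
    }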
-    return UpdateStatus::kCompatible;
+    return true;
   }
 
   auto& info = *index_buffer_info;
@@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer(
 
   trace_writer_.WriteMemoryRead(info.guest_base, info.length);
 
-  return UpdateStatus::kCompatible;
+  // Upload (or get a cached copy of) the buffer.
+  const void* source_ptr =
+      memory_->TranslatePhysical<const void*>(info.guest_base);
+  size_t source_length =
+      info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
+                                                       : sizeof(uint16_t));
+  auto buffer_ref =
+      buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format);
+  if (buffer_ref.second == VK_WHOLE_SIZE) {
+    // Failed to upload buffer.
+    return false;
+  }
+
+  // Bind the buffer.
+  VkIndexType index_type = info.format == IndexFormat::kInt32
+                               ? VK_INDEX_TYPE_UINT32
+                               : VK_INDEX_TYPE_UINT16;
+  vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second,
+                       index_type);
+
+  return true;
 }
 
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::PopulateVertexBuffers() {
+bool VulkanCommandProcessor::PopulateVertexBuffers(
+    VkCommandBuffer command_buffer, VulkanShader* vertex_shader) {
+  auto& regs = *register_file_;
+
 #if FINE_GRAINED_DRAW_SCOPES
   SCOPE_profile_cpu_f("gpu");
 #endif  // FINE_GRAINED_DRAW_SCOPES
 
-  auto& regs = *register_file_;
-  assert_not_null(active_vertex_shader_);
+  auto& vertex_bindings = vertex_shader->vertex_bindings();
+  assert_true(vertex_bindings.size() <= 32);
+  VkBuffer all_buffers[32];
+  VkDeviceSize all_buffer_offsets[32];
+  uint32_t buffer_index = 0;
 
-  for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) {
+  for (const auto& vertex_binding : vertex_bindings) {
     int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
             (vertex_binding.fetch_constant / 3) * 6;
     const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
@@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() {
     }
     assert_true(fetch->endian == 2);
 
+    // TODO(benvanik): compute based on indices or vertex count.
+    // THIS CAN BE MASSIVELY INCORRECT (too large).
     size_t valid_range = size_t(fetch->size * 4);
 
     trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);
+
+    // Upload (or get a cached copy of) the buffer.
+    const void* source_ptr =
+        memory_->TranslatePhysical<const void*>(fetch->address << 2);
+    size_t source_length = valid_range;
+    auto buffer_ref =
+        buffer_cache_->UploadVertexBuffer(source_ptr, source_length);
+    if (buffer_ref.second == VK_WHOLE_SIZE) {
+      // Failed to upload buffer.
+      return false;
+    }
+
+    // Stash the buffer reference for our bulk bind at the end.
+    all_buffers[buffer_index] = buffer_ref.first;
+    all_buffer_offsets[buffer_index] = buffer_ref.second;
+    ++buffer_index;
  }
 
-  return UpdateStatus::kCompatible;
+  // Bind buffers.
+  vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers,
+                         all_buffer_offsets);
+
+  return true;
 }
 
-VulkanCommandProcessor::UpdateStatus
-VulkanCommandProcessor::PopulateSamplers() {
+bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
+                                              VulkanShader* vertex_shader,
+                                              VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
   SCOPE_profile_cpu_f("gpu");
 #endif  // FINE_GRAINED_DRAW_SCOPES
 
-  bool mismatch = false;
+  bool any_failed = false;
 
   // VS and PS samplers are shared, but may be used exclusively.
   // We walk each and setup lazily.
   bool has_setup_sampler[32] = {false};
 
   // Vertex texture samplers.
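The `(fetch_constant / 3) * 6` arithmetic above reflects how the fetch constants are packed: each six-dword group holds either three two-dword vertex fetch constants or one six-dword texture fetch constant. Sketched as helpers (illustrative names; hedged reading of the register layout):

    // Base register of the six-dword group holding a vertex fetch constant.
    uint32_t VertexFetchGroupBase(uint32_t fetch_constant) {
      return XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (fetch_constant / 3) * 6;
    }
    // Which of the group's three two-dword slots to read.
    uint32_t VertexFetchSlot(uint32_t fetch_constant) {
      return fetch_constant % 3;
    }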
-  for (auto& texture_binding : active_vertex_shader_->texture_bindings()) {
+  for (auto& texture_binding : vertex_shader->texture_bindings()) {
     if (has_setup_sampler[texture_binding.fetch_constant]) {
       continue;
     }
     has_setup_sampler[texture_binding.fetch_constant] = true;
-    auto status = PopulateSampler(texture_binding);
-    if (status == UpdateStatus::kError) {
-      return status;
-    } else if (status == UpdateStatus::kMismatch) {
-      mismatch = true;
-    }
+    any_failed = !PopulateSampler(command_buffer, texture_binding) || any_failed;
   }
 
   // Pixel shader texture sampler.
-  for (auto& texture_binding : active_pixel_shader_->texture_bindings()) {
+  for (auto& texture_binding : pixel_shader->texture_bindings()) {
     if (has_setup_sampler[texture_binding.fetch_constant]) {
       continue;
     }
     has_setup_sampler[texture_binding.fetch_constant] = true;
-    auto status = PopulateSampler(texture_binding);
-    if (status == UpdateStatus::kError) {
-      return UpdateStatus::kError;
-    } else if (status == UpdateStatus::kMismatch) {
-      mismatch = true;
-    }
+    any_failed = !PopulateSampler(command_buffer, texture_binding) || any_failed;
   }
 
-  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
+  return !any_failed;
 }
 
-VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
+bool VulkanCommandProcessor::PopulateSampler(
+    VkCommandBuffer command_buffer,
     const Shader::TextureBinding& texture_binding) {
   auto& regs = *register_file_;
   int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
@@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
 
   // ?
   if (!fetch.type) {
-    return UpdateStatus::kCompatible;
+    return true;
   }
   assert_true(fetch.type == 0x2);
 
   TextureInfo texture_info;
   if (!TextureInfo::Prepare(fetch, &texture_info)) {
     XELOGE("Unable to parse texture fetcher info");
-    return UpdateStatus::kCompatible;  // invalid texture used
+    return true;  // invalid texture used
   }
   SamplerInfo sampler_info;
   if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
                             &sampler_info)) {
     XELOGE("Unable to parse sampler info");
-    return UpdateStatus::kCompatible;  // invalid texture used
+    return true;  // invalid texture used
   }
 
   trace_writer_.WriteMemoryRead(texture_info.guest_address,
                                 texture_info.input_length);
 
-  return UpdateStatus::kCompatible;
+  // TODO(benvanik): texture cache lookup.
+  // TODO(benvanik): bind or return so PopulateSamplers can batch.
+
+  return true;
 }
 
 bool VulkanCommandProcessor::IssueCopy() {
   SCOPE_profile_cpu_f("gpu");
+  // TODO(benvanik): resolve.
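One subtlety in the PopulateSamplers accumulation above: the PopulateSampler call is the left operand of ||, so short-circuiting can never skip a sampler once an earlier one has failed. Distilled into a standalone illustration (std::function stands in for the member calls; not part of this patch):

    #include <functional>
    #include <vector>

    bool PopulateAll(const std::vector<std::function<bool()>>& steps) {
      bool any_failed = false;
      for (const auto& step : steps) {
        // step() runs unconditionally; a prior failure cannot suppress it.
        any_failed = !step() || any_failed;
      }
      return !any_failed;
    }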
return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index babbc9ab3..c350f77b0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -23,12 +23,17 @@ #include "xenia/base/threading.h" #include "xenia/gpu/command_processor.h" #include "xenia/gpu/register_file.h" -#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/buffer_cache.h" +#include "xenia/gpu/vulkan/pipeline_cache.h" +#include "xenia/gpu/vulkan/render_cache.h" +#include "xenia/gpu/vulkan/texture_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/xthread.h" #include "xenia/memory.h" +#include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_device.h" namespace xe { namespace gpu { @@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor { void ClearCaches() override; private: - enum class UpdateStatus { - kCompatible, - kMismatch, - kError, - }; - bool SetupContext() override; void ShutdownContext() override; @@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor { const uint32_t* host_address, uint32_t dword_count) override; - bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; - UpdateStatus UpdateShaders(PrimitiveType prim_type); - UpdateStatus UpdateRenderTargets(); - UpdateStatus UpdateState(PrimitiveType prim_type); - UpdateStatus UpdateViewportState(); - UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); - UpdateStatus UpdateBlendState(); - UpdateStatus UpdateDepthStencilState(); - UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info); - UpdateStatus PopulateVertexBuffers(); - UpdateStatus PopulateSamplers(); - UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding); + bool PopulateIndexBuffer(VkCommandBuffer command_buffer, + IndexBufferInfo* index_buffer_info); + bool PopulateVertexBuffers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader); + bool PopulateSamplers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); + bool PopulateSampler(VkCommandBuffer command_buffer, + const Shader::TextureBinding& texture_binding); bool IssueCopy() override; - SpirvShaderTranslator shader_translator_; + xe::ui::vulkan::VulkanDevice* device_ = nullptr; - private: - bool SetShadowRegister(uint32_t* dest, uint32_t register_name); - bool SetShadowRegister(float* dest, uint32_t register_name); - struct UpdateRenderTargetsRegisters { - uint32_t rb_modecontrol; - uint32_t rb_surface_info; - uint32_t rb_color_info; - uint32_t rb_color1_info; - uint32_t rb_color2_info; - uint32_t rb_color3_info; - uint32_t rb_color_mask; - uint32_t rb_depthcontrol; - uint32_t rb_stencilrefmask; - uint32_t rb_depth_info; + // TODO(benvanik): abstract behind context? + // Queue used to submit work. This may be a dedicated queue for the command + // processor and no locking will be required for use. If a dedicated queue + // was not available this will be the device primary_queue and the + // queue_mutex must be used to synchronize access to it. 
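The locking discipline this comment describes (take queue_mutex_ only when it exists) is hand-rolled at each submit site above. It could be wrapped in a small RAII guard; a hypothetical helper, not part of this patch:

    #include <mutex>

    // Locks the mutex if one was provided; no-op for dedicated-queue setups.
    class OptionalLock {
     public:
      explicit OptionalLock(std::mutex* mutex) : mutex_(mutex) {
        if (mutex_) mutex_->lock();
      }
      ~OptionalLock() {
        if (mutex_) mutex_->unlock();
      }
      OptionalLock(const OptionalLock&) = delete;
      OptionalLock& operator=(const OptionalLock&) = delete;

     private:
      std::mutex* mutex_;
    };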
+  VkQueue queue_ = nullptr;
+  std::mutex* queue_mutex_ = nullptr;
 
-    UpdateRenderTargetsRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_render_targets_regs_;
-  struct UpdateViewportStateRegisters {
-    // uint32_t pa_cl_clip_cntl;
-    uint32_t rb_surface_info;
-    uint32_t pa_cl_vte_cntl;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t pa_sc_window_offset;
-    uint32_t pa_sc_window_scissor_tl;
-    uint32_t pa_sc_window_scissor_br;
-    float pa_cl_vport_xoffset;
-    float pa_cl_vport_yoffset;
-    float pa_cl_vport_zoffset;
-    float pa_cl_vport_xscale;
-    float pa_cl_vport_yscale;
-    float pa_cl_vport_zscale;
+  std::unique_ptr<BufferCache> buffer_cache_;
+  std::unique_ptr<PipelineCache> pipeline_cache_;
+  std::unique_ptr<RenderCache> render_cache_;
+  std::unique_ptr<TextureCache> texture_cache_;
 
-    UpdateViewportStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_viewport_state_regs_;
-  struct UpdateRasterizerStateRegisters {
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t pa_sc_screen_scissor_tl;
-    uint32_t pa_sc_screen_scissor_br;
-    uint32_t multi_prim_ib_reset_index;
-    PrimitiveType prim_type;
-
-    UpdateRasterizerStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_rasterizer_state_regs_;
-  struct UpdateBlendStateRegisters {
-    uint32_t rb_blendcontrol[4];
-    float rb_blend_rgba[4];
-
-    UpdateBlendStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_blend_state_regs_;
-  struct UpdateDepthStencilStateRegisters {
-    uint32_t rb_depthcontrol;
-    uint32_t rb_stencilrefmask;
-
-    UpdateDepthStencilStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_depth_stencil_state_regs_;
-  struct UpdateShadersRegisters {
-    PrimitiveType prim_type;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t sq_program_cntl;
-    uint32_t sq_context_misc;
-    VulkanShader* vertex_shader;
-    VulkanShader* pixel_shader;
-
-    UpdateShadersRegisters() { Reset(); }
-    void Reset() {
-      sq_program_cntl = 0;
-      vertex_shader = pixel_shader = nullptr;
-    }
-  } update_shaders_regs_;
+  std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
 };
 
 }  // namespace vulkan
diff --git a/src/xenia/ui/vulkan/fenced_pools.cc b/src/xenia/ui/vulkan/fenced_pools.cc
new file mode 100644
index 000000000..0737b98c4
--- /dev/null
+++ b/src/xenia/ui/vulkan/fenced_pools.cc
@@ -0,0 +1,81 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/ui/vulkan/fenced_pools.h"
+
+#include "xenia/base/assert.h"
+#include "xenia/base/math.h"
+#include "xenia/ui/vulkan/vulkan_util.h"
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+using xe::ui::vulkan::CheckResult;
+
+CommandBufferPool::CommandBufferPool(VkDevice device,
+                                     uint32_t queue_family_index,
+                                     VkCommandBufferLevel level)
+    : BaseFencedPool(device), level_(level) {
+  // Create the pool used for allocating buffers.
+  // They are marked as transient (short-lived) and cycled frequently.
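The two flags set just below follow from that usage pattern: VK_COMMAND_POOL_CREATE_TRANSIENT_BIT hints the driver that buffers are short-lived, and VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT lets each buffer be reset individually, including the implicit reset vkBeginCommandBuffer performs on reuse. Caller-side sketch of that implicit reset (illustrative):

    // With RESET_COMMAND_BUFFER set on the pool, re-beginning a recycled
    // buffer implicitly resets it; no explicit vkResetCommandBuffer needed.
    VkCommandBufferBeginInfo begin_info = {};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    vkBeginCommandBuffer(command_buffer, &begin_info);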
+  VkCommandPoolCreateInfo cmd_pool_info;
+  cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+  cmd_pool_info.pNext = nullptr;
+  cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+                        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+  cmd_pool_info.queueFamilyIndex = queue_family_index;
+  auto err =
+      vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_);
+  CheckResult(err, "vkCreateCommandPool");
+
+  // Allocate a bunch of command buffers to start.
+  constexpr uint32_t kDefaultCount = 32;
+  VkCommandBufferAllocateInfo command_buffer_info;
+  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+  command_buffer_info.pNext = nullptr;
+  command_buffer_info.commandPool = command_pool_;
+  command_buffer_info.level = level;
+  command_buffer_info.commandBufferCount = kDefaultCount;
+  VkCommandBuffer command_buffers[kDefaultCount];
+  err =
+      vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers);
+  CheckResult(err, "vkAllocateCommandBuffers");
+  for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
+    PushEntry(command_buffers[i]);
+  }
+}
+
+CommandBufferPool::~CommandBufferPool() {
+  vkDestroyCommandPool(device_, command_pool_, nullptr);
+  command_pool_ = nullptr;
+}
+
+VkCommandBuffer CommandBufferPool::AllocateEntry() {
+  // TODO(benvanik): allocate a bunch at once?
+  VkCommandBufferAllocateInfo command_buffer_info;
+  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+  command_buffer_info.pNext = nullptr;
+  command_buffer_info.commandPool = command_pool_;
+  command_buffer_info.level = level_;
+  command_buffer_info.commandBufferCount = 1;
+  VkCommandBuffer command_buffer;
+  auto err =
+      vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer);
+  CheckResult(err, "vkAllocateCommandBuffers");
+  return command_buffer;
+}
+
+void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
+  vkFreeCommandBuffers(device_, command_pool_, 1, &handle);
+}
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h
new file mode 100644
index 000000000..52274a9d9
--- /dev/null
+++ b/src/xenia/ui/vulkan/fenced_pools.h
@@ -0,0 +1,200 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
+#define XENIA_UI_VULKAN_FENCED_POOLS_H_
+
+#include <memory>
+
+#include "xenia/base/assert.h"
+#include "xenia/ui/vulkan/vulkan.h"
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+// Simple pool for Vulkan homogeneous objects that cannot be reused while
+// in-flight.
+// It batches pooled objects into groups and uses a vkQueueSubmit fence to
+// indicate their availability. If no objects are free when one is requested
+// the caller is expected to create them.
+template <typename T, typename HANDLE>
+class BaseFencedPool {
+ public:
+  BaseFencedPool(VkDevice device) : device_(device) {}
+
+  virtual ~BaseFencedPool() {
+    // TODO(benvanik): wait on fence until done.
+    assert_null(pending_batch_list_head_);
+
+    // Run down free lists.
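The teardown loops that follow drain two intrusive singly-linked free lists. The generic shape, sketched (illustrative template, not part of this patch):

    // Drain an intrusive singly-linked list, disposing of each node.
    template <typename Node, typename Dispose>
    void DrainList(Node*& head, Dispose dispose) {
      while (head) {
        Node* node = head;
        head = node->next;
        dispose(node);  // e.g. free the wrapped handle, then delete node.
      }
    }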
+    while (free_batch_list_head_) {
+      auto batch = free_batch_list_head_;
+      free_batch_list_head_ = batch->next;
+      delete batch;
+    }
+    while (free_entry_list_head_) {
+      auto entry = free_entry_list_head_;
+      free_entry_list_head_ = entry->next;
+      static_cast<T*>(this)->FreeEntry(entry->handle);
+      delete entry;
+    }
+  }
+
+  // Checks all pending batches for completion and scavenges their entries.
+  // This should be called as frequently as reasonable.
+  void Scavenge() {
+    while (pending_batch_list_head_) {
+      auto batch = pending_batch_list_head_;
+      if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
+        // Batch has completed. Reclaim.
+        pending_batch_list_head_ = batch->next;
+        if (batch == pending_batch_list_tail_) {
+          pending_batch_list_tail_ = nullptr;
+        }
+        batch->next = free_batch_list_head_;
+        free_batch_list_head_ = batch;
+        batch->entry_list_tail->next = free_entry_list_head_;
+        free_entry_list_head_ = batch->entry_list_head;
+        batch->entry_list_head = nullptr;
+        batch->entry_list_tail = nullptr;
+      } else {
+        // Batch is still in-flight. Since batches are executed in order we know
+        // no others after it could have completed, so early-exit.
+        return;
+      }
+    }
+  }
+
+  // Begins a new batch.
+  // All entries acquired within this batch will be marked as in-use until
+  // the fence specified in EndBatch is signalled.
+  void BeginBatch() {
+    assert_null(open_batch_);
+    Batch* batch = nullptr;
+    if (free_batch_list_head_) {
+      // Reuse a batch.
+      batch = free_batch_list_head_;
+      free_batch_list_head_ = batch->next;
+      batch->next = nullptr;
+    } else {
+      // Allocate new batch.
+      batch = new Batch();
+      batch->next = nullptr;
+    }
+    batch->entry_list_head = nullptr;
+    batch->entry_list_tail = nullptr;
+    batch->fence = nullptr;
+    open_batch_ = batch;
+  }
+
+  // Attempts to acquire an entry from the pool in the current batch.
+  // If none are available a new one will be allocated.
+  HANDLE AcquireEntry() {
+    Entry* entry = nullptr;
+    if (free_entry_list_head_) {
+      // Slice off an entry from the free list.
+      entry = free_entry_list_head_;
+      free_entry_list_head_ = entry->next;
+    } else {
+      // No entry available; allocate new.
+      entry = new Entry();
+      entry->handle = static_cast<T*>(this)->AllocateEntry();
+    }
+    entry->next = nullptr;
+    if (!open_batch_->entry_list_head) {
+      open_batch_->entry_list_head = entry;
+    }
+    if (open_batch_->entry_list_tail) {
+      open_batch_->entry_list_tail->next = entry;
+    }
+    open_batch_->entry_list_tail = entry;
+    return entry->handle;
+  }
+
+  // Ends the current batch using the given fence to indicate when the batch
+  // has completed execution on the GPU.
+  void EndBatch(VkFence fence) {
+    assert_not_null(open_batch_);
+
+    // Close and see if we have anything.
+    auto batch = open_batch_;
+    open_batch_ = nullptr;
+    if (!batch->entry_list_head) {
+      // Nothing to do.
+      batch->next = free_batch_list_head_;
+      free_batch_list_head_ = batch;
+      return;
+    }
+
+    // Track the fence.
+    batch->fence = fence;
+
+    // Append to the end of the batch list.
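The append below maintains the invariant that Scavenge's early-exit relies on: batches enter the pending list strictly in submission order, so the list is FIFO and the first unsignaled fence bounds everything after it. The scan shape, restated (illustrative):

    // Fences signal in submission order, so stop at the first pending one.
    for (Batch* batch = pending_batch_list_head_; batch; batch = batch->next) {
      if (vkGetFenceStatus(device_, batch->fence) != VK_SUCCESS) break;
      // ... reclaim this batch and its entries ...
    }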
+    batch->next = nullptr;
+    if (!pending_batch_list_head_) {
+      pending_batch_list_head_ = batch;
+    }
+    if (pending_batch_list_tail_) {
+      pending_batch_list_tail_->next = batch;
+    }
+    // Always advance the tail so later batches chain after this one.
+    pending_batch_list_tail_ = batch;
+  }
+
+ protected:
+  void PushEntry(HANDLE handle) {
+    auto entry = new Entry();
+    entry->next = free_entry_list_head_;
+    entry->handle = handle;
+    free_entry_list_head_ = entry;
+  }
+
+  VkDevice device_ = nullptr;
+
+ private:
+  struct Entry {
+    Entry* next;
+    HANDLE handle;
+  };
+  struct Batch {
+    Batch* next;
+    Entry* entry_list_head;
+    Entry* entry_list_tail;
+    VkFence fence;
+  };
+
+  Batch* free_batch_list_head_ = nullptr;
+  Entry* free_entry_list_head_ = nullptr;
+  Batch* pending_batch_list_head_ = nullptr;
+  Batch* pending_batch_list_tail_ = nullptr;
+  Batch* open_batch_ = nullptr;
+};
+
+class CommandBufferPool
+    : public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
+ public:
+  CommandBufferPool(VkDevice device, uint32_t queue_family_index,
+                    VkCommandBufferLevel level);
+  ~CommandBufferPool() override;
+
+ protected:
+  friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
+  VkCommandBuffer AllocateEntry();
+  void FreeEntry(VkCommandBuffer handle);
+
+  VkCommandPool command_pool_ = nullptr;
+  VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+};
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_FENCED_POOLS_H_
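Tying the pieces together, a hedged end-to-end sketch of how the command processor drives one of these pools per submission (flow assumed from the BeginBatch/EndBatch contract and the submit path earlier in this patch; `device` and `queue` stand in for the real members):

    command_buffer_pool_->BeginBatch();
    VkCommandBuffer cmd = command_buffer_pool_->AcquireEntry();

    VkCommandBufferBeginInfo begin_info = {};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    vkBeginCommandBuffer(cmd, &begin_info);
    // ... record draw/copy commands ...
    vkEndCommandBuffer(cmd);

    VkFenceCreateInfo fence_info = {};
    fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    VkFence fence = VK_NULL_HANDLE;
    vkCreateFence(device, &fence_info, nullptr, &fence);
    command_buffer_pool_->EndBatch(fence);  // Batch retires when fence signals.

    VkSubmitInfo submit_info = {};
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.commandBufferCount = 1;
    submit_info.pCommandBuffers = &cmd;
    vkQueueSubmit(queue, 1, &submit_info, fence);

    // Periodically (e.g. once per frame):
    command_buffer_pool_->Scavenge();  // Recycles batches whose fences signaled.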