diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 0f8ea1075..9936f182f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -13,6 +13,7 @@ #include #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" @@ -44,7 +45,10 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, - uint32_t length) {} + uint32_t length) { + shared_memory_->MemoryInvalidationCallback(base_ptr, length, true); + primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true); +} void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} @@ -182,6 +186,13 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + primitive_processor_ = std::make_unique( + *register_file_, *memory_, trace_writer_, *shared_memory_, *this); + if (!primitive_processor_->Initialize()) { + XELOGE("Failed to initialize the geometric primitive processor"); + return false; + } + render_target_cache_ = std::make_unique(*this, *register_file_); if (!render_target_cache_->Initialize()) { @@ -285,6 +296,8 @@ void VulkanCommandProcessor::ShutdownContext() { render_target_cache_.reset(); + primitive_processor_.reset(); + shared_memory_.reset(); for (const auto& pipeline_layout_pair : pipeline_layouts_) { @@ -617,7 
+630,13 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - BeginSubmission(true); + const RegisterFile& regs = *register_file_; + + xenos::ModeControl edram_mode = regs.Get().edram_mode; + if (edram_mode == xenos::ModeControl::kCopy) { + // Special copy handling. + return IssueCopy(); + } // Vertex shader analysis. auto vertex_shader = static_cast(active_vertex_shader()); @@ -627,13 +646,30 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); + BeginSubmission(true); + + // Process primitives. + PrimitiveProcessor::ProcessingResult primitive_processing_result; + if (!primitive_processor_->Process(primitive_processing_result)) { + return false; + } + if (!primitive_processing_result.host_draw_vertex_count) { + // Nothing to draw. + return true; + } + // TODO(Triang3l): Tessellation. + if (primitive_processing_result.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex) { + return false; + } + // TODO(Triang3l): Get a pixel shader. VulkanShader* pixel_shader = nullptr; // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( - *vertex_shader, Shader::HostVertexShaderType::kVertex); + *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = SpirvShaderTranslator::Modification(0); @@ -664,10 +700,10 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // current_graphics_pipeline_layout_. 
VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; - if (!pipeline_cache_->ConfigurePipeline(vertex_shader_translation, - pixel_shader_translation, - framebuffer_key.render_pass_key, - pipeline, pipeline_layout_provider)) { + if (!pipeline_cache_->ConfigurePipeline( + vertex_shader_translation, pixel_shader_translation, + primitive_processing_result, framebuffer_key.render_pass_key, + pipeline, pipeline_layout_provider)) { return false; } deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -703,7 +739,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_graphics_pipeline_layout_ = pipeline_layout; } - const RegisterFile& regs = *register_file_; const ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); const VkPhysicalDeviceProperties& device_properties = @@ -718,7 +753,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // offset and is between maxViewportDimensions and viewportBoundsRange[1], // GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the // clamped range will be outside the largest possible framebuffer anyway. - // TODO(Triang3l): Possibly handle maxViewportDimensions and + // FIXME(Triang3l): Possibly handle maxViewportDimensions and // viewportBoundsRange separately because when using fragment shader // interlocks, framebuffers are not used, while the range may be wider than // dimensions? Though viewport bigger than 4096 - the smallest possible @@ -793,29 +828,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, << (vfetch_index & 63); } - // Set up the geometry. - if (indexed) { - uint32_t index_size = - index_buffer_info->format == xenos::IndexFormat::kInt32 - ? 
sizeof(uint32_t)
-            : sizeof(uint16_t);
-    assert_false(index_buffer_info->guest_base & (index_size - 1));
-    uint32_t index_base =
-        index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1);
-    uint32_t index_buffer_size = index_buffer_info->count * index_size;
-    if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
-      XELOGE(
-          "Failed to request index buffer at 0x{:08X} (size {}) in the shared "
-          "memory",
-          index_base, index_buffer_size);
-      return false;
-    }
-    deferred_command_buffer_.CmdVkBindIndexBuffer(
-        shared_memory_->buffer(), index_base,
-        index_buffer_info->format == xenos::IndexFormat::kInt32
-            ? VK_INDEX_TYPE_UINT32
-            : VK_INDEX_TYPE_UINT16);
-  }
+  // Insert the shared memory barrier if needed.
+  // TODO(Triang3l): Memory export.
   shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
 
   // After all commands that may dispatch or copy, enter the render pass before
@@ -843,10 +857,40 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
   }
 
   // Draw.
-  if (indexed) {
-    deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0);
+  if (primitive_processing_result.index_buffer_type ==
+      PrimitiveProcessor::ProcessedIndexBufferType::kNone) {
+    deferred_command_buffer_.CmdVkDraw(
+        primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
   } else {
-    deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0);
+    std::pair<VkBuffer, VkDeviceSize> index_buffer;
+    switch (primitive_processing_result.index_buffer_type) {
+      case PrimitiveProcessor::ProcessedIndexBufferType::kGuest:
+        index_buffer.first = shared_memory_->buffer();
+        index_buffer.second = primitive_processing_result.guest_index_base;
+        break;
+      case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted:
+        index_buffer = primitive_processor_->GetConvertedIndexBuffer(
+            primitive_processing_result.host_index_buffer_handle);
+        break;
+      case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin:
+        index_buffer = primitive_processor_->GetBuiltinIndexBuffer(
+            primitive_processing_result.host_index_buffer_handle);
+        break;
+      default:
+        assert_unhandled_case(primitive_processing_result.index_buffer_type);
+        return false;
+    }
+    // Use the host index format and count from the primitive processor - a
+    // host index buffer may be used even when the guest draw is not indexed
+    // (index_buffer_info is only provided for guest-indexed draws).
+    deferred_command_buffer_.CmdVkBindIndexBuffer(
+        index_buffer.first, index_buffer.second,
+        primitive_processing_result.host_index_format ==
+                xenos::IndexFormat::kInt16
+            ? VK_INDEX_TYPE_UINT16
+            : VK_INDEX_TYPE_UINT32);
+    deferred_command_buffer_.CmdVkDrawIndexed(
+        primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
   }
 
   return true;
@@ -952,6 +996,8 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
   }
 
   shared_memory_->CompletedSubmissionUpdated();
+
+  primitive_processor_->CompletedSubmissionUpdated();
 }
 
 void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
@@ -1006,6 +1052,8 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
     current_graphics_pipeline_ = VK_NULL_HANDLE;
     current_graphics_pipeline_layout_ = nullptr;
     current_graphics_descriptor_sets_bound_up_to_date_ = 0;
+
+    primitive_processor_->BeginSubmission();
   }
 
   if (is_opening_frame) {
@@ -1029,6 +1077,8 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
     // may be reused.
     transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_);
     uniform_buffer_pool_->Reclaim(frame_completed_);
+
+    primitive_processor_->BeginFrame();
   }
 }
 
@@ -1100,9 +1150,15 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
 
   bool is_closing_frame = is_swap && frame_open_;
 
+  if (is_closing_frame) {
+    primitive_processor_->EndFrame();
+  }
+
   if (submission_open_) {
     EndRenderPass();
 
+    primitive_processor_->EndSubmission();
+
     shared_memory_->EndSubmission();
 
     uniform_buffer_pool_->FlushWrites();
@@ -1255,6 +1311,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
             device, descriptor_set_layout_pair.second, nullptr);
       }
       descriptor_set_layouts_textures_.clear();
+
+      primitive_processor_->ClearCache();
     }
   }
 
@@ -1288,20 +1346,21 @@ void VulkanCommandProcessor::UpdateFixedFunctionState(
   // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
   auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
 
-  uint32_t pixel_size_x = 1, pixel_size_y = 1;
-
   // Viewport.
VkViewport viewport; - if (!viewport_info.xy_extent[0] || !viewport_info.xy_extent[1]) { - viewport.x = -1; - viewport.y = -1; - viewport.width = 1; - viewport.height = 1; - } else { + if (viewport_info.xy_extent[0] && viewport_info.xy_extent[1]) { viewport.x = float(viewport_info.xy_offset[0]); viewport.y = float(viewport_info.xy_offset[1]); viewport.width = float(viewport_info.xy_extent[0]); viewport.height = float(viewport_info.xy_extent[1]); + } else { + // Vulkan viewport width must be greater than 0.0f, but the Xenia viewport + // may be empty for various reasons - set the viewport to outside the + // framebuffer. + viewport.x = -1.0f; + viewport.y = -1.0f; + viewport.width = 1.0f; + viewport.height = 1.0f; } viewport.minDepth = viewport_info.z_min; viewport.maxDepth = viewport_info.z_max; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index e083b3755..0dba7caea 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -24,6 +24,7 @@ #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_primitive_processor.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" @@ -74,6 +75,9 @@ class VulkanCommandProcessor : public CommandProcessor { const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask); + uint64_t GetCurrentFrame() const { return frame_current_; } + uint64_t GetCompletedFrame() const { return frame_completed_; } + // Must be called before doing anything outside the render pass scope, // including adding pipeline barriers that are not a part of the render pass // scope. Submission must be open. 
@@ -247,6 +251,8 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr shared_memory_; + std::unique_ptr primitive_processor_; + std::unique_ptr pipeline_cache_; std::unique_ptr render_target_cache_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 631098fcf..5ce43edc7 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -133,6 +133,7 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( bool VulkanPipelineCache::ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out) { @@ -173,7 +174,8 @@ bool VulkanPipelineCache::ConfigurePipeline( } PipelineDescription description; - if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key, + if (!GetCurrentStateDescription(vertex_shader, pixel_shader, + primitive_processing_result, render_pass_key, description)) { return false; } @@ -232,13 +234,13 @@ bool VulkanPipelineCache::TranslateAnalyzedShader( bool VulkanPipelineCache::GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const { description_out.Reset(); const RegisterFile& regs = register_file_; auto pa_su_sc_mode_cntl = regs.Get(); - auto vgt_draw_initiator = regs.Get(); description_out.vertex_shader_hash = vertex_shader->shader().ucode_data_hash(); @@ -250,13 +252,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription( } description_out.render_pass_key = render_pass_key; - 
xenos::PrimitiveType primitive_type = vgt_draw_initiator.prim_type; PipelinePrimitiveTopology primitive_topology; - // Vulkan explicitly allows primitive restart only for specific primitive - // types, unlike Direct3D where it's valid for non-strips, but has - // implementation-defined behavior. - bool primitive_restart_allowed = false; - switch (primitive_type) { + switch (primitive_processing_result.host_primitive_type) { case xenos::PrimitiveType::kPointList: primitive_topology = PipelinePrimitiveTopology::kPointList; break; @@ -265,23 +262,19 @@ bool VulkanPipelineCache::GetCurrentStateDescription( break; case xenos::PrimitiveType::kLineStrip: primitive_topology = PipelinePrimitiveTopology::kLineStrip; - primitive_restart_allowed = true; break; case xenos::PrimitiveType::kTriangleList: case xenos::PrimitiveType::kRectangleList: primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: - if (device_pipeline_features_.triangle_fans) { - primitive_topology = PipelinePrimitiveTopology::kTriangleFan; - primitive_restart_allowed = true; - } else { - primitive_topology = PipelinePrimitiveTopology::kTriangleList; - } + primitive_topology = PipelinePrimitiveTopology::kTriangleFan; break; case xenos::PrimitiveType::kTriangleStrip: primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; - primitive_restart_allowed = true; + break; + case xenos::PrimitiveType::kQuadList: + primitive_topology = PipelinePrimitiveTopology::kLineListWithAdjacency; break; default: // TODO(Triang3l): All primitive types and tessellation. @@ -289,7 +282,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( } description_out.primitive_topology = primitive_topology; description_out.primitive_restart = - primitive_restart_allowed && pa_su_sc_mode_cntl.multi_prim_ib_ena; + primitive_processing_result.host_primitive_reset_enabled; // TODO(Triang3l): Tessellation. 
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); @@ -313,6 +306,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( polygon_type = std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); } + if (pa_su_sc_mode_cntl.poly_mode != xenos::PolygonModeEnable::kDualMode) { + polygon_type = xenos::PolygonType::kTriangles; + } switch (polygon_type) { case xenos::PolygonType::kPoints: // When points are not supported, use lines instead, preserving @@ -418,15 +414,27 @@ bool VulkanPipelineCache::EnsurePipelineCreated( switch (description.primitive_topology) { case PipelinePrimitiveTopology::kPointList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kLineList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kLineStrip: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; break; case PipelinePrimitiveTopology::kTriangleList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kTriangleStrip: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; @@ -441,9 +449,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( case PipelinePrimitiveTopology::kLineListWithAdjacency: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kPatchList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + assert_false(description.primitive_restart); + if 
(description.primitive_restart) { + return false; + } break; default: assert_unhandled_case(description.primitive_topology); diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 9eb5ed2d3..60654a99d 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -19,6 +19,7 @@ #include "xenia/base/hash.h" #include "xenia/base/platform.h" #include "xenia/base/xxhash.h" +#include "xenia/gpu/primitive_processor.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" @@ -69,11 +70,13 @@ class VulkanPipelineCache { const Shader& shader) const; // TODO(Triang3l): Return a deferred creation handle. - bool ConfigurePipeline(VulkanShader::VulkanTranslation* vertex_shader, - VulkanShader::VulkanTranslation* pixel_shader, - VulkanRenderTargetCache::RenderPassKey render_pass_key, - VkPipeline& pipeline_out, - const PipelineLayoutProvider*& pipeline_layout_out); + bool ConfigurePipeline( + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out); private: // Can only load pipeline storage if features of the device it was created on @@ -168,6 +171,7 @@ class VulkanPipelineCache { bool GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const; diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc new file mode 100644 index 
000000000..deeef270f
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc
@@ -0,0 +1,258 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+
+#include "xenia/base/assert.h"
+#include "xenia/base/logging.h"
+#include "xenia/gpu/vulkan/deferred_command_buffer.h"
+#include "xenia/gpu/vulkan/vulkan_command_processor.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_util.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+// Passes from_destructor = true so that Shutdown skips ShutdownCommon.
+VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); }
+
+// Runs the common initialization and creates the upload buffer pool used for
+// per-frame converted index buffers. Returns false, after shutting down, if
+// the common initialization fails.
+bool VulkanPrimitiveProcessor::Initialize() {
+  // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch.
+  // TODO(Triang3l): Portability subset triangleFans check when portability
+  // subset support is added.
+  // TODO(Triang3l): geometryShader check for quads when geometry shaders are
+  // added.
+  if (!InitializeCommon(true, true, false, false)) {
+    Shutdown();
+    return false;
+  }
+  frame_index_buffer_pool_ =
+      std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
+          command_processor_.GetVulkanContext().GetVulkanProvider(),
+          VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+          std::max(size_t(kMinRequiredConvertedIndexBufferSize),
+                   ui::GraphicsUploadBufferPool::kDefaultPageSize));
+  return true;
+}
+
+// Destroys the per-frame index buffer pool and the built-in index buffer along
+// with its upload buffer. ShutdownCommon is called only when not invoked from
+// the destructor.
+void VulkanPrimitiveProcessor::Shutdown(bool from_destructor) {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  frame_index_buffers_.clear();
+  frame_index_buffer_pool_.reset();
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         builtin_index_buffer_upload_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         builtin_index_buffer_upload_memory_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         builtin_index_buffer_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         builtin_index_buffer_memory_);
+
+  if (!from_destructor) {
+    ShutdownCommon();
+  }
+}
+
+// Destroys the built-in index buffer upload buffer once the submission in
+// which it was copied to builtin_index_buffer_ has completed.
+void VulkanPrimitiveProcessor::CompletedSubmissionUpdated() {
+  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
+      command_processor_.GetCompletedSubmission() >=
+          builtin_index_buffer_upload_submission_) {
+    const ui::vulkan::VulkanProvider& provider =
+        command_processor_.GetVulkanContext().GetVulkanProvider();
+    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+    VkDevice device = provider.device();
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_upload_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_upload_memory_);
+  }
+}
+
+// If the built-in index buffer hasn't been uploaded yet, records its copy from
+// the upload buffer and a transfer-write-to-index-read barrier in the current
+// submission.
+void VulkanPrimitiveProcessor::BeginSubmission() {
+  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
+      builtin_index_buffer_upload_submission_ == UINT64_MAX) {
+    // No need to submit deferred barriers - builtin_index_buffer_ has never
+    // been used yet, and builtin_index_buffer_upload_ is written before
+    // submitting commands reading it.
+
+    DeferredCommandBuffer& command_buffer =
+        command_processor_.deferred_command_buffer();
+
+    VkBufferCopy* copy_region = command_buffer.CmdCopyBufferEmplace(
+        builtin_index_buffer_upload_, builtin_index_buffer_, 1);
+    copy_region->srcOffset = 0;
+    copy_region->dstOffset = 0;
+    copy_region->size = builtin_index_buffer_size_;
+
+    VkBufferMemoryBarrier builtin_index_buffer_memory_barrier;
+    builtin_index_buffer_memory_barrier.sType =
+        VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    builtin_index_buffer_memory_barrier.pNext = nullptr;
+    builtin_index_buffer_memory_barrier.srcAccessMask =
+        VK_ACCESS_TRANSFER_WRITE_BIT;
+    builtin_index_buffer_memory_barrier.dstAccessMask =
+        VK_ACCESS_INDEX_READ_BIT;
+    builtin_index_buffer_memory_barrier.srcQueueFamilyIndex =
+        VK_QUEUE_FAMILY_IGNORED;
+    builtin_index_buffer_memory_barrier.dstQueueFamilyIndex =
+        VK_QUEUE_FAMILY_IGNORED;
+    builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_;
+    builtin_index_buffer_memory_barrier.offset = 0;
+    builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE;
+    command_buffer.CmdVkPipelineBarrier(
+        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0,
+        0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr);
+
+    builtin_index_buffer_upload_submission_ =
+        command_processor_.GetCurrentSubmission();
+  }
+}
+
+// Lets the per-frame index buffer pool reclaim pages of completed frames.
+void VulkanPrimitiveProcessor::BeginFrame() {
+  frame_index_buffer_pool_->Reclaim(command_processor_.GetCompletedFrame());
+}
+
+// Flushes the pending writes to the per-frame index buffer pool.
+void VulkanPrimitiveProcessor::EndSubmission() {
+  frame_index_buffer_pool_->FlushWrites();
+}
+
+// Drops the per-frame caches, invalidating the converted index buffer handles.
+void VulkanPrimitiveProcessor::EndFrame() {
+  ClearPerFrameCache();
+  frame_index_buffers_.clear();
+}
+
+// Creates the device-local built-in index buffer and a temporary upload buffer
+// filled by fill_callback. The copy between them is recorded in the next
+// BeginSubmission. Returns false, after cleaning up, on failure.
+bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
+    uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
+  assert_not_zero(index_count);
+  assert_true(builtin_index_buffer_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_upload_memory_ == VK_NULL_HANDLE);
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count);
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, builtin_index_buffer_size_,
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+          ui::vulkan::util::MemoryPurpose::kDeviceLocal, builtin_index_buffer_,
+          builtin_index_buffer_memory_)) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to create the built-in index "
+        "buffer GPU resource with {} 16-bit indices",
+        index_count);
+    return false;
+  }
+  uint32_t upload_memory_type;
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, builtin_index_buffer_size_,
+          VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+          ui::vulkan::util::MemoryPurpose::kUpload,
+          builtin_index_buffer_upload_, builtin_index_buffer_upload_memory_,
+          &upload_memory_type)) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to create the built-in index "
+        "buffer upload resource with {} 16-bit indices",
+        index_count);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_memory_);
+    return false;
+  }
+
+  void* mapping;
+  if (dfn.vkMapMemory(device, builtin_index_buffer_upload_memory_, 0,
+                      VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to map the built-in index buffer "
+        "upload resource with {} 16-bit indices",
+        index_count);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_upload_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_upload_memory_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_memory_);
+    return false;
+  }
+  fill_callback(reinterpret_cast<uint16_t*>(mapping));
+  // The mapping is of the upload buffer's memory - flush and unmap that one.
+  ui::vulkan::util::FlushMappedMemoryRange(
+      provider, builtin_index_buffer_upload_memory_, upload_memory_type);
+  dfn.vkUnmapMemory(device, builtin_index_buffer_upload_memory_);
+
+  // Schedule uploading in the first submission.
+  builtin_index_buffer_upload_submission_ = UINT64_MAX;
+  return true;
+}
+
+// Allocates index_count indices of the given format from the per-frame upload
+// pool, stores the (buffer, offset) pair under a new backend handle, and
+// returns the CPU mapping to write to, or nullptr if the allocation failed.
+void* VulkanPrimitiveProcessor::RequestHostConvertedIndexBufferForCurrentFrame(
+    xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
+    uint32_t coalignment_original_address, size_t& backend_handle_out) {
+  size_t index_size = format == xenos::IndexFormat::kInt16 ? sizeof(uint16_t)
+                                                           : sizeof(uint32_t);
+  VkBuffer buffer;
+  VkDeviceSize offset;
+  uint8_t* mapping = frame_index_buffer_pool_->Request(
+      command_processor_.GetCurrentFrame(),
+      index_size * index_count +
+          (coalign_for_simd ? XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE : 0),
+      index_size, buffer, offset);
+  if (!mapping) {
+    return nullptr;
+  }
+  if (coalign_for_simd) {
+    ptrdiff_t coalignment_offset =
+        GetSimdCoalignmentOffset(mapping, coalignment_original_address);
+    mapping += coalignment_offset;
+    offset = VkDeviceSize(offset + coalignment_offset);
+  }
+  backend_handle_out = frame_index_buffers_.size();
+  frame_index_buffers_.emplace_back(buffer, offset);
+  return mapping;
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.h b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h
new file mode 100644
index 000000000..50e729577
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h
@@ -0,0 +1,99 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
+#define XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
+
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <utility>
+
+#include "xenia/base/assert.h"
+#include "xenia/gpu/primitive_processor.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+class VulkanCommandProcessor;
+
+// Vulkan implementation of the primitive processor: owns the built-in index
+// buffer (uploaded in the first submission) and the per-frame pool of
+// host-converted index buffers.
+class VulkanPrimitiveProcessor final : public PrimitiveProcessor {
+ public:
+  VulkanPrimitiveProcessor(const RegisterFile& register_file, Memory& memory,
+                           TraceWriter& trace_writer,
+                           SharedMemory& shared_memory,
+                           VulkanCommandProcessor& command_processor)
+      : PrimitiveProcessor(register_file, memory, trace_writer, shared_memory),
+        command_processor_(command_processor) {}
+  ~VulkanPrimitiveProcessor();
+
+  bool Initialize();
+  void Shutdown(bool from_destructor = false);
+  void ClearCache() { frame_index_buffer_pool_->ClearCache(); }
+
+  void CompletedSubmissionUpdated();
+  void BeginSubmission();
+  void BeginFrame();
+  void EndSubmission();
+  void EndFrame();
+
+  std::pair<VkBuffer, VkDeviceSize> GetBuiltinIndexBuffer(size_t handle) const {
+    assert_not_null(builtin_index_buffer_);
+    return std::make_pair(
+        builtin_index_buffer_,
+        VkDeviceSize(GetBuiltinIndexBufferOffsetBytes(handle)));
+  }
+  std::pair<VkBuffer, VkDeviceSize> GetConvertedIndexBuffer(
+      size_t handle) const {
+    assert_true(handle < frame_index_buffers_.size());
+    return frame_index_buffers_[handle];
+  }
+
+ protected:
+  bool InitializeBuiltin16BitIndexBuffer(
+      uint32_t index_count,
+      std::function<void(uint16_t*)> fill_callback) override;
+
+  void* RequestHostConvertedIndexBufferForCurrentFrame(
+      xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
+      uint32_t coalignment_original_address,
+      size_t& backend_handle_out) override;
+
+ private:
+  VulkanCommandProcessor& command_processor_;
+
+  VkDeviceSize builtin_index_buffer_size_ = 0;
+  VkBuffer builtin_index_buffer_ = VK_NULL_HANDLE;
+  VkDeviceMemory builtin_index_buffer_memory_ = VK_NULL_HANDLE;
+  // Temporary buffer copied in the beginning of the first submission for
+  // uploading to builtin_index_buffer_, destroyed when the submission in which
+  // it was uploaded is completed.
+  VkBuffer builtin_index_buffer_upload_ = VK_NULL_HANDLE;
+  VkDeviceMemory builtin_index_buffer_upload_memory_ = VK_NULL_HANDLE;
+  // UINT64_MAX means not uploaded yet and needs uploading in the first
+  // submission (if the upload buffer exists at all).
+  uint64_t builtin_index_buffer_upload_submission_ = UINT64_MAX;
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> frame_index_buffer_pool_;
+  // Indexed by the backend handles.
+  std::deque<std::pair<VkBuffer, VkDeviceSize>> frame_index_buffers_;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_