From 65c8d2b28e0c3182efc7f1ccb5b5fcebf59a97eb Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 14:16:04 +0300 Subject: [PATCH] [Vulkan] Basic draw call architecture + [D3D12] Some cleanup --- .../gpu/d3d12/d3d12_command_processor.cc | 47 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 25 +- src/xenia/gpu/d3d12/pipeline_cache.h | 5 +- src/xenia/gpu/spirv_shader_translator.cc | 58 +- src/xenia/gpu/spirv_shader_translator.h | 56 +- .../gpu/vulkan/deferred_command_buffer.cc | 125 ++- .../gpu/vulkan/deferred_command_buffer.h | 193 ++++- .../gpu/vulkan/vulkan_command_processor.cc | 795 +++++++++++++++++- .../gpu/vulkan/vulkan_command_processor.h | 157 +++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 443 ++++++++++ src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 183 ++++ .../gpu/vulkan/vulkan_render_target_cache.cc | 136 +++ .../gpu/vulkan/vulkan_render_target_cache.h | 95 +++ src/xenia/gpu/vulkan/vulkan_shader.cc | 48 ++ src/xenia/gpu/vulkan/vulkan_shader.h | 39 + src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 5 +- .../ui/vulkan/transient_descriptor_pool.cc | 2 + 17 files changed, 2235 insertions(+), 177 deletions(-) create mode 100644 src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_pipeline_cache.h create mode 100644 src/xenia/gpu/vulkan/vulkan_render_target_cache.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_render_target_cache.h create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.h diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index f0be8c50e..30c7d2c13 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -2005,14 +2005,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, } // Must not call anything that can change the descriptor heap from now on! - // Ensure vertex and index buffers are resident and draw. + // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture - // validity will be tracked. + // validity is tracked. uint64_t vertex_buffers_resident[2] = {}; - for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { uint32_t vfetch_index = vertex_binding.fetch_constant; if (vertex_buffers_resident[vfetch_index >> 6] & - (1ull << (vfetch_index & 63))) { + (uint64_t(1) << (vfetch_index & 63))) { continue; } const auto& vfetch_constant = regs.Get( @@ -2045,7 +2046,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } - vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); } // Gather memexport ranges and ensure the heaps for them are resident, and @@ -2745,12 +2747,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { } void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; + // Window parameters. 
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: @@ -2846,14 +2848,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { scissor.right = pa_sc_window_scissor_br.br_x; scissor.bottom = pa_sc_window_scissor_br.br_y; if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor.left = - std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.top = - std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); - scissor.right = - std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.bottom = - std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -2915,12 +2917,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t line_loop_closing_index, xenos::Endian index_endian, uint32_t used_texture_mask, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_point_minmax = regs.Get(); @@ -3103,14 +3104,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index; system_constants_.line_loop_closing_index = line_loop_closing_index; - // Vertex index offset. - dirty |= system_constants_.vertex_base_index != vgt_indx_offset; - system_constants_.vertex_base_index = vgt_indx_offset; - // Index or tessellation edge factor buffer endianness. dirty |= system_constants_.vertex_index_endian != index_endian; system_constants_.vertex_index_endian = index_endian; + // Vertex index offset. + dirty |= system_constants_.vertex_base_index != vgt_indx_offset; + system_constants_.vertex_base_index = vgt_indx_offset; + // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { @@ -3574,7 +3575,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_vertex.float_bitmap[i]; // If no float constants at all, we can reuse any buffer for them, so not // invalidating. 
- if (float_constant_map_vertex.float_count != 0) { + if (float_constant_count_vertex) { cbuffer_binding_float_vertex_.up_to_date = false; } } @@ -3589,7 +3590,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_pixel.float_bitmap[i]) { current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; - if (float_constant_map_pixel.float_count != 0) { + if (float_constant_count_pixel) { cbuffer_binding_float_pixel_.up_to_date = false; } } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 94b7b4998..a600301bb 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -223,10 +223,10 @@ void PipelineCache::ClearCache(bool shutting_down) { } texture_binding_layout_map_.clear(); texture_binding_layouts_.clear(); - for (auto it : shader_map_) { + for (auto it : shaders_) { delete it.second; } - shader_map_.clear(); + shaders_.clear(); if (reinitialize_shader_storage) { InitializeShaderStorage(shader_storage_root, shader_storage_title_id, @@ -374,8 +374,7 @@ void PipelineCache::InitializeShaderStorage( } size_t ucode_byte_count = shader_header.ucode_dword_count * sizeof(uint32_t); - if (shader_map_.find(shader_header.ucode_data_hash) != - shader_map_.end()) { + if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) { // Already added - usually shaders aren't added without the intention of // translating them imminently, so don't do additional checks to // actually ensure that translation happens right now (they would cause @@ -402,7 +401,7 @@ void PipelineCache::InitializeShaderStorage( D3D12Shader* shader = new D3D12Shader(shader_header.type, ucode_data_hash, ucode_dwords.data(), shader_header.ucode_dword_count); - shader_map_.insert({ucode_data_hash, shader}); + shaders_.insert({ucode_data_hash, shader}); // Create new threads if the currently existing threads can't keep up with // file reading, but not more than the number of logical processors minus // one. @@ -439,7 +438,7 @@ void PipelineCache::InitializeShaderStorage( } shader_translation_threads.clear(); for (D3D12Shader* shader : shaders_failed_to_translate) { - shader_map_.erase(shader->ucode_data_hash()); + shaders_.erase(shader->ucode_data_hash()); delete shader; } } @@ -576,8 +575,8 @@ void PipelineCache::InitializeShaderStorage( PipelineRuntimeDescription pipeline_runtime_description; auto vertex_shader_it = - shader_map_.find(pipeline_description.vertex_shader_hash); - if (vertex_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.vertex_shader_hash); + if (vertex_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.vertex_shader = vertex_shader_it->second; @@ -586,8 +585,8 @@ void PipelineCache::InitializeShaderStorage( } if (pipeline_description.pixel_shader_hash) { auto pixel_shader_it = - shader_map_.find(pipeline_description.pixel_shader_hash); - if (pixel_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.pixel_shader_hash); + if (pixel_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.pixel_shader = pixel_shader_it->second; @@ -779,8 +778,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, uint32_t dword_count) { // Hash the input memory and lookup the shader. 
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { // Shader has been previously loaded. return it->second; } @@ -790,7 +789,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, // again. D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, dword_count); - shader_map_.insert({data_hash, shader}); + shaders_.insert({data_hash, shader}); return shader; } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index cdc6ed5f3..7798d3810 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -29,6 +29,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" namespace xe { namespace gpu { @@ -255,9 +256,9 @@ class PipelineCache { IDxcUtils* dxc_utils_ = nullptr; IDxcCompiler* dxc_compiler_ = nullptr; - // All loaded shaders mapped by their guest hash key. + // Ucode hash -> shader. std::unordered_map> - shader_map_; + shaders_; struct LayoutUID { size_t uid; diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index dd7cac5ba..f13418355 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -288,7 +288,7 @@ void SpirvShaderTranslator::StartTranslation() { id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); // Storage buffers have std430 packing, no padding to 4-component vectors. builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, - sizeof(uint32_t) * 4); + sizeof(uint32_t)); spv::Id type_shared_memory = builder_->makeStructType(id_vector_temp_, "XeSharedMemory"); builder_->addMemberName(type_shared_memory, 0, "shared_memory"); @@ -511,7 +511,9 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; } - if (features_.float_controls) { + // TODO(Triang3l): Re-enable float controls when + // VkPhysicalDeviceFloatControlsPropertiesKHR are handled. + /* if (features_.float_controls) { // Flush to zero, similar to the real hardware, also for things like Shader // Model 3 multiplication emulation. builder_->addCapability(spv::CapabilityDenormFlushToZero); @@ -523,7 +525,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); builder_->addExecutionMode(function_main_, spv::ExecutionModeSignedZeroInfNanPreserve, 32); - } + } */ spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); for (spv::Id interface_id : main_interface_) { @@ -982,7 +984,19 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } } -void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} +void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { + // Write 1 to point size (using a geometry shader or another kind of fallback + // to expand point sprites - point size support is not guaranteed, and the + // size would also be limited, and can't be controlled independently along two + // axes). 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPointSize)); + builder_->createStore( + const_float_1_, + builder_->createAccessChain(spv::StorageClassOutput, output_per_vertex_, + id_vector_temp_)); +} void SpirvShaderTranslator::UpdateExecConditionals( ParsedExecInstruction::Type type, uint32_t bool_constant_index, @@ -1054,9 +1068,8 @@ void SpirvShaderTranslator::UpdateExecConditionals( return; } cf_exec_condition_ = condition; - spv::Function& function = builder_->getBuildPoint()->getParent(); - cf_exec_conditional_merge_ = - new spv::Block(builder_->getUniqueId(), function); + cf_exec_conditional_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId()); spv::Block& inner_block = builder_->makeNewBlock(); builder_->createConditionalBranch( @@ -1095,7 +1108,8 @@ void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, spv::Id predicate_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); spv::Block& predicated_block = builder_->makeNewBlock(); - cf_instruction_predicate_merge_ = &builder_->makeNewBlock(); + cf_instruction_predicate_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId()); builder_->createConditionalBranch( predicate_id, @@ -1135,12 +1149,23 @@ void SpirvShaderTranslator::CloseExecConditionals() { } spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( - InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) { + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant) { EnsureBuildPointAvailable(); spv::Id base_pointer = spv::NoResult; switch (addressing_mode) { - case InstructionStorageAddressingMode::kStatic: - return builder_->makeIntConstant(int(storage_index)); + case InstructionStorageAddressingMode::kStatic: { + uint32_t static_storage_index = storage_index; + if (is_float_constant) { + static_storage_index = + constant_register_map().GetPackedFloatConstantIndex(storage_index); + assert_true(static_storage_index != UINT32_MAX); + if (static_storage_index == UINT32_MAX) { + static_storage_index = 0; + } + } + return builder_->makeIntConstant(int(static_storage_index)); + } case InstructionStorageAddressingMode::kAddressAbsolute: base_pointer = var_main_address_absolute_; break; @@ -1153,6 +1178,8 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( id_vector_temp_util_); break; } + assert_true(!is_float_constant || + constant_register_map().float_dynamic_addressing); assert_true(base_pointer != spv::NoResult); spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision); if (storage_index) { @@ -1165,8 +1192,9 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( spv::Id SpirvShaderTranslator::LoadOperandStorage( const InstructionOperand& operand) { - spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode, - operand.storage_index); + spv::Id index = GetStorageAddressingIndex( + operand.storage_addressing_mode, operand.storage_index, + operand.storage_source == InstructionStorageSource::kConstantFloat); EnsureBuildPointAvailable(); spv::Id vec4_pointer = spv::NoResult; switch (operand.storage_source) { @@ -1592,7 +1620,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { builder_->makeUintConstant( static_cast(xenos::Endian::k8in32))); 
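  // Note on the OpLogicalAnd -> OpLogicalOr change below: the 8-in-32 swap is
  // decomposed into an 8-in-16 swap followed by a 16-in-32 swap, so the
  // 8-in-16 step must execute for either k8in16 or k8in32 - a logical OR of
  // the two conditions, not an AND (likewise for the 16-in-32 step further
  // down, which must execute for k16in32 or k8in32).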
spv::Id is_8in16_or_8in32 = - builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in16, is_8in32); + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); assert_false(block_pre_8in16.isTerminated()); spv::Block& block_8in16 = builder_->makeNewBlock(); @@ -1633,7 +1661,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { builder_->makeUintConstant( static_cast(xenos::Endian::k16in32))); spv::Id is_8in32_or_16in32 = - builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in32, is_16in32); + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); spv::Block& block_16in32 = builder_->makeNewBlock(); spv::Block& block_16in32_merge = builder_->makeNewBlock(); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index bb74d5ab9..978ad8789 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -39,26 +39,49 @@ class SpirvShaderTranslator : public ShaderTranslator { // therefore SSBOs must only be used for shared memory - all other storage // resources must be images or texel buffers. enum DescriptorSet : uint32_t { - // In order of update frequency. - // Very frequently changed, especially for UI draws, and for models drawn in - // multiple parts - contains vertex and texture fetch constants. - kDescriptorSetFetchConstants, + // According to the "Pipeline Layout Compatibility" section of the Vulkan + // specification: + // "Two pipeline layouts are defined to be "compatible for set N" if they + // were created with identically defined descriptor set layouts for sets + // zero through N, and if they were created with identical push constant + // ranges." + // "Place the least frequently changing descriptor sets near the start of + // the pipeline layout, and place the descriptor sets representing the most + // frequently changing resources near the end. When pipelines are switched, + // only the descriptor set bindings that have been invalidated will need to + // be updated and the remainder of the descriptor set bindings will remain + // in place." + // This is partially the reverse of the Direct3D 12's rule of placing the + // most frequently changed descriptor sets in the beginning. Here all + // descriptor sets with an immutable layout are placed first, in reverse + // frequency of changing, and sets that may be different for different + // pipeline states last. + + // Always the same descriptor set layouts for all pipeline layouts: + + // Never changed. + kDescriptorSetSharedMemoryAndEdram, + // Pretty rarely used and rarely changed - flow control constants. + kDescriptorSetBoolLoopConstants, + // May stay the same across many draws. + kDescriptorSetSystemConstants, + // Less frequently changed (per-material). + kDescriptorSetFloatConstantsPixel, // Quite frequently changed (for one object drawn multiple times, for // instance - may contain projection matrices). kDescriptorSetFloatConstantsVertex, - // Less frequently changed (per-material). - kDescriptorSetFloatConstantsPixel, - // Per-material, combined images and samplers. - kDescriptorSetTexturesPixel, + // Very frequently changed, especially for UI draws, and for models drawn in + // multiple parts - contains vertex and texture fetch constants. 
+ kDescriptorSetFetchConstants, + + // Mutable part of the pipeline layout: + kDescriptorSetMutableLayoutsStart, + // Rarely used at all, but may be changed at an unpredictable rate when // vertex textures are used, combined images and samplers. - kDescriptorSetTexturesVertex, - // May stay the same across many draws. - kDescriptorSetSystemConstants, - // Pretty rarely used and rarely changed - flow control constants. - kDescriptorSetBoolLoopConstants, - // Never changed. - kDescriptorSetSharedMemoryAndEdram, + kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart, + // Per-material, combined images and samplers. + kDescriptorSetTexturesPixel, kDescriptorSetCount, }; @@ -162,7 +185,8 @@ class SpirvShaderTranslator : public ShaderTranslator { void CloseExecConditionals(); spv::Id GetStorageAddressingIndex( - InstructionStorageAddressingMode addressing_mode, uint32_t storage_index); + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant = false); // Loads unswizzled operand without sign modifiers as float4. spv::Id LoadOperandStorage(const InstructionOperand& operand); spv::Id ApplyOperandModifiers(spv::Id operand_value, diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index f9c359506..641843fc9 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -46,15 +46,65 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { stream_remaining -= kCommandHeaderSizeElements; switch (header.command) { + case Command::kVkBeginRenderPass: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = sizeof(ArgsVkBeginRenderPass); + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = args.render_pass; + render_pass_begin_info.framebuffer = args.framebuffer; + render_pass_begin_info.renderArea = args.render_area; + render_pass_begin_info.clearValueCount = args.clear_value_count; + if (render_pass_begin_info.clearValueCount) { + offset_bytes = xe::align(offset_bytes, alignof(VkClearValue)); + render_pass_begin_info.pClearValues = + reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += + sizeof(VkClearValue) * render_pass_begin_info.clearValueCount; + } else { + render_pass_begin_info.pClearValues = nullptr; + } + dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, + args.contents); + } break; + + case Command::kVkBindDescriptorSets: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets), + alignof(VkDescriptorSet)); + const VkDescriptorSet* descriptor_sets = + reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count; + const uint32_t* dynamic_offsets = nullptr; + if (args.dynamic_offset_count) { + offset_bytes = xe::align(offset_bytes, alignof(uint32_t)); + dynamic_offsets = reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count; + } + dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point, + args.layout, args.first_set, + args.descriptor_set_count, descriptor_sets, + args.dynamic_offset_count, dynamic_offsets); + } break; + case Command::kVkBindIndexBuffer: { auto& args = *reinterpret_cast(stream); 
dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset, args.index_type); } break; + case Command::kVkBindPipeline: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point, + args.pipeline); + } break; + case Command::kVkCopyBuffer: { auto& args = *reinterpret_cast(stream); - static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); dfn.vkCmdCopyBuffer( command_buffer, args.src_buffer, args.dst_buffer, args.region_count, reinterpret_cast( @@ -62,26 +112,37 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); } break; + case Command::kVkDraw: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count, + args.first_vertex, args.first_instance); + } break; + + case Command::kVkDrawIndexed: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdDrawIndexed(command_buffer, args.index_count, + args.instance_count, args.first_index, + args.vertex_offset, args.first_instance); + } break; + + case Command::kVkEndRenderPass: + dfn.vkCmdEndRenderPass(command_buffer); + break; + case Command::kVkPipelineBarrier: { auto& args = *reinterpret_cast(stream); size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier); - - const VkMemoryBarrier* memory_barriers; + const VkMemoryBarrier* memory_barriers = nullptr; if (args.memory_barrier_count) { - static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier)); memory_barriers = reinterpret_cast( reinterpret_cast(stream) + barrier_offset_bytes); barrier_offset_bytes += sizeof(VkMemoryBarrier) * args.memory_barrier_count; - } else { - memory_barriers = nullptr; } - - const VkBufferMemoryBarrier* buffer_memory_barriers; + const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr; if (args.buffer_memory_barrier_count) { - static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier)); buffer_memory_barriers = @@ -90,23 +151,16 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { barrier_offset_bytes); barrier_offset_bytes += sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count; - } else { - buffer_memory_barriers = nullptr; } - - const VkImageMemoryBarrier* image_memory_barriers; + const VkImageMemoryBarrier* image_memory_barriers = nullptr; if (args.image_memory_barrier_count) { - static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier)); image_memory_barriers = reinterpret_cast( reinterpret_cast(stream) + barrier_offset_bytes); barrier_offset_bytes += sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count; - } else { - image_memory_barriers = nullptr; } - dfn.vkCmdPipelineBarrier( command_buffer, args.src_stage_mask, args.dst_stage_mask, args.dependency_flags, args.memory_barrier_count, memory_barriers, @@ -114,6 +168,24 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.image_memory_barrier_count, image_memory_barriers); } break; + case Command::kVkSetScissor: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetScissor( + command_buffer, args.first_scissor, args.scissor_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)))); + } break; + + case 
Command::kVkSetViewport: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetViewport( + command_buffer, args.first_viewport, args.viewport_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)))); + } break; + default: assert_unhandled_case(header.command); break; @@ -133,38 +205,25 @@ void DeferredCommandBuffer::CmdVkPipelineBarrier( uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers) { size_t arguments_size = sizeof(ArgsVkPipelineBarrier); - - size_t memory_barriers_offset; + size_t memory_barriers_offset = 0; if (memory_barrier_count) { - static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier)); memory_barriers_offset = arguments_size; arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count; - } else { - memory_barriers_offset = 0; } - - size_t buffer_memory_barriers_offset; + size_t buffer_memory_barriers_offset = 0; if (buffer_memory_barrier_count) { - static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier)); buffer_memory_barriers_offset = arguments_size; arguments_size += sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count; - } else { - buffer_memory_barriers_offset = 0; } - - size_t image_memory_barriers_offset; + size_t image_memory_barriers_offset = 0; if (image_memory_barrier_count) { - static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier)); image_memory_barriers_offset = arguments_size; arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count; - } else { - image_memory_barriers_offset = 0; } - uint8_t* args_ptr = reinterpret_cast( WriteCommand(Command::kVkPipelineBarrier, arguments_size)); auto& args = *reinterpret_cast(args_ptr); diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 879c92d5a..9ed39557b 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -14,6 +14,7 @@ #include #include +#include "xenia/base/assert.h" #include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -31,6 +32,65 @@ class DeferredCommandBuffer { void Reset(); void Execute(VkCommandBuffer command_buffer); + // render_pass_begin->pNext of all barriers must be null. 
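  // A minimal usage sketch (all names taken from this change, not a fixed
  // API): draw-time code records commands into the deferred buffer, and they
  // are replayed into a real VkCommandBuffer when the submission is flushed:
  //   deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
  //                                                 VK_SUBPASS_CONTENTS_INLINE);
  //   deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
  //                                              pipeline);
  //   deferred_command_buffer_.CmdVkDraw(vertex_count, 1, 0, 0);
  //   deferred_command_buffer_.CmdVkEndRenderPass();
  //   ...
  //   deferred_command_buffer_.Execute(command_buffer);  // at submission time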
+ void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin, + VkSubpassContents contents) { + assert_null(render_pass_begin->pNext); + size_t arguments_size = sizeof(ArgsVkBeginRenderPass); + uint32_t clear_value_count = render_pass_begin->clearValueCount; + size_t clear_values_offset = 0; + if (clear_value_count) { + arguments_size = xe::align(arguments_size, alignof(VkClearValue)); + clear_values_offset = arguments_size; + arguments_size += sizeof(VkClearValue) * clear_value_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBeginRenderPass, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.render_pass = render_pass_begin->renderPass; + args.framebuffer = render_pass_begin->framebuffer; + args.render_area = render_pass_begin->renderArea; + args.clear_value_count = clear_value_count; + args.contents = contents; + if (clear_value_count) { + std::memcpy(args_ptr + clear_values_offset, + render_pass_begin->pClearValues, + sizeof(VkClearValue) * clear_value_count); + } + } + + void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point, + VkPipelineLayout layout, uint32_t first_set, + uint32_t descriptor_set_count, + const VkDescriptorSet* descriptor_sets, + uint32_t dynamic_offset_count, + const uint32_t* dynamic_offsets) { + size_t arguments_size = + xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet)); + size_t descriptor_sets_offset = arguments_size; + arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count; + size_t dynamic_offsets_offset = 0; + if (dynamic_offset_count) { + arguments_size = xe::align(arguments_size, alignof(uint32_t)); + dynamic_offsets_offset = arguments_size; + arguments_size += sizeof(uint32_t) * dynamic_offset_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBindDescriptorSets, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.pipeline_bind_point = pipeline_bind_point; + args.layout = layout; + args.first_set = first_set; + args.descriptor_set_count = descriptor_set_count; + args.dynamic_offset_count = dynamic_offset_count; + std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets, + sizeof(VkDescriptorSet) * descriptor_set_count); + if (dynamic_offset_count) { + std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets, + sizeof(uint32_t) * dynamic_offset_count); + } + } + void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) { auto& args = *reinterpret_cast(WriteCommand( @@ -40,9 +100,16 @@ class DeferredCommandBuffer { args.index_type = index_type; } + void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point, + VkPipeline pipeline) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline))); + args.pipeline_bind_point = pipeline_bind_point; + args.pipeline = pipeline; + } + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, uint32_t region_count) { - static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); const size_t header_size = xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); uint8_t* args_ptr = reinterpret_cast( @@ -60,6 +127,30 @@ class DeferredCommandBuffer { regions, sizeof(VkBufferCopy) * region_count); } + void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count, + uint32_t first_vertex, uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw))); + args.vertex_count = vertex_count; + args.instance_count = 
instance_count; + args.first_vertex = first_vertex; + args.first_instance = first_instance; + } + + void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count, + uint32_t first_index, int32_t vertex_offset, + uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed))); + args.index_count = index_count; + args.instance_count = instance_count; + args.first_index = first_index; + args.vertex_offset = vertex_offset; + args.first_instance = first_instance; + } + + void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); } + // pNext of all barriers must be null. void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, @@ -71,11 +162,47 @@ class DeferredCommandBuffer { uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers); + void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count, + const VkRect2D* scissors) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetScissor, + header_size + sizeof(VkRect2D) * scissor_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_scissor = first_scissor; + args.scissor_count = scissor_count; + std::memcpy(args_ptr + header_size, scissors, + sizeof(VkRect2D) * scissor_count); + } + + void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count, + const VkViewport* viewports) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetViewport, + header_size + sizeof(VkViewport) * viewport_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_viewport = first_viewport; + args.viewport_count = viewport_count; + std::memcpy(args_ptr + header_size, viewports, + sizeof(VkViewport) * viewport_count); + } + private: enum class Command { + kVkBeginRenderPass, + kVkBindDescriptorSets, kVkBindIndexBuffer, + kVkBindPipeline, kVkCopyBuffer, + kVkDraw, + kVkDrawIndexed, + kVkEndRenderPass, kVkPipelineBarrier, + kVkSetScissor, + kVkSetViewport, }; struct CommandHeader { @@ -85,17 +212,58 @@ class DeferredCommandBuffer { static constexpr size_t kCommandHeaderSizeElements = (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + struct ArgsVkBeginRenderPass { + VkRenderPass render_pass; + VkFramebuffer framebuffer; + VkRect2D render_area; + uint32_t clear_value_count; + VkSubpassContents contents; + // Followed by aligned optional VkClearValue[]. + static_assert(alignof(VkClearValue) <= alignof(uintmax_t)); + }; + + struct ArgsVkBindDescriptorSets { + VkPipelineBindPoint pipeline_bind_point; + VkPipelineLayout layout; + uint32_t first_set; + uint32_t descriptor_set_count; + uint32_t dynamic_offset_count; + // Followed by aligned VkDescriptorSet[], optional uint32_t[]. + static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t)); + }; + struct ArgsVkBindIndexBuffer { VkBuffer buffer; VkDeviceSize offset; VkIndexType index_type; }; + struct ArgsVkBindPipeline { + VkPipelineBindPoint pipeline_bind_point; + VkPipeline pipeline; + }; + struct ArgsVkCopyBuffer { VkBuffer src_buffer; VkBuffer dst_buffer; uint32_t region_count; - // Followed by VkBufferCopy[]. + // Followed by aligned VkBufferCopy[]. 
+ static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + }; + + struct ArgsVkDraw { + uint32_t vertex_count; + uint32_t instance_count; + uint32_t first_vertex; + uint32_t first_instance; + }; + + struct ArgsVkDrawIndexed { + uint32_t index_count; + uint32_t instance_count; + uint32_t first_index; + int32_t vertex_offset; + uint32_t first_instance; }; struct ArgsVkPipelineBarrier { @@ -105,8 +273,25 @@ class DeferredCommandBuffer { uint32_t memory_barrier_count; uint32_t buffer_memory_barrier_count; uint32_t image_memory_barrier_count; - // Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[], - // VkImageMemoryBarrier[]. + // Followed by aligned optional VkMemoryBarrier[], + // optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[]. + static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + }; + + struct ArgsVkSetScissor { + uint32_t first_scissor; + uint32_t scissor_count; + // Followed by aligned VkRect2D[]. + static_assert(alignof(VkRect2D) <= alignof(uintmax_t)); + }; + + struct ArgsVkSetViewport { + uint32_t first_viewport; + uint32_t viewport_count; + // Followed by aligned VkViewport[]. + static_assert(alignof(VkViewport) <= alignof(uintmax_t)); }; void* WriteCommand(Command command, size_t arguments_size_bytes); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 7b895f48c..434ce0d02 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -9,15 +9,24 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include #include +#include #include #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/shader.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -54,6 +63,16 @@ bool VulkanCommandProcessor::SetupContext() { transient_descriptor_pool_uniform_buffers_ = std::make_unique( provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768); + // 16384 is bigger than any single uniform buffer that Xenia needs, but is the + // minimum maxUniformBufferRange, thus the safe minimum amount. 
+ VkDeviceSize uniform_buffer_alignment = std::max( + provider.device_properties().limits.minUniformBufferOffsetAlignment, + VkDeviceSize(1)); + uniform_buffer_pool_ = std::make_unique( + provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize, + size_t(16384)), + size_t(uniform_buffer_alignment))); VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; descriptor_set_layout_create_info.sType = @@ -162,6 +181,20 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + render_target_cache_ = + std::make_unique(*this, *register_file_); + if (!render_target_cache_->Initialize()) { + XELOGE("Failed to initialize the render target cache"); + return false; + } + + pipeline_cache_ = std::make_unique( + *this, *register_file_, *render_target_cache_); + if (!pipeline_cache_->Initialize()) { + XELOGE("Failed to initialize the graphics pipeline cache"); + return false; + } + // Shared memory and EDRAM common bindings. VkDescriptorPoolSize descriptor_pool_sizes[1]; descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -229,6 +262,9 @@ bool VulkanCommandProcessor::SetupContext() { // interlocks case. dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr); + // Just not to expose uninitialized memory. + std::memset(&system_constants_, 0, sizeof(system_constants_)); + return true; } @@ -244,6 +280,10 @@ void VulkanCommandProcessor::ShutdownContext() { dfn.vkDestroyDescriptorPool, device, shared_memory_and_edram_descriptor_pool_); + pipeline_cache_.reset(); + + render_target_cache_.reset(); + shared_memory_.reset(); for (const auto& pipeline_layout_pair : pipeline_layouts_) { @@ -276,6 +316,7 @@ void VulkanCommandProcessor::ShutdownContext() { ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_empty_); + uniform_buffer_pool_.reset(); transient_descriptor_pool_uniform_buffers_.reset(); sparse_bind_wait_stage_mask_ = 0; @@ -325,6 +366,42 @@ void VulkanCommandProcessor::ShutdownContext() { CommandProcessor::ShutdownContext(); } +void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + CommandProcessor::WriteRegister(index, value); + + if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && + index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { + if (frame_open_) { + uint32_t float_constant_index = + (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; + if (float_constant_index >= 256) { + float_constant_index -= 256; + if (current_float_constant_map_pixel_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + } + } else { + if (current_float_constant_map_vertex_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + } + } + } + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && + index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { + current_graphics_descriptor_set_values_up_to_date_ &= ~( + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && + index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + } +} + void 
VulkanCommandProcessor::SparseBindBuffer( VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask) { @@ -356,17 +433,25 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, EndSubmission(true); } -bool VulkanCommandProcessor::GetPipelineLayout( - uint32_t texture_count_pixel, uint32_t texture_count_vertex, - PipelineLayout& pipeline_layout_out) { +void VulkanCommandProcessor::EndRenderPass() { + assert_true(submission_open_); + if (current_render_pass_ == VK_NULL_HANDLE) { + return; + } + deferred_command_buffer_.CmdVkEndRenderPass(); + current_render_pass_ = VK_NULL_HANDLE; +} + +const VulkanPipelineCache::PipelineLayoutProvider* +VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, + uint32_t texture_count_vertex) { PipelineLayoutKey pipeline_layout_key; pipeline_layout_key.texture_count_pixel = texture_count_pixel; pipeline_layout_key.texture_count_vertex = texture_count_vertex; { auto it = pipeline_layouts_.find(pipeline_layout_key.key); if (it != pipeline_layouts_.end()) { - pipeline_layout_out = it->second; - return true; + return &it->second; } } @@ -462,26 +547,28 @@ bool VulkanCommandProcessor::GetPipelineLayout( VkDescriptorSetLayout descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount]; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - descriptor_set_layout_float_constants_vertex_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - descriptor_set_layout_float_constants_pixel_; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = - descriptor_set_layout_textures_pixel; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = - descriptor_set_layout_textures_vertex; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = - descriptor_set_layout_system_constants_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; + // Immutable layouts. descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = descriptor_set_layout_shared_memory_and_edram_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = + descriptor_set_layout_system_constants_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + descriptor_set_layout_float_constants_pixel_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + descriptor_set_layout_float_constants_vertex_; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + // Mutable layouts. 
+ descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = + descriptor_set_layout_textures_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = + descriptor_set_layout_textures_pixel; VkPipelineLayoutCreateInfo pipeline_layout_create_info; pipeline_layout_create_info.sType = @@ -508,16 +595,18 @@ bool VulkanCommandProcessor::GetPipelineLayout( descriptor_set_layout_textures_pixel; pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref = descriptor_set_layout_textures_vertex; - pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); - pipeline_layout_out = pipeline_layout_entry; - return true; + auto emplaced_pair = + pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); + // unordered_map insertion doesn't invalidate element references. + return &emplaced_pair.first->second; } Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return nullptr; + return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, + dword_count); } bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, @@ -530,9 +619,135 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, BeginSubmission(true); + auto vertex_shader = static_cast(active_vertex_shader()); + if (!vertex_shader) { + // Always need a vertex shader. + return false; + } + // TODO(Triang3l): Get a pixel shader. + VulkanShader* pixel_shader = nullptr; + + VulkanRenderTargetCache::FramebufferKey framebuffer_key; + if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) { + return false; + } + VkFramebuffer framebuffer = + render_target_cache_->GetFramebuffer(framebuffer_key); + if (framebuffer == VK_NULL_HANDLE) { + return false; + } + VkRenderPass render_pass = + render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + + // Update the graphics pipeline, and if the new graphics pipeline has a + // different layout, invalidate incompatible descriptor sets before updating + // current_graphics_pipeline_layout_. + VkPipeline pipeline; + const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; + if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader, + framebuffer_key.render_pass_key, + pipeline, pipeline_layout_provider)) { + return false; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + auto pipeline_layout = + static_cast(pipeline_layout_provider); + if (current_graphics_pipeline_layout_ != pipeline_layout) { + if (current_graphics_pipeline_layout_) { + // Keep descriptor set layouts for which the new pipeline layout is + // compatible with the previous one (pipeline layouts are compatible for + // set N if set layouts 0 through N are compatible). 
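        // Because the immutable constant and shared-memory sets are numbered
        // below the mutable texture sets (kDescriptorSetMutableLayoutsStart),
        // only the bindings from the first differing texture set layout onward
        // need to be invalidated here.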
+ uint32_t descriptor_sets_kept = + uint32_t(SpirvShaderTranslator::kDescriptorSetCount); + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref != + pipeline_layout->descriptor_set_layout_textures_vertex_ref) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); + } + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref != + pipeline_layout->descriptor_set_layout_textures_pixel_ref) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel)); + } + } else { + // No or unknown pipeline layout previously bound - all bindings are in an + // indeterminate state. + current_graphics_descriptor_sets_bound_up_to_date_ = 0; + } + current_graphics_pipeline_layout_ = pipeline_layout; + } + + // Update fixed-function dynamic state. + UpdateFixedFunctionState(); + bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; - // Actually draw. + // Update system constants before uploading them. + UpdateSystemConstantValues(indexed ? index_buffer_info->endianness + : xenos::Endian::kNone); + + // Update uniform buffers and descriptor sets after binding the pipeline with + // the new layout. + if (!UpdateBindings(vertex_shader, pixel_shader)) { + return false; + } + + const RegisterFile& regs = *register_file_; + + // Ensure vertex buffers are resident. + // TODO(Triang3l): Cache residency for ranges in a way similar to how texture + // validity is tracked. + uint64_t vertex_buffers_resident[2] = {}; + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { + uint32_t vfetch_index = vertex_binding.fetch_constant; + if (vertex_buffers_resident[vfetch_index >> 6] & + (uint64_t(1) << (vfetch_index & 63))) { + continue; + } + const auto& vfetch_constant = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + switch (vfetch_constant.type) { + case xenos::FetchConstantType::kVertex: + break; + case xenos::FetchConstantType::kInvalidVertex: + if (cvars::gpu_allow_invalid_fetch_constants) { + break; + } + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " + "This " + "is incorrect behavior, but you can try bypassing this by " + "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + default: + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + } + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { + XELOGE( + "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared " + "memory", + vfetch_constant.address << 2, vfetch_constant.size << 2); + return false; + } + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); + } + + // Set up the geometry. if (indexed) { uint32_t index_size = index_buffer_info->format == xenos::IndexFormat::kInt32 @@ -557,6 +772,37 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + // After all commands that may dispatch or copy, enter the render pass before + // drawing. 
+ if (current_render_pass_ != render_pass || + current_framebuffer_ != framebuffer) { + if (current_render_pass_ != VK_NULL_HANDLE) { + deferred_command_buffer_.CmdVkEndRenderPass(); + } + current_render_pass_ = render_pass; + current_framebuffer_ = framebuffer; + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.framebuffer = framebuffer; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent.width = 1280; + render_pass_begin_info.renderArea.extent.height = 720; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + } + + // Draw. + if (indexed) { + deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0); + } else { + deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0); + } + return true; } @@ -659,9 +905,6 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_buffers_submitted_.pop_front(); } - // Reclaim descriptor pools. - transient_descriptor_pool_uniform_buffers_->Reclaim(submission_completed_); - shared_memory_->CompletedSubmissionUpdated(); } @@ -705,13 +948,41 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { submission_open_ = true; // Start a new deferred command buffer - will submit it to the real one in - // the end of the submission (when async pipeline state object creation - // requests are fulfilled). + // the end of the submission (when async pipeline object creation requests + // are fulfilled). deferred_command_buffer_.Reset(); + + // Reset cached state of the command buffer. + ff_viewport_update_needed_ = true; + ff_scissor_update_needed_ = true; + current_render_pass_ = VK_NULL_HANDLE; + current_framebuffer_ = VK_NULL_HANDLE; + current_graphics_pipeline_ = VK_NULL_HANDLE; + current_graphics_pipeline_layout_ = nullptr; + current_graphics_descriptor_sets_bound_up_to_date_ = 0; } if (is_opening_frame) { frame_open_ = true; + + // Reset bindings that depend on transient data. + std::memset(current_float_constant_map_vertex_, 0, + sizeof(current_float_constant_map_vertex_)); + std::memset(current_float_constant_map_pixel_, 0, + sizeof(current_float_constant_map_pixel_)); + std::memset(current_graphics_descriptor_sets_, 0, + sizeof(current_graphics_descriptor_sets_)); + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = + shared_memory_and_edram_descriptor_set_; + current_graphics_descriptor_set_values_up_to_date_ = + uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram; + + // Reclaim pool pages - no need to do this every small submission since some + // may be reused. + transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_); + uniform_buffer_pool_->Reclaim(frame_completed_); } } @@ -784,8 +1055,12 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; if (submission_open_) { + EndRenderPass(); + shared_memory_->EndSubmission(); + uniform_buffer_pool_->FlushWrites(); + // Submit sparse binds earlier, before executing the deferred command // buffer, to reduce latency. 
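    // These are the VkSparseMemoryBind ranges queued via SparseBindBuffer (for
    // instance, by the sparsely bound shared memory buffer); they go through
    // vkQueueBindSparse rather than being recorded into the command buffer.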
if (!sparse_memory_binds_.empty()) { @@ -910,13 +1185,30 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { cache_clear_requested_ = false; - transient_descriptor_pool_uniform_buffers_->ClearCache(); - assert_true(command_buffers_submitted_.empty()); for (const CommandBuffer& command_buffer : command_buffers_writable_) { dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); } command_buffers_writable_.clear(); + + uniform_buffer_pool_->ClearCache(); + transient_descriptor_pool_uniform_buffers_->ClearCache(); + + pipeline_cache_->ClearCache(); + + render_target_cache_->ClearCache(); + + for (const auto& pipeline_layout_pair : pipeline_layouts_) { + dfn.vkDestroyPipelineLayout( + device, pipeline_layout_pair.second.pipeline_layout, nullptr); + } + pipeline_layouts_.clear(); + for (const auto& descriptor_set_layout_pair : + descriptor_set_layouts_textures_) { + dfn.vkDestroyDescriptorSetLayout( + device, descriptor_set_layout_pair.second, nullptr); + } + descriptor_set_layouts_textures_.clear(); } } @@ -936,6 +1228,441 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() return stages; } +void VulkanCommandProcessor::UpdateFixedFunctionState() { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const RegisterFile& regs = *register_file_; + + // Window parameters. + // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h + // See r200UpdateWindow: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + auto pa_sc_window_offset = regs.Get(); + + uint32_t pixel_size_x = 1, pixel_size_y = 1; + + // Viewport. + // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // In games, either all are enabled (for regular drawing) or none are (for + // rectangle lists usually). + // + // If scale/offset is enabled, the Xenos shader is writing (neglecting W + // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) + // box. If it's not, the position is in screen space. Since we can only use + // the NDC in PC APIs, we use a viewport of the largest possible size, and + // divide the position by it in translated shaders. + // + // TODO(Triang3l): Move all of this to draw_util. + // TODO(Triang3l): Limit the viewport if exceeding the device limit; move to + // NDC scale/offset constants. + auto pa_cl_vte_cntl = regs.Get(); + float viewport_scale_x = + pa_cl_vte_cntl.vport_x_scale_ena + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) + : 4096.0f; + float viewport_scale_y = + pa_cl_vte_cntl.vport_y_scale_ena + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) + : 4096.0f; + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + : 1.0f; + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + : std::abs(viewport_scale_x); + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + : std::abs(viewport_scale_y); + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena + ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + : 0.0f; + if (regs.Get().vtx_window_offset_enable) { + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); + } + VkViewport viewport; + viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x); + viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y); + viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x); + viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y); + viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f); + viewport.maxDepth = + std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f); + ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x; + ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y; + ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width; + ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height; + ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth; + ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth; + if (ff_viewport_update_needed_) { + ff_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport); + ff_viewport_update_needed_ = false; + } + + // Scissor. + // TODO(Triang3l): Move all of this to draw_util. + // TODO(Triang3l): Limit the scissor if exceeding the device limit. + auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); + VkRect2D scissor; + scissor.offset.x = int32_t(pa_sc_window_scissor_tl.tl_x); + scissor.offset.y = int32_t(pa_sc_window_scissor_tl.tl_y); + int32_t scissor_br_x = + std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor.offset.x); + int32_t scissor_br_y = + std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor.offset.y); + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.offset.x = std::max( + scissor.offset.x + pa_sc_window_offset.window_x_offset, int32_t(0)); + scissor.offset.y = std::max( + scissor.offset.y + pa_sc_window_offset.window_y_offset, int32_t(0)); + scissor_br_x = std::max(scissor_br_x + pa_sc_window_offset.window_x_offset, + int32_t(0)); + scissor_br_y = std::max(scissor_br_y + pa_sc_window_offset.window_y_offset, + int32_t(0)); + } + scissor.extent.width = uint32_t(scissor_br_x - scissor.offset.x); + scissor.extent.height = uint32_t(scissor_br_y - scissor.offset.y); + scissor.offset.x *= pixel_size_x; + scissor.offset.y *= pixel_size_y; + scissor.extent.width *= pixel_size_x; + scissor.extent.height *= pixel_size_y; + ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor.offset.x; + ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor.offset.y; + ff_scissor_update_needed_ |= ff_scissor_.extent.width != scissor.extent.width; + ff_scissor_update_needed_ |= + ff_scissor_.extent.height != scissor.extent.height; + if (ff_scissor_update_needed_) { + ff_scissor_ = scissor; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor); + ff_scissor_update_needed_ = false; + } +} + +void VulkanCommandProcessor::UpdateSystemConstantValues( + xenos::Endian index_endian) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const RegisterFile& regs = *register_file_; + int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + + bool dirty = false; + + // Index or tessellation edge factor buffer endianness. 
+  dirty |= system_constants_.vertex_index_endian != index_endian;
+  system_constants_.vertex_index_endian = index_endian;
+
+  // Vertex index offset.
+  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
+  system_constants_.vertex_base_index = vgt_indx_offset;
+
+  if (dirty) {
+    current_graphics_descriptor_set_values_up_to_date_ &=
+        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
+  }
+}
+
+bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
+                                            const VulkanShader* pixel_shader) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  const RegisterFile& regs = *register_file_;
+
+  // Invalidate descriptors for changed data.
+  // These are the constant base addresses/ranges for shaders.
+  // They are hardcoded right now because nothing seems to differ on the
+  // Xbox 360 (however, OpenGL ES on Adreno 200 on Android has different
+  // ranges).
+  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
+              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
+  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
+              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
+  // Check if the float constant layout is still the same and get the counts.
+  const Shader::ConstantRegisterMap& float_constant_map_vertex =
+      vertex_shader->constant_register_map();
+  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
+  for (uint32_t i = 0; i < 4; ++i) {
+    if (current_float_constant_map_vertex_[i] !=
+        float_constant_map_vertex.float_bitmap[i]) {
+      current_float_constant_map_vertex_[i] =
+          float_constant_map_vertex.float_bitmap[i];
+      // If there are no float constants at all, any buffer can be reused for
+      // them, so there is no need to invalidate.
+      if (float_constant_count_vertex) {
+        current_graphics_descriptor_set_values_up_to_date_ &=
+            ~(uint32_t(1)
+              << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
+      }
+    }
+  }
+  uint32_t float_constant_count_pixel = 0;
+  if (pixel_shader != nullptr) {
+    const Shader::ConstantRegisterMap& float_constant_map_pixel =
+        pixel_shader->constant_register_map();
+    float_constant_count_pixel = float_constant_map_pixel.float_count;
+    for (uint32_t i = 0; i < 4; ++i) {
+      if (current_float_constant_map_pixel_[i] !=
+          float_constant_map_pixel.float_bitmap[i]) {
+        current_float_constant_map_pixel_[i] =
+            float_constant_map_pixel.float_bitmap[i];
+        if (float_constant_count_pixel) {
+          current_graphics_descriptor_set_values_up_to_date_ &=
+              ~(uint32_t(1)
+                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
+        }
+      }
+    }
+  } else {
+    std::memset(current_float_constant_map_pixel_, 0,
+                sizeof(current_float_constant_map_pixel_));
+  }
+
+  // Make sure new descriptor sets are bound to the command buffer.
+  current_graphics_descriptor_sets_bound_up_to_date_ &=
+      current_graphics_descriptor_set_values_up_to_date_;
+
+  // Write the new descriptor sets.
+ VkWriteDescriptorSet + write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount]; + uint32_t write_descriptor_set_count = 0; + uint32_t write_descriptor_set_bits = 0; + assert_not_zero( + current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram)); + VkDescriptorBufferInfo buffer_info_bool_loop_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) { + VkWriteDescriptorSet& write_bool_loop_constants = + write_descriptor_sets[write_descriptor_set_count++]; + constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); + uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding( + kBoolLoopConstantsSize, + descriptor_set_layout_fetch_bool_loop_constants_, + buffer_info_bool_loop_constants, write_bool_loop_constants); + if (!mapping_bool_loop_constants) { + return false; + } + std::memcpy(mapping_bool_loop_constants, + ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + kBoolLoopConstantsSize); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + write_bool_loop_constants.dstSet; + } + VkDescriptorBufferInfo buffer_info_system_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSystemConstants))) { + VkWriteDescriptorSet& write_system_constants = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_system_constants = WriteUniformBufferBinding( + sizeof(SpirvShaderTranslator::SystemConstants), + descriptor_set_layout_system_constants_, buffer_info_system_constants, + write_system_constants); + if (!mapping_system_constants) { + return false; + } + std::memcpy(mapping_system_constants, &system_constants_, + sizeof(SpirvShaderTranslator::SystemConstants)); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSystemConstants] = + write_system_constants.dstSet; + } + VkDescriptorBufferInfo buffer_info_float_constant_pixel; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) { + // Even if the shader doesn't need any float constants, a valid binding must + // still be provided (the pipeline layout always has float constants, for + // both the vertex shader and the pixel shader), so if the first draw in the + // frame doesn't have float constants at all, still allocate an empty + // buffer. 
+ VkWriteDescriptorSet& write_float_constants_pixel = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding( + sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)), + descriptor_set_layout_float_constants_pixel_, + buffer_info_float_constant_pixel, write_float_constants_pixel); + if (!mapping_float_constants_pixel) { + return false; + } + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping_float_constants_pixel, + ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping_float_constants_pixel += sizeof(float) * 4; + } + } + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + write_float_constants_pixel.dstSet; + } + VkDescriptorBufferInfo buffer_info_float_constant_vertex; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) { + VkWriteDescriptorSet& write_float_constants_vertex = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding( + sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)), + descriptor_set_layout_float_constants_vertex_, + buffer_info_float_constant_vertex, write_float_constants_vertex); + if (!mapping_float_constants_vertex) { + return false; + } + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping_float_constants_vertex, + ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping_float_constants_vertex += sizeof(float) * 4; + } + } + write_descriptor_set_bits |= + uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + write_float_constants_vertex.dstSet; + } + VkDescriptorBufferInfo buffer_info_fetch_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { + VkWriteDescriptorSet& write_fetch_constants = + write_descriptor_sets[write_descriptor_set_count++]; + constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; + uint8_t* mapping_fetch_constants = WriteUniformBufferBinding( + kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_, + buffer_info_fetch_constants, write_fetch_constants); + if (!mapping_fetch_constants) { + return false; + } + std::memcpy(mapping_fetch_constants, + ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + kFetchConstantsSize); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFetchConstants] = + write_fetch_constants.dstSet; + } + if 
(write_descriptor_set_count) {
+    const ui::vulkan::VulkanProvider& provider =
+        GetVulkanContext().GetVulkanProvider();
+    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+    VkDevice device = provider.device();
+    dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
+                               write_descriptor_sets, 0, nullptr);
+  }
+  // Only mark as valid if written successfully.
+  current_graphics_descriptor_set_values_up_to_date_ |=
+      write_descriptor_set_bits;
+
+  // Bind the new descriptor sets.
+  uint32_t descriptor_sets_needed =
+      (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1;
+  if (current_graphics_pipeline_layout_
+          ->descriptor_set_layout_textures_vertex_ref ==
+      descriptor_set_layout_empty_) {
+    descriptor_sets_needed &=
+        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex);
+  }
+  if (current_graphics_pipeline_layout_
+          ->descriptor_set_layout_textures_pixel_ref ==
+      descriptor_set_layout_empty_) {
+    descriptor_sets_needed &=
+        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel);
+  }
+  uint32_t descriptor_sets_remaining =
+      descriptor_sets_needed &
+      ~current_graphics_descriptor_sets_bound_up_to_date_;
+  uint32_t descriptor_set_index;
+  while (
+      xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) {
+    uint32_t descriptor_set_mask_tzcnt =
+        xe::tzcnt(~(descriptor_sets_remaining |
+                    ((uint32_t(1) << descriptor_set_index) - 1)));
+    // TODO(Triang3l): Bind to compute for rectangle list emulation without
+    // geometry shaders.
+    deferred_command_buffer_.CmdVkBindDescriptorSets(
+        VK_PIPELINE_BIND_POINT_GRAPHICS,
+        current_graphics_pipeline_layout_->pipeline_layout,
+        descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index,
+        current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr);
+    if (descriptor_set_mask_tzcnt >= 32) {
+      break;
+    }
+    descriptor_sets_remaining &=
+        ~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1);
+  }
+  current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed;
+
+  return true;
+}
+
+uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding(
+    size_t size, VkDescriptorSetLayout descriptor_set_layout,
+    VkDescriptorBufferInfo& descriptor_buffer_info_out,
+    VkWriteDescriptorSet& write_descriptor_set_out) {
+  VkDescriptorSet descriptor_set =
+      transient_descriptor_pool_uniform_buffers_->Request(
+          frame_current_, descriptor_set_layout, 1);
+  if (descriptor_set == VK_NULL_HANDLE) {
+    return nullptr;
+  }
+  const ui::vulkan::VulkanProvider& provider =
+      GetVulkanContext().GetVulkanProvider();
+  uint8_t* mapping = uniform_buffer_pool_->Request(
+      frame_current_, size,
+      size_t(
+          provider.device_properties().limits.minUniformBufferOffsetAlignment),
+      descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
+  if (!mapping) {
+    return nullptr;
+  }
+  descriptor_buffer_info_out.range = VkDeviceSize(size);
+  write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  write_descriptor_set_out.pNext = nullptr;
+  write_descriptor_set_out.dstSet = descriptor_set;
+  write_descriptor_set_out.dstBinding = 0;
+  write_descriptor_set_out.dstArrayElement = 0;
+  write_descriptor_set_out.descriptorCount = 1;
+  write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+  write_descriptor_set_out.pImageInfo = nullptr;
+  write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out;
+  write_descriptor_set_out.pTexelBufferView = nullptr;
+  return mapping;
+}
+
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
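For illustration only (not part of the patch): below is a minimal standalone sketch of the contiguous-range descriptor set binding technique used in UpdateBindings above, where a bitmask of stale descriptor sets is walked and each contiguous run of set bits is bound with a single call. BitScanForward and TrailingZeroCount are hypothetical stand-ins for xe::bit_scan_forward and xe::tzcnt, implemented here with C++20 <bit>; the printf stands in for vkCmdBindDescriptorSets.

// Standalone sketch, assuming a 32-bit mask of descriptor set indices that
// need rebinding. Each iteration finds the run [set_index, run_end) of
// consecutive stale sets and "binds" it in one call.
#include <bit>
#include <cstdint>
#include <cstdio>

bool BitScanForward(uint32_t value, uint32_t* index) {
  if (!value) {
    return false;
  }
  *index = uint32_t(std::countr_zero(value));
  return true;
}

uint32_t TrailingZeroCount(uint32_t value) {
  return uint32_t(std::countr_zero(value));
}

int main() {
  // Example: sets 1..3 and 5 are out of date and must be rebound.
  uint32_t sets_remaining = 0b101110;
  uint32_t set_index;
  while (BitScanForward(sets_remaining, &set_index)) {
    // End of the contiguous run of set bits starting at set_index.
    uint32_t run_end = TrailingZeroCount(
        ~(sets_remaining | ((uint32_t(1) << set_index) - 1)));
    // One vkCmdBindDescriptorSets call would cover [set_index, run_end).
    std::printf("bind descriptor sets %u..%u\n", set_index, run_end - 1);
    if (run_end >= 32) {
      break;
    }
    sets_remaining &= ~((uint32_t(1) << run_end) - 1);
  }
  return 0;
}

With the example mask this prints "bind descriptor sets 1..3" and "bind descriptor sets 5..5", i.e. two bind calls instead of four, which is the point of tracking the bound/up-to-date bits separately.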
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 90df3f39b..a7283d56f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -10,6 +10,7 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ +#include #include #include #include @@ -18,13 +19,18 @@ #include #include "xenia/gpu/command_processor.h" +#include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/vulkan/transient_descriptor_pool.h" #include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" namespace xe { namespace gpu { @@ -67,19 +73,21 @@ class VulkanCommandProcessor : public CommandProcessor { const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask); - struct PipelineLayout { - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; - VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; - }; - bool GetPipelineLayout(uint32_t texture_count_pixel, - uint32_t texture_count_vertex, - PipelineLayout& pipeline_layout_out); + // Must be called before doing anything outside the render pass scope, + // including adding pipeline barriers that are not a part of the render pass + // scope. Submission must be open. + void EndRenderPass(); + + // The returned reference is valid until a cache clear. + const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( + uint32_t texture_count_pixel, uint32_t texture_count_vertex); protected: bool SetupContext() override; void ShutdownContext() override; + void WriteRegister(uint32_t index, uint32_t value) override; + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -95,6 +103,49 @@ class VulkanCommandProcessor : public CommandProcessor { void InitializeTrace() override; private: + struct CommandBuffer { + VkCommandPool pool; + VkCommandBuffer buffer; + }; + + struct SparseBufferBind { + VkBuffer buffer; + size_t bind_offset; + uint32_t bind_count; + }; + + union TextureDescriptorSetLayoutKey { + struct { + uint32_t is_vertex : 1; + // For 0, use descriptor_set_layout_empty_ instead as these are owning + // references. + uint32_t texture_count : 31; + }; + uint32_t key = 0; + }; + static_assert(sizeof(TextureDescriptorSetLayoutKey) == sizeof(uint32_t)); + + union PipelineLayoutKey { + struct { + // Pixel textures in the low bits since those are varied much more + // commonly. + uint32_t texture_count_pixel : 16; + uint32_t texture_count_vertex : 16; + }; + uint32_t key = 0; + }; + static_assert(sizeof(PipelineLayoutKey) == sizeof(uint32_t)); + + class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { + public: + VkPipelineLayout GetPipelineLayout() const override { + return pipeline_layout; + } + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; + }; + // BeginSubmission and EndSubmission may be called at any time. 
If there's an // open non-frame submission, BeginSubmission(true) will promote it to a // frame. EndSubmission(true) will close the frame no matter whether the @@ -119,6 +170,18 @@ class VulkanCommandProcessor : public CommandProcessor { VkShaderStageFlags GetGuestVertexShaderStageFlags() const; + void UpdateFixedFunctionState(); + void UpdateSystemConstantValues(xenos::Endian index_endian); + bool UpdateBindings(const VulkanShader* vertex_shader, + const VulkanShader* pixel_shader); + // Allocates a descriptor, space in the uniform buffer pool, and fills the + // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. + // Returns null in case of failure. + uint8_t* WriteUniformBufferBinding( + size_t size, VkDescriptorSetLayout descriptor_set_layout, + VkDescriptorBufferInfo& descriptor_buffer_info_out, + VkWriteDescriptorSet& write_descriptor_set_out); + bool cache_clear_requested_ = false; std::vector fences_free_; @@ -143,20 +206,11 @@ class VulkanCommandProcessor : public CommandProcessor { // Submission indices of frames that have already been submitted. uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {}; - struct CommandBuffer { - VkCommandPool pool; - VkCommandBuffer buffer; - }; std::vector command_buffers_writable_; std::deque> command_buffers_submitted_; DeferredCommandBuffer deferred_command_buffer_; std::vector sparse_memory_binds_; - struct SparseBufferBind { - VkBuffer buffer; - size_t bind_offset; - uint32_t bind_count; - }; std::vector sparse_buffer_binds_; // SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer // on submission (because pBinds should point to a place in std::vector, but @@ -166,6 +220,7 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr transient_descriptor_pool_uniform_buffers_; + std::unique_ptr uniform_buffer_pool_; // Descriptor set layouts used by different shaders. VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; @@ -180,34 +235,66 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; - union TextureDescriptorSetLayoutKey { - struct { - uint32_t is_vertex : 1; - // For 0, use descriptor_set_layout_empty_ instead as these are owning - // references. - uint32_t texture_count : 31; - }; - uint32_t key = 0; - }; // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout. + // Layouts are referenced by pipeline_layouts_. std::unordered_map descriptor_set_layouts_textures_; - union PipelineLayoutKey { - struct { - // Pixel textures in the low bits since those are varied much more - // commonly. - uint32_t texture_count_pixel : 16; - uint32_t texture_count_vertex : 16; - }; - uint32_t key = 0; - }; // PipelineLayoutKey::key -> PipelineLayout. + // Layouts are referenced by VulkanPipelineCache. std::unordered_map pipeline_layouts_; std::unique_ptr shared_memory_; + std::unique_ptr pipeline_cache_; + + std::unique_ptr render_target_cache_; + VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE; VkDescriptorSet shared_memory_and_edram_descriptor_set_; + + // The current fixed-function drawing state. + VkViewport ff_viewport_; + VkRect2D ff_scissor_; + bool ff_viewport_update_needed_; + bool ff_scissor_update_needed_; + + // Cache render pass currently started in the command buffer with framebuffer. + VkRenderPass current_render_pass_; + VkFramebuffer current_framebuffer_; + + // Cache graphics pipeline currently bound to the command buffer. 
+ VkPipeline current_graphics_pipeline_; + + // Pipeline layout of the current graphics pipeline. + const PipelineLayout* current_graphics_pipeline_layout_; + VkDescriptorSet current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetCount]; + // Whether descriptor sets in current_graphics_descriptor_sets_ point to + // up-to-date data. + uint32_t current_graphics_descriptor_set_values_up_to_date_; + // Whether the descriptor sets currently bound to the command buffer - only + // low bits for the descriptor set layouts that remained the same are kept + // when changing the pipeline layout. May be out of sync with + // current_graphics_descriptor_set_values_up_to_date_, but should be ensured + // to be a subset of it at some point when it becomes important; bits for + // non-existent descriptor set layouts may also be set, but need to be ignored + // when they start to matter. + uint32_t current_graphics_descriptor_sets_bound_up_to_date_; + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + + // Float constant usage masks of the last draw call. + uint64_t current_float_constant_map_vertex_[4]; + uint64_t current_float_constant_map_pixel_[4]; + + // System shader constants. + SpirvShaderTranslator::SystemConstants system_constants_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc new file mode 100644 index 000000000..8db426857 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -0,0 +1,443 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" + +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanPipelineCache::VulkanPipelineCache( + VulkanCommandProcessor& command_processor, + const RegisterFile& register_file, + VulkanRenderTargetCache& render_target_cache) + : command_processor_(command_processor), + register_file_(register_file), + render_target_cache_(render_target_cache) {} + +VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } + +bool VulkanPipelineCache::Initialize() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + + device_pipeline_features_.features = 0; + // TODO(Triang3l): Support the portability subset. 
+ device_pipeline_features_.triangle_fans = 1; + + shader_translator_ = std::make_unique( + SpirvShaderTranslator::Features(provider)); + + return true; +} + +void VulkanPipelineCache::Shutdown() { + ClearCache(); + + shader_translator_.reset(); +} + +void VulkanPipelineCache::ClearCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + last_pipeline_ = nullptr; + for (const auto& pipeline_pair : pipelines_) { + if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, pipeline_pair.second.pipeline, nullptr); + } + } + pipelines_.clear(); + + for (auto it : shaders_) { + delete it.second; + } + shaders_.clear(); +} + +VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { + // Hash the input memory and lookup the shader. + uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { + // Shader has been previously loaded. + return it->second; + } + + // Always create the shader and stash it away. + // We need to track it even if it fails translation so we know not to try + // again. + VulkanShader* shader = + new VulkanShader(shader_type, data_hash, host_address, dword_count); + shaders_.emplace(data_hash, shader); + + return shader; +} + +bool VulkanPipelineCache::EnsureShadersTranslated( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + Shader::HostVertexShaderType host_vertex_shader_type) { + const RegisterFile& regs = register_file_; + auto sq_program_cntl = regs.Get(); + + // Edge flags are not supported yet (because polygon primitives are not). 
+ assert_true(sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdge && + sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill); + assert_false(sq_program_cntl.gen_index_vtx); + + if (!vertex_shader->is_translated()) { + if (!TranslateShader(*shader_translator_, *vertex_shader, + sq_program_cntl)) { + XELOGE("Failed to translate the vertex shader!"); + return false; + } + } + + if (pixel_shader != nullptr && !pixel_shader->is_translated()) { + if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl)) { + XELOGE("Failed to translate the pixel shader!"); + return false; + } + } + + return true; +} + +bool VulkanPipelineCache::ConfigurePipeline( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + PipelineDescription description; + if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key, + description)) { + return false; + } + if (last_pipeline_ && last_pipeline_->first == description) { + pipeline_out = last_pipeline_->second.pipeline; + pipeline_layout_out = last_pipeline_->second.pipeline_layout; + return true; + } + auto it = pipelines_.find(description); + if (it != pipelines_.end()) { + last_pipeline_ = &*it; + pipeline_out = it->second.pipeline; + pipeline_layout_out = it->second.pipeline_layout; + return true; + } + + // Create the pipeline if not the latest and not already existing. + if (!EnsureShadersTranslated(vertex_shader, pixel_shader, + Shader::HostVertexShaderType::kVertex)) { + return false; + } + const PipelineLayoutProvider* pipeline_layout = + command_processor_.GetPipelineLayout(0, 0); + if (!pipeline_layout) { + return false; + } + VkRenderPass render_pass = + render_target_cache_.GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + PipelineCreationArguments creation_arguments; + auto& pipeline = + *pipelines_.emplace(description, Pipeline(pipeline_layout)).first; + creation_arguments.pipeline = &pipeline; + creation_arguments.vertex_shader = vertex_shader; + creation_arguments.pixel_shader = pixel_shader; + creation_arguments.render_pass = render_pass; + if (!EnsurePipelineCreated(creation_arguments)) { + return false; + } + pipeline_out = pipeline.second.pipeline; + pipeline_layout_out = pipeline_layout; + return true; +} + +bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator, + VulkanShader& shader, + reg::SQ_PROGRAM_CNTL cntl) { + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. + // TODO(Triang3l): Host vertex shader type. 
+ if (!translator.Translate(&shader, cntl, + Shader::HostVertexShaderType::kVertex)) { + XELOGE("Shader {:016X} translation failed; marking as ignored", + shader.ucode_data_hash()); + return false; + } + return shader.InitializeShaderModule( + command_processor_.GetVulkanContext().GetVulkanProvider()); +} + +bool VulkanPipelineCache::GetCurrentStateDescription( + const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + PipelineDescription& description_out) const { + description_out.Reset(); + + const RegisterFile& regs = register_file_; + + description_out.vertex_shader_hash = vertex_shader->ucode_data_hash(); + description_out.pixel_shader_hash = + pixel_shader ? pixel_shader->ucode_data_hash() : 0; + description_out.render_pass_key = render_pass_key; + + auto vgt_draw_initiator = regs.Get(); + PipelinePrimitiveTopology primitive_topology; + switch (vgt_draw_initiator.prim_type) { + case xenos::PrimitiveType::kPointList: + primitive_topology = PipelinePrimitiveTopology::kPointList; + break; + case xenos::PrimitiveType::kLineList: + primitive_topology = PipelinePrimitiveTopology::kLineList; + break; + case xenos::PrimitiveType::kLineStrip: + primitive_topology = PipelinePrimitiveTopology::kLineStrip; + break; + case xenos::PrimitiveType::kTriangleList: + primitive_topology = PipelinePrimitiveTopology::kTriangleList; + break; + case xenos::PrimitiveType::kTriangleFan: + primitive_topology = device_pipeline_features_.triangle_fans + ? PipelinePrimitiveTopology::kTriangleFan + : PipelinePrimitiveTopology::kTriangleList; + break; + case xenos::PrimitiveType::kTriangleStrip: + primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; + break; + default: + // TODO(Triang3l): All primitive types and tessellation. + return false; + } + description_out.primitive_topology = primitive_topology; + // TODO(Triang3l): Primitive restart. + + return true; +} + +bool VulkanPipelineCache::EnsurePipelineCreated( + const PipelineCreationArguments& creation_arguments) { + if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) { + return true; + } + + // This function preferably should validate the description to prevent + // unsupported behavior that may be dangerous/crashing because pipelines can + // be created from the disk storage. 
+ + if (creation_arguments.pixel_shader) { + XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash(), + creation_arguments.pixel_shader->ucode_data_hash()); + } else { + XELOGGPU("Creating graphics pipeline state with VS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash()); + } + + const PipelineDescription& description = creation_arguments.pipeline->first; + + VkPipelineShaderStageCreateInfo shader_stages[2]; + uint32_t shader_stage_count = 0; + + assert_true(creation_arguments.vertex_shader->is_translated()); + if (!creation_arguments.vertex_shader->is_valid()) { + return false; + } + assert_true(shader_stage_count < xe::countof(shader_stages)); + VkPipelineShaderStageCreateInfo& shader_stage_vertex = + shader_stages[shader_stage_count++]; + shader_stage_vertex.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_vertex.pNext = nullptr; + shader_stage_vertex.flags = 0; + shader_stage_vertex.stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stage_vertex.module = + creation_arguments.vertex_shader->shader_module(); + assert_true(shader_stage_vertex.module != VK_NULL_HANDLE); + shader_stage_vertex.pName = "main"; + shader_stage_vertex.pSpecializationInfo = nullptr; + if (creation_arguments.pixel_shader) { + assert_true(creation_arguments.pixel_shader->is_translated()); + if (!creation_arguments.pixel_shader->is_valid()) { + return false; + } + assert_true(shader_stage_count < xe::countof(shader_stages)); + VkPipelineShaderStageCreateInfo& shader_stage_fragment = + shader_stages[shader_stage_count++]; + shader_stage_fragment.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_fragment.pNext = nullptr; + shader_stage_fragment.flags = 0; + shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stage_fragment.module = + creation_arguments.pixel_shader->shader_module(); + assert_true(shader_stage_fragment.module != VK_NULL_HANDLE); + shader_stage_fragment.pName = "main"; + shader_stage_fragment.pSpecializationInfo = nullptr; + } + + VkPipelineVertexInputStateCreateInfo vertex_input_state; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_state.pNext = nullptr; + vertex_input_state.flags = 0; + vertex_input_state.vertexBindingDescriptionCount = 0; + vertex_input_state.pVertexBindingDescriptions = nullptr; + vertex_input_state.vertexAttributeDescriptionCount = 0; + vertex_input_state.pVertexAttributeDescriptions = nullptr; + + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + input_assembly_state.flags = 0; + switch (description.primitive_topology) { + case PipelinePrimitiveTopology::kPointList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PipelinePrimitiveTopology::kLineList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PipelinePrimitiveTopology::kLineStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PipelinePrimitiveTopology::kTriangleStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleFan: + 
assert_true(device_pipeline_features_.triangle_fans); + if (!device_pipeline_features_.triangle_fans) { + return false; + } + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + break; + case PipelinePrimitiveTopology::kLineListWithAdjacency: + input_assembly_state.topology = + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + break; + case PipelinePrimitiveTopology::kPatchList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + break; + default: + assert_unhandled_case(description.primitive_topology); + return false; + } + input_assembly_state.primitiveRestartEnable = + description.primitive_restart ? VK_TRUE : VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + static const VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states)); + dynamic_state.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + pipeline_create_info.stageCount = shader_stage_count; + pipeline_create_info.pStages = shader_stages; + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pTessellationState = nullptr; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pDepthStencilState = nullptr; + pipeline_create_info.pColorBlendState = nullptr; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.layout = + creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout(); + pipeline_create_info.renderPass = creation_arguments.render_pass; + pipeline_create_info.subpass = 0; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = 0; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkPipeline pipeline; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipeline) != VK_SUCCESS) { + // TODO(Triang3l): Move these error messages outside. 
+  /* if (creation_arguments.pixel_shader) {
+    XELOGE(
+        "Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
+        creation_arguments.vertex_shader->ucode_data_hash(),
+        creation_arguments.pixel_shader->ucode_data_hash());
+  } else {
+    XELOGE("Failed to create graphics pipeline with VS {:016X}",
+           creation_arguments.vertex_shader->ucode_data_hash());
+  } */
+    return false;
+  }
+  creation_arguments.pipeline->second.pipeline = pipeline;
+  return true;
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h
new file mode 100644
index 000000000..1d2f852e5
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h
@@ -0,0 +1,183 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
+#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include "third_party/xxhash/xxhash.h"
+#include "xenia/base/hash.h"
+#include "xenia/base/platform.h"
+#include "xenia/gpu/register_file.h"
+#include "xenia/gpu/spirv_shader_translator.h"
+#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
+#include "xenia/gpu/vulkan/vulkan_shader.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+class VulkanCommandProcessor;
+
+// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
+// implementations.
+class VulkanPipelineCache {
+ public:
+  class PipelineLayoutProvider {
+   public:
+    virtual ~PipelineLayoutProvider() {}
+    virtual VkPipelineLayout GetPipelineLayout() const = 0;
+  };
+
+  VulkanPipelineCache(VulkanCommandProcessor& command_processor,
+                      const RegisterFile& register_file,
+                      VulkanRenderTargetCache& render_target_cache);
+  ~VulkanPipelineCache();
+
+  bool Initialize();
+  void Shutdown();
+  void ClearCache();
+
+  VulkanShader* LoadShader(xenos::ShaderType shader_type,
+                           uint32_t guest_address,
+                           const uint32_t* host_address, uint32_t dword_count);
+
+  // Translates shaders if needed, also making shader info up to date.
+  bool EnsureShadersTranslated(
+      VulkanShader* vertex_shader, VulkanShader* pixel_shader,
+      Shader::HostVertexShaderType host_vertex_shader_type);
+
+  // TODO(Triang3l): Return a deferred creation handle.
+  bool ConfigurePipeline(VulkanShader* vertex_shader,
+                         VulkanShader* pixel_shader,
+                         VulkanRenderTargetCache::RenderPassKey render_pass_key,
+                         VkPipeline& pipeline_out,
+                         const PipelineLayoutProvider*& pipeline_layout_out);
+
+ private:
+  // Pipeline storage can only be loaded if the features of the device it was
+  // created on and of the current device match, because descriptions may
+  // require features not supported on the device. Very radical differences
+  // (such as the RB emulation method) should result in a different storage
+  // file being used.
+ union DevicePipelineFeatures { + struct { + uint32_t triangle_fans : 1; + }; + uint32_t features = 0; + }; + + enum class PipelinePrimitiveTopology : uint32_t { + kPointList, + kLineList, + kLineStrip, + kTriangleList, + kTriangleStrip, + // Requires DevicePipelineFeatures::triangle_fans. + kTriangleFan, + kLineListWithAdjacency, + kPatchList, + }; + + XEPACKEDSTRUCT(PipelineDescription, { + uint64_t vertex_shader_hash; + // 0 if no pixel shader. + uint64_t pixel_shader_hash; + VulkanRenderTargetCache::RenderPassKey render_pass_key; + + // Input assembly. + PipelinePrimitiveTopology primitive_topology : 3; + uint32_t primitive_restart : 1; + + // Including all the padding, for a stable hash. + PipelineDescription() { Reset(); } + PipelineDescription(const PipelineDescription& description) { + std::memcpy(this, &description, sizeof(*this)); + } + PipelineDescription& operator=(const PipelineDescription& description) { + std::memcpy(this, &description, sizeof(*this)); + return *this; + } + bool operator==(const PipelineDescription& description) const { + return std::memcmp(this, &description, sizeof(*this)) == 0; + } + void Reset() { std::memset(this, 0, sizeof(*this)); } + uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + struct Hasher { + size_t operator()(const PipelineDescription& description) const { + return size_t(description.GetHash()); + } + }; + }); + + struct Pipeline { + VkPipeline pipeline = VK_NULL_HANDLE; + // Owned by VulkanCommandProcessor, valid until ClearCache. + const PipelineLayoutProvider* pipeline_layout; + Pipeline(const PipelineLayoutProvider* pipeline_layout_provider) + : pipeline_layout(pipeline_layout_provider) {} + }; + + // Description that can be passed from the command processor thread to the + // creation threads, with everything needed from caches pre-looked-up. + struct PipelineCreationArguments { + std::pair* pipeline; + const VulkanShader* vertex_shader; + const VulkanShader* pixel_shader; + VkRenderPass render_pass; + }; + + // Can be called from multiple threads. + bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader, + reg::SQ_PROGRAM_CNTL cntl); + + bool GetCurrentStateDescription( + const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + PipelineDescription& description_out) const; + + // Can be called from creation threads - all needed data must be fully set up + // at the point of the call: shaders must be translated, pipeline layout and + // render pass objects must be available. + bool EnsurePipelineCreated( + const PipelineCreationArguments& creation_arguments); + + VulkanCommandProcessor& command_processor_; + const RegisterFile& register_file_; + VulkanRenderTargetCache& render_target_cache_; + + DevicePipelineFeatures device_pipeline_features_; + + // Reusable shader translator on the command processor thread. + std::unique_ptr shader_translator_; + + // Ucode hash -> shader. + std::unordered_map> + shaders_; + + std::unordered_map + pipelines_; + + // Previously used pipeline, to avoid lookups if the state wasn't changed. 
+ const std::pair* last_pipeline_ = + nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc new file mode 100644 index 000000000..e85b6ea8b --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -0,0 +1,136 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanRenderTargetCache::VulkanRenderTargetCache( + VulkanCommandProcessor& command_processor, + const RegisterFile& register_file) + : command_processor_(command_processor), register_file_(register_file) {} + +VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); } + +bool VulkanRenderTargetCache::Initialize() { return true; } + +void VulkanRenderTargetCache::Shutdown() { ClearCache(); } + +void VulkanRenderTargetCache::ClearCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); + } + framebuffers_.clear(); + + for (const auto& render_pass_pair : render_passes_) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + render_passes_.clear(); +} + +VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { + auto it = render_passes_.find(key.key); + if (it != render_passes_.end()) { + return it->second; + } + + // TODO(Triang3l): Attachments and dependencies. 
+ + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = 0; + subpass_description.pColorAttachments = nullptr; + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo render_pass_create_info; + render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.pNext = nullptr; + render_pass_create_info.flags = 0; + render_pass_create_info.attachmentCount = 0; + render_pass_create_info.pAttachments = nullptr; + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &subpass_description; + render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkRenderPass render_pass; + if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, + &render_pass) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan render pass"); + return VK_NULL_HANDLE; + } + render_passes_.emplace(key.key, render_pass); + return render_pass; +} + +VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { + auto it = framebuffers_.find(key); + if (it != framebuffers_.end()) { + return it->second; + } + + VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + + VkFramebufferCreateInfo framebuffer_create_info; + framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_create_info.pNext = nullptr; + framebuffer_create_info.flags = 0; + framebuffer_create_info.renderPass = render_pass; + framebuffer_create_info.attachmentCount = 0; + framebuffer_create_info.pAttachments = nullptr; + framebuffer_create_info.width = 1280; + framebuffer_create_info.height = 720; + framebuffer_create_info.layers = 1; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkFramebuffer framebuffer; + if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, + &framebuffer) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan framebuffer"); + return VK_NULL_HANDLE; + } + framebuffers_.emplace(key, framebuffer); + return framebuffer; +} + +bool VulkanRenderTargetCache::UpdateRenderTargets( + FramebufferKey& framebuffer_key_out) { + framebuffer_key_out = FramebufferKey(); + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h new file mode 100644 index 000000000..f5c183f70 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -0,0 +1,95 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + 
****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ +#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ + +#include +#include +#include + +#include "third_party/xxhash/xxhash.h" +#include "xenia/gpu/register_file.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D +// implementations. +class VulkanRenderTargetCache { + public: + union RenderPassKey { + uint32_t key = 0; + }; + static_assert(sizeof(RenderPassKey) == sizeof(uint32_t)); + + struct FramebufferKey { + RenderPassKey render_pass_key; + + // Including all the padding, for a stable hash. + FramebufferKey() { Reset(); } + FramebufferKey(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + } + FramebufferKey& operator=(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + return *this; + } + bool operator==(const FramebufferKey& key) const { + return std::memcmp(this, &key, sizeof(*this)) == 0; + } + void Reset() { std::memset(this, 0, sizeof(*this)); } + uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + struct Hasher { + size_t operator()(const FramebufferKey& description) const { + return size_t(description.GetHash()); + } + }; + }; + static_assert(sizeof(FramebufferKey) == sizeof(uint32_t)); + + VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, + const RegisterFile& register_file); + ~VulkanRenderTargetCache(); + + bool Initialize(); + void Shutdown(); + void ClearCache(); + + // Returns the render pass object, or VK_NULL_HANDLE if failed to create. + // A render pass managed by the render target cache may be ended and resumed + // at any time (to allow for things like copying and texture loading). + VkRenderPass GetRenderPass(RenderPassKey key); + + // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. + VkFramebuffer GetFramebuffer(FramebufferKey key); + + // May dispatch computations. + bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out); + + private: + VulkanCommandProcessor& command_processor_; + const RegisterFile& register_file_; + + // RenderPassKey::key -> VkRenderPass. + std::unordered_map render_passes_; + + std::unordered_map + framebuffers_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc new file mode 100644 index 000000000..e4fafff96 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -0,0 +1,48 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shader.h" + +#include + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + +bool VulkanShader::InitializeShaderModule( + const ui::vulkan::VulkanProvider& provider) { + if (!is_valid()) { + return false; + } + if (shader_module_ != VK_NULL_HANDLE) { + return true; + } + VkShaderModuleCreateInfo shader_module_create_info; + shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_module_create_info.pNext = nullptr; + shader_module_create_info.flags = 0; + shader_module_create_info.codeSize = translated_binary().size(); + shader_module_create_info.pCode = + reinterpret_cast(translated_binary().data()); + if (provider.dfn().vkCreateShaderModule(provider.device(), + &shader_module_create_info, nullptr, + &shader_module_) != VK_SUCCESS) { + is_valid_ = false; + return false; + } + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h new file mode 100644 index 000000000..23ff5fd90 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_ +#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_ + +#include + +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanShader : public Shader { + public: + VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count); + + bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider); + VkShaderModule shader_module() const { return shader_module_; } + + private: + VkShaderModule shader_module_ = VK_NULL_HANDLE; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_SHADER_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index e8b1790b3..5ee2755f2 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -241,6 +241,7 @@ void VulkanSharedMemory::Use(Usage usage, buffer_memory_barrier.size = VK_WHOLE_SIZE; last_usage_ = usage; } + command_processor_.EndRenderPass(); command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1, &buffer_memory_barrier, 0, nullptr); @@ -271,7 +272,7 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() { return false; } - // TODO(Triang3l): End the render pass. 
+ command_processor_.EndRenderPass(); Use(Usage::kRead); DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); @@ -384,7 +385,7 @@ bool VulkanSharedMemory::UploadRanges( if (upload_page_ranges.empty()) { return true; } - // TODO(Triang3l): End the render pass. + command_processor_.EndRenderPass(); // upload_page_ranges are sorted, use them to determine the range for the // ordering barrier. Use(Usage::kTransferDestination, diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.cc b/src/xenia/ui/vulkan/transient_descriptor_pool.cc index af2c0f424..e471a6c6e 100644 --- a/src/xenia/ui/vulkan/transient_descriptor_pool.cc +++ b/src/xenia/ui/vulkan/transient_descriptor_pool.cc @@ -80,6 +80,8 @@ VkDescriptorSet TransientDescriptorPool::Request( VkDescriptorSet descriptor_set; // Try to allocate as normal. + // TODO(Triang3l): Investigate the possibility of reuse of descriptor sets, as + // vkAllocateDescriptorSets may be implemented suboptimally. if (!pages_writable_.empty()) { if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ && page_current_descriptors_used_ + layout_descriptor_count <=