From fe9c83c15bd2961acb7b88907d98bc97ad1bb398 Mon Sep 17 00:00:00 2001 From: DrChat Date: Sun, 18 Feb 2018 23:22:02 -0600 Subject: [PATCH] [Vulkan] First working version of dynamic vfetch! --- src/xenia/gpu/spirv_shader_translator.cc | 207 ++++++++++++++--------- src/xenia/gpu/spirv_shader_translator.h | 4 +- src/xenia/gpu/vulkan/buffer_cache.cc | 22 ++- src/xenia/gpu/vulkan/pipeline_cache.cc | 8 + 4 files changed, 148 insertions(+), 93 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 24f4f3981..94c320465 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -36,7 +36,7 @@ SpirvShaderTranslator::~SpirvShaderTranslator() = default; void SpirvShaderTranslator::StartTranslation() { // Create a new builder. - builder_ = std::make_unique(SPV_VERSION, 0xFFFFFFFF, nullptr); + builder_ = std::make_unique(0x10000, 0xFFFFFFFF, nullptr); auto& b = *builder_; // Import required modules. @@ -221,11 +221,33 @@ void SpirvShaderTranslator::StartTranslation() { if (is_vertex_shader()) { // Vertex inputs/outputs // Inputs: 32 SSBOs on DS 2 binding 0 + + // Runtime array for vertex data Id vtx_t = b.makeRuntimeArray(uint_type_); - Id vtx_a_t = b.makeArrayType(vtx_t, b.makeUintConstant(32), 0); + b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + + Id vtx_s = b.makeStructType({vtx_t}, "vertex_type"); + b.addDecoration(vtx_s, spv::Decoration::DecorationBufferBlock); + + // Describe the actual data + b.addMemberName(vtx_s, 0, "data"); + b.addMemberDecoration(vtx_s, 0, spv::Decoration::DecorationOffset, 0); + + // Create the vertex bindings variable. + Id vtx_a_t = b.makeArrayType(vtx_s, b.makeUintConstant(32), 0); vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t, "vertex_bindings"); + // DS 2 binding 0 + b.addDecoration(vtx_, spv::Decoration::DecorationDescriptorSet, 2); + b.addDecoration(vtx_, spv::Decoration::DecorationBinding, 0); + + // Set up the map from binding -> ssbo index + for (const auto& binding : vertex_bindings()) { + vtx_binding_map_[binding.fetch_constant] = binding.binding_index; + } + // Outputs interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, interpolators_type, "interpolators"); @@ -420,8 +442,9 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { exec_skip_block_ = nullptr; // main() entry point. - auto mainFn = - b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "main", {}, {}); + spv::Block* entry_block; + auto mainFn = b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "main", + {}, {}, &entry_block); if (is_vertex_shader()) { auto entry = b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, mainFn, "main"); @@ -1165,66 +1188,108 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( // TODO: Indexed fetch auto vertex_idx = LoadFromOperand(instr.operands[0]); vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx); - auto shader_vertex_idx = b.createLoad(vertex_idx_); - auto vertex_components = - GetVertexFormatComponentCount(instr.attributes.data_format); + // vertex_idx * stride + offset + vertex_idx = b.createBinOp(spv::Op::OpIMul, int_type_, vertex_idx, + b.makeUintConstant(instr.attributes.stride)); + vertex_idx = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, + b.makeUintConstant(instr.attributes.offset)); - // Skip loading if it's an indexed fetch. - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_idx, - shader_vertex_idx); - Id alt_vertex = 0; - switch (vertex_components) { - case 1: - alt_vertex = b.makeFloatConstant(0.f); - break; - case 2: - alt_vertex = b.makeCompositeConstant( - vec2_float_type_, std::vector({b.makeFloatConstant(0.f), - b.makeFloatConstant(1.f)})); - cond = b.smearScalar(spv::NoPrecision, cond, vec2_bool_type_); - break; - case 3: - alt_vertex = b.makeCompositeConstant( - vec3_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), - b.makeFloatConstant(1.f)})); - cond = b.smearScalar(spv::NoPrecision, cond, vec3_bool_type_); - break; - case 4: - alt_vertex = b.makeCompositeConstant( - vec4_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), - b.makeFloatConstant(0.f), - b.makeFloatConstant(1.f)})); - cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_); - break; - default: - assert_unhandled_case(vertex_components); - } + auto data_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, vtx_, + {b.makeUintConstant(vtx_binding_map_[instr.operands[1].storage_index]), + b.makeUintConstant(0)}); - auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] - [instr.attributes.offset]; - assert_not_zero(vertex_ptr); - spv::Id vertex = b.createLoad(vertex_ptr); + spv::Id vertex = 0; switch (instr.attributes.data_format) { - case VertexFormat::k_8_8_8_8: - case VertexFormat::k_2_10_10_10: + case VertexFormat::k_8_8_8_8: { + if (!instr.attributes.is_integer) { + auto vertex_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); + auto vertex_data = b.createLoad(vertex_ptr); + + spv::GLSLstd450 op; + if (instr.attributes.is_signed) { + op = spv::GLSLstd450::kUnpackSnorm4x8; + } else { + op = spv::GLSLstd450::kUnpackUnorm4x8; + } + vertex = CreateGlslStd450InstructionCall( + spv::NoPrecision, vec4_float_type_, op, {vertex_data}); + } + } break; + case VertexFormat::k_16_16: case VertexFormat::k_16_16_16_16: case VertexFormat::k_16_16_FLOAT: case VertexFormat::k_16_16_16_16_FLOAT: - case VertexFormat::k_32: - case VertexFormat::k_32_32: - case VertexFormat::k_32_32_32_32: - case VertexFormat::k_32_FLOAT: - case VertexFormat::k_32_32_FLOAT: - case VertexFormat::k_32_32_32_FLOAT: + case VertexFormat::k_32: { + } break; + case VertexFormat::k_32_32: { + } break; + case VertexFormat::k_32_32_32_32: { + } break; + case VertexFormat::k_32_FLOAT: { + } break; + case VertexFormat::k_32_32_FLOAT: { + spv::Id components[2] = {}; + for (uint32_t i = 0; i < 2; i++) { + auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, + b.makeUintConstant(i)); + auto vertex_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, data_ptr, {index}); + auto vertex_data = b.createLoad(vertex_ptr); + + components[i] = + b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); + } + + vertex = b.createCompositeConstruct(vec2_float_type_, + {components[0], components[1]}); + } break; + case VertexFormat::k_32_32_32_FLOAT: { + spv::Id components[3] = {}; + for (uint32_t i = 0; i < 3; i++) { + auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, + b.makeUintConstant(i)); + auto vertex_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, data_ptr, {index}); + auto vertex_data = b.createLoad(vertex_ptr); + + components[i] = + b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); + } + + vertex = b.createCompositeConstruct( + vec3_float_type_, {components[0], components[1], components[2]}); + } break; case VertexFormat::k_32_32_32_32_FLOAT: { - // These are handled, for now. + spv::Id components[4] = {}; + for (uint32_t i = 0; i < 4; i++) { + auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, + b.makeUintConstant(i)); + auto vertex_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, data_ptr, {index}); + auto vertex_data = b.createLoad(vertex_ptr); + + components[i] = + b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); + } + + vertex = b.createCompositeConstruct( + vec4_float_type_, + {components[0], components[1], components[2], components[3]}); + } break; + + case VertexFormat::k_2_10_10_10: { } break; case VertexFormat::k_10_11_11: { + auto vertex_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); + auto vertex_data = b.createLoad(vertex_ptr); + assert(b.getTypeId(vertex_data) == uint_type_); + // This needs to be converted. bool is_signed = instr.attributes.is_signed; bool is_integer = instr.attributes.is_integer; @@ -1232,7 +1297,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract; auto comp_type = is_signed ? int_type_ : uint_type_; - assert_true(comp_type == b.getTypeId(vertex)); + assert_true(comp_type == b.getTypeId(vertex_data)); spv::Id components[3] = {0}; /* @@ -1246,9 +1311,12 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( b.makeUintConstant(11)); */ // Workaround until NVIDIA fixes their compiler :| - components[0] = BitfieldExtract(comp_type, vertex, is_signed, 00, 11); - components[1] = BitfieldExtract(comp_type, vertex, is_signed, 11, 11); - components[2] = BitfieldExtract(comp_type, vertex, is_signed, 22, 10); + components[0] = + BitfieldExtract(comp_type, vertex_data, is_signed, 00, 11); + components[1] = + BitfieldExtract(comp_type, vertex_data, is_signed, 11, 11); + components[2] = + BitfieldExtract(comp_type, vertex_data, is_signed, 22, 10); op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; for (int i = 0; i < 3; i++) { @@ -1316,32 +1384,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( break; } - // Convert any integers to floats. - auto scalar_type = b.getScalarTypeId(b.getTypeId(vertex)); - if (scalar_type == int_type_ || scalar_type == uint_type_) { - auto op = scalar_type == int_type_ ? spv::Op::OpConvertSToF - : spv::Op::OpConvertUToF; - spv::Id vtx_type; - switch (vertex_components) { - case 1: - vtx_type = float_type_; - break; - case 2: - vtx_type = vec2_float_type_; - break; - case 3: - vtx_type = vec3_float_type_; - break; - case 4: - vtx_type = vec4_float_type_; - break; - } - - vertex = b.createUnaryOp(op, vtx_type, vertex); - } - - vertex = b.createTriOp(spv::Op::OpSelect, b.getTypeId(vertex), cond, vertex, - alt_vertex); + assert_not_zero(vertex); StoreToResult(vertex, instr.result); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 224c2c207..dd72f6f66 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -158,13 +158,11 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id samplers_ = 0; spv::Id tex_[3] = {0}; // Images {2D, 3D, Cube} spv::Id vtx_ = 0; // Vertex buffer array (32 runtime arrays) + std::map vtx_binding_map_; // SPIR-V IDs that are part of the in/out interface. std::vector interface_ids_; - // Map of {binding -> {offset -> spv input}} - std::map> vertex_binding_map_; - struct CFBlock { spv::Block* block = nullptr; bool labelled = false; diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index eaeec3098..3f1a093d3 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -103,7 +103,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory, transient_buffer_ = std::make_unique( device_, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, capacity, 4096); } @@ -133,6 +133,11 @@ VkResult BufferCache::Initialize() { return status; } + status = CreateVertexDescriptorPool(); + if (status != VK_SUCCESS) { + return status; + } + return VK_SUCCESS; } @@ -141,8 +146,8 @@ VkResult xe::gpu::vulkan::BufferCache::CreateVertexDescriptorPool() { std::vector pool_sizes; pool_sizes.push_back({ - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - 4096, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + 32768, }); vertex_descriptor_pool_ = std::make_unique(*device_, 32768, pool_sizes); @@ -302,6 +307,7 @@ void BufferCache::Shutdown() { } FreeConstantDescriptorSet(); + FreeVertexDescriptorPool(); transient_buffer_->Shutdown(); VK_SAFE_DESTROY(vkFreeMemory, *device_, gpu_memory_pool_, nullptr); @@ -522,7 +528,7 @@ std::pair BufferCache::UploadVertexBuffer( VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, nullptr, VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, transient_buffer_->gpu_buffer(), @@ -530,7 +536,7 @@ std::pair BufferCache::UploadVertexBuffer( upload_size, }; vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr); CacheTransientData(upload_base, upload_size, offset); @@ -538,7 +544,7 @@ std::pair BufferCache::UploadVertexBuffer( } VkDescriptorSet BufferCache::PrepareVertexSet( - VkCommandBuffer setup_buffer, VkFence fence, + VkCommandBuffer command_buffer, VkFence fence, std::vector vertex_bindings) { if (!vertex_descriptor_pool_->has_open_batch()) { vertex_descriptor_pool_->BeginBatch(fence); @@ -559,7 +565,7 @@ VkDescriptorSet BufferCache::PrepareVertexSet( 0, 0, 0, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, buffer_infos, nullptr, @@ -597,7 +603,7 @@ VkDescriptorSet BufferCache::PrepareVertexSet( // Upload (or get a cached copy of) the buffer. auto buffer_ref = - UploadVertexBuffer(setup_buffer, physical_address, source_length, + UploadVertexBuffer(command_buffer, physical_address, source_length, static_cast(fetch->endian), fence); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 65292e9ce..2ce759170 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -1000,9 +1000,17 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( return UpdateStatus::kCompatible; } + // TODO/HACK(DrChat): This is a bit silly, but we'll just do this for now. + // We don't use vertex input. state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; state_info.pNext = nullptr; state_info.flags = 0; + state_info.vertexBindingDescriptionCount = 0; + state_info.vertexAttributeDescriptionCount = 0; + state_info.pVertexBindingDescriptions = nullptr; + state_info.pVertexAttributeDescriptions = nullptr; + + return UpdateStatus::kCompatible; auto& vertex_binding_descrs = update_vertex_input_state_binding_descrs_; auto& vertex_attrib_descrs = update_vertex_input_state_attrib_descrs_;