diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index dee001b99..ee8df339e 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -29,6 +29,10 @@ void SpirvShaderTranslator::Reset() {
 
   builder_.reset();
 
+  uniform_float_constants_ = spv::NoResult;
+
+  var_main_registers_ = spv::NoResult;
+
   main_switch_op_.reset();
   main_switch_next_pc_phi_operands_.clear();
 
@@ -85,15 +89,42 @@ void SpirvShaderTranslator::StartTranslation() {
   const_float4_0_ =
       builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
 
+  // Common uniform buffer - float constants.
+  uint32_t float_constant_count = constant_register_map().float_count;
+  if (float_constant_count) {
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(1);
+    id_vector_temp_.push_back(builder_->makeArrayType(
+        type_float4_, builder_->makeUintConstant(float_constant_count),
+        sizeof(float) * 4));
+    // Currently (as of October 24, 2020) makeArrayType only uses the stride to
+    // check if deduplication can be done - the array stride decoration needs to
+    // be applied explicitly.
+    builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
+                            sizeof(float) * 4);
+    spv::Id type_float_constants =
+        builder_->makeStructType(id_vector_temp_, "XeFloatConstants");
+    builder_->addMemberName(type_float_constants, 0, "float_constants");
+    builder_->addMemberDecoration(type_float_constants, 0,
+                                  spv::DecorationOffset, 0);
+    builder_->addDecoration(type_float_constants, spv::DecorationBlock);
+    uniform_float_constants_ = builder_->createVariable(
+        spv::NoPrecision, spv::StorageClassUniform, type_float_constants,
+        "xe_uniform_float_constants");
+    builder_->addDecoration(
+        uniform_float_constants_, spv::DecorationDescriptorSet,
+        int(IsSpirvFragmentShader() ? kDescriptorSetFloatConstantsPixel
+                                    : kDescriptorSetFloatConstantsVertex));
+    builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding,
+                            0);
+  }
+
   // Common uniform buffer - bool and loop constants.
   id_vector_temp_.clear();
   id_vector_temp_.reserve(2);
   // 256 bool constants.
   id_vector_temp_.push_back(builder_->makeArrayType(
       type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4));
-  // Currently (as of October 24, 2020) makeArrayType only uses the stride to
-  // check if deduplication can be done - the array stride decoration needs to
-  // be applied explicitly.
   builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
                           sizeof(uint32_t) * 4);
   // 32 loop constants.
@@ -188,7 +219,7 @@ void SpirvShaderTranslator::StartTranslation() {
   // Main loop header - based on whether it's the first iteration (entered from
   // the function or from the continuation), choose the program counter.
   builder_->setBuildPoint(main_loop_header_);
-  spv::Id main_loop_pc_current = 0;
+  spv::Id main_loop_pc_current = spv::NoResult;
   if (has_main_switch) {
     // OpPhi must be the first in the block.
     id_vector_temp_.clear();
@@ -704,15 +735,24 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
       builder_->makeArrayType(type_float_, builder_->makeUintConstant(1), 0));
   spv::Id type_struct_per_vertex =
       builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex");
+  builder_->addMemberDecoration(type_struct_per_vertex,
+                                kOutputPerVertexMemberPosition,
+                                spv::DecorationInvariant);
   builder_->addMemberDecoration(type_struct_per_vertex,
                                 kOutputPerVertexMemberPosition,
                                 spv::DecorationBuiltIn, spv::BuiltInPosition);
   builder_->addMemberDecoration(type_struct_per_vertex,
                                 kOutputPerVertexMemberPointSize,
                                 spv::DecorationBuiltIn, spv::BuiltInPointSize);
+  builder_->addMemberDecoration(type_struct_per_vertex,
+                                kOutputPerVertexMemberClipDistance,
+                                spv::DecorationInvariant);
   builder_->addMemberDecoration(
       type_struct_per_vertex, kOutputPerVertexMemberClipDistance,
       spv::DecorationBuiltIn, spv::BuiltInClipDistance);
+  builder_->addMemberDecoration(type_struct_per_vertex,
+                                kOutputPerVertexMemberCullDistance,
+                                spv::DecorationInvariant);
   builder_->addMemberDecoration(
       type_struct_per_vertex, kOutputPerVertexMemberCullDistance,
       spv::DecorationBuiltIn, spv::BuiltInCullDistance);
@@ -902,5 +942,132 @@ void SpirvShaderTranslator::CloseExecConditionals() {
   cf_exec_predicate_written_ = false;
 }
 
+spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
+    InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) {
+  EnsureBuildPointAvailable();
+  spv::Id base_pointer = spv::NoResult;
+  switch (addressing_mode) {
+    case InstructionStorageAddressingMode::kStatic:
+      return builder_->makeIntConstant(int(storage_index));
+    case InstructionStorageAddressingMode::kAddressAbsolute:
+      base_pointer = var_main_address_absolute_;
+      break;
+    case InstructionStorageAddressingMode::kAddressRelative:
+      // Load X component.
+      id_vector_temp_util_.clear();
+      id_vector_temp_util_.reserve(1);
+      id_vector_temp_util_.push_back(const_int_0_);
+      base_pointer = builder_->createAccessChain(spv::StorageClassFunction,
+                                                 var_main_address_relative_,
+                                                 id_vector_temp_util_);
+      break;
+  }
+  assert_not_zero(base_pointer);
+  spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision);
+  if (storage_index) {
+    index =
+        builder_->createBinOp(spv::OpIAdd, type_int_, index,
+                              builder_->makeIntConstant(int(storage_index)));
+  }
+  return index;
+}
+
+spv::Id SpirvShaderTranslator::LoadOperandStorage(
+    const InstructionOperand& operand) {
+  spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode,
+                                            operand.storage_index);
+  EnsureBuildPointAvailable();
+  spv::Id vec4_pointer = spv::NoResult;
+  switch (operand.storage_source) {
+    case InstructionStorageSource::kRegister:
+      assert_not_zero(var_main_registers_);
+      id_vector_temp_util_.clear();
+      id_vector_temp_util_.reserve(1);
+      // Array element.
+      id_vector_temp_util_.push_back(index);
+      vec4_pointer = builder_->createAccessChain(
+          spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_);
+      break;
+    case InstructionStorageSource::kConstantFloat:
+      assert_not_zero(uniform_float_constants_);
+      id_vector_temp_util_.clear();
+      id_vector_temp_util_.reserve(2);
+      // The first and the only structure member.
+      id_vector_temp_util_.push_back(const_int_0_);
+      // Array element.
+      id_vector_temp_util_.push_back(index);
+      vec4_pointer = builder_->createAccessChain(spv::StorageClassUniform,
+                                                 uniform_float_constants_,
+                                                 id_vector_temp_util_);
+      break;
+    default:
+      assert_unhandled_case(operand.storage_source);
+  }
+  assert_not_zero(vec4_pointer);
+  return builder_->createLoad(vec4_pointer, spv::NoPrecision);
+}
+
+spv::Id SpirvShaderTranslator::ApplyOperandModifiers(
+    spv::Id operand_value, const InstructionOperand& original_operand,
+    bool invert_negate, bool force_absolute) {
+  spv::Id type = builder_->getTypeId(operand_value);
+  assert_true(type != spv::NoType);
+  if (type == spv::NoType) {
+    return operand_value;
+  }
+  if (original_operand.is_absolute_value || force_absolute) {
+    EnsureBuildPointAvailable();
+    id_vector_temp_util_.clear();
+    id_vector_temp_util_.reserve(1);
+    id_vector_temp_util_.push_back(operand_value);
+    operand_value = builder_->createBuiltinCall(
+        type, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_util_);
+  }
+  if (original_operand.is_negated != invert_negate) {
+    EnsureBuildPointAvailable();
+    operand_value =
+        builder_->createUnaryOp(spv::OpFNegate, type, operand_value);
+    builder_->addDecoration(operand_value, spv::DecorationNoContraction);
+  }
+  return operand_value;
+}
+
+spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents(
+    spv::Id operand_storage, const InstructionOperand& original_operand,
+    uint32_t components) {
+  assert_not_zero(components);
+  if (!components) {
+    return spv::NoResult;
+  }
+  assert_true(components <= 0b1111);
+  if (components == 0b1111 && original_operand.IsStandardSwizzle()) {
+    return operand_storage;
+  }
+  EnsureBuildPointAvailable();
+  uint32_t component_count = xe::bit_count(components);
+  if (component_count == 1) {
+    uint32_t scalar_index;
+    xe::bit_scan_forward(components, &scalar_index);
+    return builder_->createCompositeExtract(
+        operand_storage, type_float_,
+        static_cast<unsigned int>(original_operand.GetComponent(scalar_index)) -
+            static_cast<unsigned int>(SwizzleSource::kX));
+  }
+  id_vector_temp_util_.clear();
+  id_vector_temp_util_.reserve(component_count);
+  uint32_t components_remaining = components;
+  uint32_t component_index;
+  while (xe::bit_scan_forward(components_remaining, &component_index)) {
+    components_remaining &= ~(uint32_t(1) << component_index);
+    id_vector_temp_util_.push_back(
+        static_cast<unsigned int>(
+            original_operand.GetComponent(component_index)) -
+        static_cast<unsigned int>(SwizzleSource::kX));
+  }
+  return builder_->createRvalueSwizzle(spv::NoPrecision,
+                                       type_float_vectors_[component_count - 1],
+                                       operand_storage, id_vector_temp_util_);
+}
+
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index 473afb65c..c4dbe2c1a 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -81,9 +81,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
   }
   bool IsSpirvFragmentShader() const { return is_pixel_shader(); }
 
-  // Must be called before emitting any non-control-flow SPIR-V operations in
-  // translator callback to ensure that if the last instruction added was
-  // something like OpBranch - in this case, an unreachable block is created.
+  // Must be called in translator callbacks before emitting any SPIR-V
+  // operations that must be in a block. If the last instruction added was
+  // something like OpBranch, this creates an unreachable block for the new
+  // operations.
   void EnsureBuildPointAvailable();
 
   void StartVertexOrTessEvalShaderBeforeMain();
@@ -109,12 +110,57 @@ class SpirvShaderTranslator : public ShaderTranslator {
   // labels) and updates the state accordingly.
   void CloseExecConditionals();
 
+  // Returns a signed integer index into the operand storage: a constant for
+  // static addressing, otherwise the value loaded from the absolute or the
+  // relative (X component) address variable, plus storage_index if non-zero.
+  spv::Id GetStorageAddressingIndex(
+      InstructionStorageAddressingMode addressing_mode, uint32_t storage_index);
+  // Loads unswizzled operand without sign modifiers as float4.
+  spv::Id LoadOperandStorage(const InstructionOperand& operand);
+  // Takes the absolute value if the operand requests it or force_absolute is
+  // set, then negates if the operand's negation flag differs from
+  // invert_negate.
+  spv::Id ApplyOperandModifiers(spv::Id operand_value,
+                                const InstructionOperand& original_operand,
+                                bool invert_negate = false,
+                                bool force_absolute = false);
+  // Returns the requested components, with the operand's swizzle applied, in a
+  // condensed form, but without negation / absolute value modifiers. The
+  // storage is float4, no matter what the component count of original_operand
+  // is (the storage will be either r# or c#, but the instruction may be
+  // scalar).
+  spv::Id GetUnmodifiedOperandComponents(
+      spv::Id operand_storage, const InstructionOperand& original_operand,
+      uint32_t components);
+  // GetUnmodifiedOperandComponents with ApplyOperandModifiers applied to the
+  // result.
+  spv::Id GetOperandComponents(spv::Id operand_storage,
+                               const InstructionOperand& original_operand,
+                               uint32_t components, bool invert_negate = false,
+                               bool force_absolute = false) {
+    return ApplyOperandModifiers(
+        GetUnmodifiedOperandComponents(operand_storage, original_operand,
+                                       components),
+        original_operand, invert_negate, force_absolute);
+  }
+
+  // Return type is a float vector of
+  // xe::bit_count(instr.vector_and_constant_result.GetUsedResultComponents())
+  // components or a single float, depending on whether it's a reduction
+  // instruction (check getTypeId of the result), or returns spv::NoResult if
+  // there is nothing to store.
+  spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
+                                    bool& predicate_written);
+
   bool supports_clip_distance_;
   bool supports_cull_distance_;
 
   std::unique_ptr<spv::Builder> builder_;
 
   std::vector<spv::Id> id_vector_temp_;
+  // For helper functions like operand loading, so they don't conflict with
+  // id_vector_temp_ usage in bigger callbacks.
+  std::vector<spv::Id> id_vector_temp_util_;
   std::vector<unsigned int> uint_vector_temp_;
 
   spv::Id ext_inst_glsl_std_450_;
@@ -126,10 +172,16 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id type_uint_;
   spv::Id type_uint3_;
   spv::Id type_uint4_;
-  spv::Id type_float_;
-  spv::Id type_float2_;
-  spv::Id type_float3_;
-  spv::Id type_float4_;
+  union {
+    struct {
+      spv::Id type_float_;
+      spv::Id type_float2_;
+      spv::Id type_float3_;
+      spv::Id type_float4_;
+    };
+    // Index = component count - 1.
+    spv::Id type_float_vectors_[4];
+  };
 
   spv::Id const_int_0_;
   spv::Id const_int4_0_;
@@ -138,6 +190,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id const_float_0_;
   spv::Id const_float4_0_;
 
+  spv::Id uniform_float_constants_;
   spv::Id uniform_bool_loop_constants_;
 
   // VS as VS only - int.
diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc
index 4a051012a..4edf4c6df 100644
--- a/src/xenia/gpu/spirv_shader_translator_alu.cc
+++ b/src/xenia/gpu/spirv_shader_translator_alu.cc
@@ -9,6 +9,8 @@
 
 #include "xenia/gpu/spirv_shader_translator.h"
 
+#include "xenia/base/math.h"
+
 namespace xe {
 namespace gpu {
 
@@ -21,7 +23,77 @@ void SpirvShaderTranslator::ProcessAluInstruction(
 
   UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
 
-  // TODO(Triang3l): Translate the ALU instruction.
+  // Floating-point arithmetic operations (addition, subtraction, negation,
+  // multiplication, dot product, division, modulo - see isArithmeticOperation
+  // in propagateNoContraction of glslang) must have the NoContraction
+  // decoration to prevent reordering to make sure floating-point calculations
+  // are optimized predictably and exactly the same in different shaders to
+  // allow for multipass rendering (in addition to the Invariant decoration on
+  // outputs).
+
+  // Whether the instruction has changed the predicate, and it needs to be
+  // checked again later.
+  bool predicate_written_vector = false;
+  ProcessVectorAluOperation(instr, predicate_written_vector);
+  // TODO(Triang3l): Process the ALU scalar operation.
+
+  if (predicate_written_vector) {
+    cf_exec_predicate_written_ = true;
+    CloseInstructionPredication();
+  }
+}
+
+spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
+    const ParsedAluInstruction& instr, bool& predicate_written) {
+  predicate_written = false;
+
+  uint32_t used_result_components =
+      instr.vector_and_constant_result.GetUsedResultComponents();
+  if (!used_result_components &&
+      !AluVectorOpHasSideEffects(instr.vector_opcode)) {
+    return spv::NoResult;
+  }
+  uint32_t used_result_component_count = xe::bit_count(used_result_components);
+
+  // Load operand storage without swizzle and sign modifiers.
+  // A small shortcut, operands of cube are the same, but swizzled.
+  uint32_t operand_count;
+  if (instr.vector_opcode == ucode::AluVectorOpcode::kCube) {
+    operand_count = 1;
+  } else {
+    operand_count = instr.vector_operand_count;
+  }
+  spv::Id operand_storage[3] = {};
+  for (uint32_t i = 0; i < operand_count; ++i) {
+    operand_storage[i] = LoadOperandStorage(instr.vector_operands[i]);
+  }
+  spv::Id result_vector_type =
+      used_result_component_count
+          ? type_float_vectors_[used_result_component_count - 1]
+          : spv::NoType;
+
+  // In case the paired scalar instruction (if processed first) terminates the
+  // block (like via OpKill).
+  EnsureBuildPointAvailable();
+
+  switch (instr.vector_opcode) {
+    case ucode::AluVectorOpcode::kAdd: {
+      spv::Id result = builder_->createBinOp(
+          spv::OpFAdd, result_vector_type,
+          GetOperandComponents(operand_storage[0], instr.vector_operands[0],
+                               used_result_components),
+          GetOperandComponents(operand_storage[1], instr.vector_operands[1],
+                               used_result_components));
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      return result;
+    } break;
+      // TODO(Triang3l): Handle all instructions.
+    default:
+      break;
+  }
+
+  // Invalid instruction.
+  return spv::NoResult;
 }
 
 }  // namespace gpu