diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index 48b5b6e63..edaad6344 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -235,6 +235,9 @@ void SpirvShaderTranslator::StartTranslation() {
   var_main_address_relative_ = builder_->createVariable(
       spv::NoPrecision, spv::StorageClassFunction, type_int4_,
       "xe_var_address_relative", const_int4_0_);
+  var_main_previous_scalar_ = builder_->createVariable(
+      spv::NoPrecision, spv::StorageClassFunction, type_float_,
+      "xe_var_previous_scalar", const_float_0_);
   uint32_t register_array_size = register_count();
   if (register_array_size) {
     id_vector_temp_.clear();
@@ -1128,6 +1131,31 @@ spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents(
                                        operand_storage, id_vector_temp_util_);
 }
 
+void SpirvShaderTranslator::GetOperandScalarXY(
+    spv::Id operand_storage, const InstructionOperand& original_operand,
+    spv::Id& a_out, spv::Id& b_out, bool invert_negate, bool force_absolute) {
+  spv::Id a = GetOperandComponents(operand_storage, original_operand, 0b0001,
+                                   invert_negate, force_absolute);
+  a_out = a;
+  b_out = original_operand.GetComponent(0) != original_operand.GetComponent(1)
+              ? GetOperandComponents(operand_storage, original_operand, 0b0010,
+                                     invert_negate, force_absolute)
+              : a;
+}
+
+spv::Id SpirvShaderTranslator::GetAbsoluteOperand(
+    spv::Id operand_storage, const InstructionOperand& original_operand) {
+  if (original_operand.is_absolute_value && !original_operand.is_negated) {
+    return operand_storage;
+  }
+  EnsureBuildPointAvailable();
+  id_vector_temp_util_.clear();
+  id_vector_temp_util_.push_back(operand_storage);
+  return builder_->createBuiltinCall(builder_->getTypeId(operand_storage),
+                                     ext_inst_glsl_std_450_, GLSLstd450FAbs,
+                                     id_vector_temp_util_);
+}
+
 void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
                                         spv::Id value) {
   uint32_t used_write_mask = result.GetUsedWriteMask();
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index 395733b62..cdf812791 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -143,18 +143,38 @@ class SpirvShaderTranslator : public ShaderTranslator {
                                        components),
         original_operand, invert_negate, force_absolute);
   }
+  // Gets operand components 0 (a_out), 1 (b_out); identical ones share an Id.
+  void GetOperandScalarXY(spv::Id operand_storage,
+                          const InstructionOperand& original_operand,
+                          spv::Id& a_out, spv::Id& b_out,
+                          bool invert_negate = false,
+                          bool force_absolute = false);
+  // Returns |operand|; skips FAbs when the operand is absolute and not negated.
+  spv::Id GetAbsoluteOperand(spv::Id operand_storage,
+                             const InstructionOperand& original_operand);
   // The type of the value must be a float vector consisting of
   // xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate
   // a scalar into all used components) float, or the value can be spv::NoResult
   // if there's no result to store (like constants only).
   void StoreResult(const InstructionResult& result, spv::Id value);
 
+  // For Shader Model 3 multiplication (+-0 or denormal * anything = +0),
+  // replaces the value with +0 if the minimum of the two operands is 0. This
+  // must be called with absolute values of operands - use GetAbsoluteOperand!
+  spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
+                                 spv::Id operand_1_abs);
   // Return type is a xe::bit_count(result.GetUsedResultComponents())-component
   // float vector or a single float, depending on whether it's a reduction
   // instruction (check getTypeId of the result), or returns spv::NoResult if
   // nothing to store.
   spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
                                     bool& predicate_written);
+  // Returns the float value to write to the previous scalar register (ps) and
+  // to the destination. When the result would be ps itself (the retain_prev
+  // case), returns spv::NoResult instead, so the caller only emits OpLoad(ps)
+  // if the scalar result is actually written somewhere.
+  spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
+                                    bool& predicate_written);
 
   Features features_;
 
@@ -249,6 +269,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id var_main_address_relative_;
   // int.
   spv::Id var_main_address_absolute_;
+  // float.
+  spv::Id var_main_previous_scalar_;
   // float4[register_count()].
   spv::Id var_main_registers_;
   // VS only - float3 (special exports).
diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc
index 6f4d4397e..0aaf46473 100644
--- a/src/xenia/gpu/spirv_shader_translator_alu.cc
+++ b/src/xenia/gpu/spirv_shader_translator_alu.cc
@@ -9,6 +9,7 @@
 
 #include "xenia/gpu/spirv_shader_translator.h"
 
+#include <cfloat>
 #include <memory>
 
 #include "third_party/glslang/SPIRV/GLSL.std.450.h"
@@ -18,6 +19,33 @@
 namespace xe {
 namespace gpu {
 
+// Emits a component-wise OpSelect that replaces `value` with +0 wherever
+// min(operand_0_abs, operand_1_abs) is 0. All three Ids must have the same
+// number of components (asserted below).
+spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value,
+                                                      spv::Id operand_0_abs,
+                                                      spv::Id operand_1_abs) {
+  EnsureBuildPointAvailable();
+  int num_components = builder_->getNumComponents(value);
+  assert_true(builder_->getNumComponents(operand_0_abs) == num_components);
+  assert_true(builder_->getNumComponents(operand_1_abs) == num_components);
+  id_vector_temp_util_.clear();
+  id_vector_temp_util_.reserve(2);
+  id_vector_temp_util_.push_back(operand_0_abs);
+  id_vector_temp_util_.push_back(operand_1_abs);
+  // The result type of OpSelect has to match the type of the selected objects,
+  // so it's picked by the component count like the other types used here.
+  return builder_->createTriOp(
+      spv::OpSelect, type_float_vectors_[num_components - 1],
+      builder_->createBinOp(
+          spv::OpFOrdEqual, type_bool_vectors_[num_components - 1],
+          builder_->createBuiltinCall(type_float_vectors_[num_components - 1],
+                                      ext_inst_glsl_std_450_, GLSLstd450NMin,
+                                      id_vector_temp_util_),
+          const_float_vectors_0_[num_components - 1]),
+      const_float_vectors_0_[num_components - 1], value);
+}
+
 void SpirvShaderTranslator::ProcessAluInstruction(
     const ParsedAluInstruction& instr) {
   if (instr.IsNop()) {
@@ -42,11 +70,25 @@ void SpirvShaderTranslator::ProcessAluInstruction(
   bool predicate_written_vector = false;
   spv::Id vector_result =
       ProcessVectorAluOperation(instr, predicate_written_vector);
-  // TODO(Triang3l): Process the ALU scalar operation.
+  bool predicate_written_scalar = false;
+  spv::Id scalar_result =
+      ProcessScalarAluOperation(instr, predicate_written_scalar);
+
+  if (scalar_result != spv::NoResult) {
+    builder_->createStore(scalar_result, var_main_previous_scalar_);
+  } else {
+    // Special retain_prev case - load ps only if needed and don't store the
+    // same value back to ps.
+    if (instr.scalar_result.GetUsedWriteMask()) {
+      scalar_result =
+          builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision);
+    }
+  }
 
   StoreResult(instr.vector_and_constant_result, vector_result);
+  StoreResult(instr.scalar_result, scalar_result);
 
-  if (predicate_written_vector) {
+  if (predicate_written_vector || predicate_written_scalar) {
     cf_exec_predicate_written_ = true;
     CloseInstructionPredication();
   }
@@ -186,15 +228,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
         // Check if the different components in any of the operands are zero,
         // even if the other is NaN - if min(|a|, |b|) is 0.
         for (uint32_t i = 0; i < 2; ++i) {
-          if (instr.vector_operands[i].is_absolute_value &&
-              !instr.vector_operands[i].is_negated) {
-            continue;
-          }
-          id_vector_temp_.clear();
-          id_vector_temp_.push_back(different_operands[i]);
-          different_operands[i] = builder_->createBuiltinCall(
-              different_type, ext_inst_glsl_std_450_, GLSLstd450FAbs,
-              id_vector_temp_);
+          different_operands[i] = GetAbsoluteOperand(different_operands[i],
+                                                     instr.vector_operands[i]);
         }
         id_vector_temp_.clear();
         id_vector_temp_.reserve(2);
@@ -465,32 +500,12 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
         builder_->addDecoration(product, spv::DecorationNoContraction);
         if (different & (1 << i)) {
           // Shader Model 3: +0 or denormal * anything = +-0.
-          // Check if the different components in any of the operands are zero,
-          // even if the other is NaN - if min(|a|, |b|) is 0, if yes, replace
-          // the result with zero.
-          for (uint32_t j = 0; j < 2; ++j) {
-            if (instr.vector_operands[j].is_absolute_value &&
-                !instr.vector_operands[j].is_negated) {
-              continue;
-            }
-            id_vector_temp_.clear();
-            id_vector_temp_.push_back(operand_components[j]);
-            operand_components[j] =
-                builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
-                                            GLSLstd450FAbs, id_vector_temp_);
-          }
-          id_vector_temp_.clear();
-          id_vector_temp_.reserve(2);
-          id_vector_temp_.push_back(operand_components[0]);
-          id_vector_temp_.push_back(operand_components[1]);
-          product = builder_->createTriOp(
-              spv::OpSelect, type_float_,
-              builder_->createBinOp(spv::OpFOrdEqual, type_bool_,
-                                    builder_->createBuiltinCall(
-                                        type_float_, ext_inst_glsl_std_450_,
-                                        GLSLstd450NMin, id_vector_temp_),
-                                    const_float_0_),
-              const_float_0_, product);
+          product = ZeroIfAnyOperandIsZero(
+              product,
+              GetAbsoluteOperand(operand_components[0],
+                                 instr.vector_operands[0]),
+              GetAbsoluteOperand(operand_components[1],
+                                 instr.vector_operands[1]));
         }
         if (!i) {
           result = product;
@@ -888,29 +903,11 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
         if (!(instr.vector_operands[0].GetIdenticalComponents(
                   instr.vector_operands[1]) &
               0b0010)) {
-          for (uint32_t i = 0; i < 2; ++i) {
-            if (instr.vector_operands[i].is_absolute_value &&
-                !instr.vector_operands[i].is_negated) {
-              continue;
-            }
-            id_vector_temp_.clear();
-            id_vector_temp_.push_back(operands_y[i]);
-            operands_y[i] =
-                builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
-                                            GLSLstd450FAbs, id_vector_temp_);
-          }
-          id_vector_temp_.clear();
-          id_vector_temp_.reserve(2);
-          id_vector_temp_.push_back(operands_y[0]);
-          id_vector_temp_.push_back(operands_y[1]);
-          result_y = builder_->createTriOp(
-              spv::OpSelect, type_float_,
-              builder_->createBinOp(spv::OpFOrdEqual, type_bool_,
-                                    builder_->createBuiltinCall(
-                                        type_float_, ext_inst_glsl_std_450_,
-                                        GLSLstd450NMin, id_vector_temp_),
-                                    const_float_0_),
-              const_float_0_, result_y);
+          // Shader Model 3: +0 or denormal * anything = +-0.
+          result_y = ZeroIfAnyOperandIsZero(
+              result_y,
+              GetAbsoluteOperand(operands_y[0], instr.vector_operands[0]),
+              GetAbsoluteOperand(operands_y[1], instr.vector_operands[1]));
         }
       }
       id_vector_temp_.clear();
@@ -953,5 +950,239 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
   return spv::NoResult;
 }
 
+// Translates the scalar operation of an ALU instruction. Returns the float
+// result Id for the opcodes handled in the switch below and spv::NoResult
+// otherwise. predicate_written is reset to false and not set by these cases.
+spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
+    const ParsedAluInstruction& instr, bool& predicate_written) {
+  predicate_written = false;
+
+  spv::Id operand_storage[2] = {};
+  for (uint32_t i = 0; i < instr.scalar_operand_count; ++i) {
+    operand_storage[i] = LoadOperandStorage(instr.scalar_operands[i]);
+  }
+
+  // In case the paired vector instruction (if processed first) terminates the
+  // block (like via OpKill).
+  EnsureBuildPointAvailable();
+
+  // Lookup table for variants of instructions with similar structure.
+  // Indexed by the scalar opcode value; only the add and subtract variants
+  // are read by the cases below, every other entry is an OpNop placeholder.
+  static const unsigned int kOps[] = {
+      static_cast<unsigned int>(spv::OpFAdd),  // kAdds
+      static_cast<unsigned int>(spv::OpFAdd),  // kAddsPrev
+      static_cast<unsigned int>(spv::OpNop),   // kMuls
+      static_cast<unsigned int>(spv::OpNop),   // kMulsPrev
+      static_cast<unsigned int>(spv::OpNop),   // kMulsPrev2
+      static_cast<unsigned int>(spv::OpNop),   // kMaxs
+      static_cast<unsigned int>(spv::OpNop),   // kMins
+      static_cast<unsigned int>(spv::OpNop),   // kSeqs
+      static_cast<unsigned int>(spv::OpNop),   // kSgts
+      static_cast<unsigned int>(spv::OpNop),   // kSges
+      static_cast<unsigned int>(spv::OpNop),   // kSnes
+      static_cast<unsigned int>(spv::OpNop),   // kFrcs
+      static_cast<unsigned int>(spv::OpNop),   // kTruncs
+      static_cast<unsigned int>(spv::OpNop),   // kFloors
+      static_cast<unsigned int>(spv::OpNop),   // kExp
+      static_cast<unsigned int>(spv::OpNop),   // kLogc
+      static_cast<unsigned int>(spv::OpNop),   // kLog
+      static_cast<unsigned int>(spv::OpNop),   // kRcpc
+      static_cast<unsigned int>(spv::OpNop),   // kRcpf
+      static_cast<unsigned int>(spv::OpNop),   // kRcp
+      static_cast<unsigned int>(spv::OpNop),   // kRsqc
+      static_cast<unsigned int>(spv::OpNop),   // kRsqf
+      static_cast<unsigned int>(spv::OpNop),   // kRsq
+      static_cast<unsigned int>(spv::OpNop),   // kMaxAs
+      static_cast<unsigned int>(spv::OpNop),   // kMaxAsf
+      static_cast<unsigned int>(spv::OpFSub),  // kSubs
+      static_cast<unsigned int>(spv::OpFSub),  // kSubsPrev
+      static_cast<unsigned int>(spv::OpNop),   // kSetpEq
+      static_cast<unsigned int>(spv::OpNop),   // kSetpNe
+      static_cast<unsigned int>(spv::OpNop),   // kSetpGt
+      static_cast<unsigned int>(spv::OpNop),   // kSetpGe
+      static_cast<unsigned int>(spv::OpNop),   // kSetpInv
+      static_cast<unsigned int>(spv::OpNop),   // kSetpPop
+      static_cast<unsigned int>(spv::OpNop),   // kSetpClr
+      static_cast<unsigned int>(spv::OpNop),   // kSetpRstr
+      static_cast<unsigned int>(spv::OpNop),   // kKillsEq
+      static_cast<unsigned int>(spv::OpNop),   // kKillsGt
+      static_cast<unsigned int>(spv::OpNop),   // kKillsGe
+      static_cast<unsigned int>(spv::OpNop),   // kKillsNe
+      static_cast<unsigned int>(spv::OpNop),   // kKillsOne
+      static_cast<unsigned int>(spv::OpNop),   // kSqrt
+      static_cast<unsigned int>(spv::OpNop),   // Invalid
+      static_cast<unsigned int>(spv::OpNop),   // kMulsc0
+      static_cast<unsigned int>(spv::OpNop),   // kMulsc1
+      static_cast<unsigned int>(spv::OpNop),   // kAddsc0
+      static_cast<unsigned int>(spv::OpNop),   // kAddsc1
+      static_cast<unsigned int>(spv::OpNop),   // kSubsc0
+      static_cast<unsigned int>(spv::OpNop),   // kSubsc1
+      static_cast<unsigned int>(spv::OpNop),   // kSin
+      static_cast<unsigned int>(spv::OpNop),   // kCos
+      static_cast<unsigned int>(spv::OpNop),   // kRetainPrev
+  };
+
+  switch (instr.scalar_opcode) {
+    case ucode::AluScalarOpcode::kAdds:
+    case ucode::AluScalarOpcode::kSubs: {
+      spv::Id a, b;
+      GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b);
+      spv::Id result = builder_->createBinOp(
+          spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, a, b);
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      return result;
+    }
+    case ucode::AluScalarOpcode::kAddsPrev:
+    case ucode::AluScalarOpcode::kSubsPrev: {
+      spv::Id result = builder_->createBinOp(
+          spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_,
+          GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
+                               0b0001),
+          builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision));
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      return result;
+    }
+    case ucode::AluScalarOpcode::kMuls: {
+      spv::Id a, b;
+      GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b);
+      spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, b);
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      if (a != b) {
+        // Shader Model 3: +0 or denormal * anything = +-0.
+        // Both a and b are components of scalar_operands[0], so its modifiers
+        // (not the vector operand's) tell whether FAbs still has to be applied.
+        result = ZeroIfAnyOperandIsZero(
+            result, GetAbsoluteOperand(a, instr.scalar_operands[0]),
+            GetAbsoluteOperand(b, instr.scalar_operands[0]));
+      }
+      return result;
+    }
+    case ucode::AluScalarOpcode::kMulsPrev: {
+      spv::Id a = GetOperandComponents(operand_storage[0],
+                                       instr.scalar_operands[0], 0b0001);
+      spv::Id ps =
+          builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision);
+      spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, ps);
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      // Shader Model 3: +0 or denormal * anything = +-0.
+      id_vector_temp_.clear();
+      id_vector_temp_.push_back(ps);
+      return ZeroIfAnyOperandIsZero(
+          result, GetAbsoluteOperand(a, instr.scalar_operands[0]),
+          builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                      GLSLstd450FAbs, id_vector_temp_));
+    }
+    case ucode::AluScalarOpcode::kMulsPrev2: {
+      // Check if need to select the src0.a * ps case.
+      // Selection merge must be the penultimate instruction in the block, check
+      // the condition before it.
+      spv::Id ps =
+          builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision);
+      // ps != -FLT_MAX.
+      spv::Id const_float_max_neg = builder_->makeFloatConstant(-FLT_MAX);
+      spv::Id condition = builder_->createBinOp(
+          spv::OpFUnordNotEqual, type_bool_, ps, const_float_max_neg);
+      // isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX
+      // is already loaded to an SGPR, this is also false if it's NaN.
+      id_vector_temp_.clear();
+      id_vector_temp_.push_back(ps);
+      spv::Id ps_abs = builder_->createBuiltinCall(
+          type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_);
+      spv::Id ps_abs_neg =
+          builder_->createUnaryOp(spv::OpFNegate, type_float_, ps_abs);
+      builder_->addDecoration(ps_abs_neg, spv::DecorationNoContraction);
+      condition = builder_->createBinOp(
+          spv::OpLogicalAnd, type_bool_, condition,
+          builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_,
+                                ps_abs_neg, const_float_max_neg));
+      // isfinite(src0.b), or -|src0.b| >= -FLT_MAX for the same reason.
+      spv::Id b = GetOperandComponents(operand_storage[0],
+                                       instr.scalar_operands[0], 0b0010);
+      spv::Id b_abs_neg = b;
+      if (!instr.scalar_operands[0].is_absolute_value) {
+        id_vector_temp_.clear();
+        id_vector_temp_.push_back(b_abs_neg);
+        b_abs_neg =
+            builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                        GLSLstd450FAbs, id_vector_temp_);
+      }
+      if (!instr.scalar_operands[0].is_absolute_value ||
+          !instr.scalar_operands[0].is_negated) {
+        b_abs_neg =
+            builder_->createUnaryOp(spv::OpFNegate, type_float_, b_abs_neg);
+        builder_->addDecoration(b_abs_neg, spv::DecorationNoContraction);
+      }
+      condition = builder_->createBinOp(
+          spv::OpLogicalAnd, type_bool_, condition,
+          builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_,
+                                b_abs_neg, const_float_max_neg));
+      // src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
+      // for NaN).
+      condition = builder_->createBinOp(
+          spv::OpLogicalAnd, type_bool_, condition,
+          builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b,
+                                const_float_0_));
+      spv::Block& multiply_block = builder_->makeNewBlock();
+      spv::Block& merge_block = builder_->makeNewBlock();
+      {
+        std::unique_ptr<spv::Instruction> selection_merge_op =
+            std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
+        selection_merge_op->addIdOperand(merge_block.getId());
+        selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone);
+        builder_->getBuildPoint()->addInstruction(
+            std::move(selection_merge_op));
+      }
+      {
+        std::unique_ptr<spv::Instruction> branch_conditional_op =
+            std::make_unique<spv::Instruction>(spv::OpBranchConditional);
+        branch_conditional_op->addIdOperand(condition);
+        branch_conditional_op->addIdOperand(multiply_block.getId());
+        branch_conditional_op->addIdOperand(merge_block.getId());
+        // More likely to multiply than to return -FLT_MAX.
+        branch_conditional_op->addImmediateOperand(2);
+        branch_conditional_op->addImmediateOperand(1);
+        builder_->getBuildPoint()->addInstruction(
+            std::move(branch_conditional_op));
+      }
+      spv::Block& head_block = *builder_->getBuildPoint();
+      multiply_block.addPredecessor(&head_block);
+      merge_block.addPredecessor(&head_block);
+      // Multiplication case.
+      builder_->setBuildPoint(&multiply_block);
+      spv::Id a = instr.scalar_operands[0].GetComponent(0) !=
+                          instr.scalar_operands[0].GetComponent(1)
+                      ? GetOperandComponents(operand_storage[0],
+                                             instr.scalar_operands[0], 0b0001)
+                      : b;
+      spv::Id product = builder_->createBinOp(spv::OpFMul, type_float_, a, ps);
+      builder_->addDecoration(product, spv::DecorationNoContraction);
+      // Shader Model 3: +0 or denormal * anything = +-0.
+      product = ZeroIfAnyOperandIsZero(
+          product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs);
+      builder_->createBranch(&merge_block);
+      // Merge case - choose between the product and -FLT_MAX.
+      builder_->setBuildPoint(&merge_block);
+      {
+        std::unique_ptr<spv::Instruction> phi_op =
+            std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                               type_float_, spv::OpPhi);
+        phi_op->addIdOperand(product);
+        phi_op->addIdOperand(multiply_block.getId());
+        phi_op->addIdOperand(const_float_max_neg);
+        phi_op->addIdOperand(head_block.getId());
+        spv::Id phi_result = phi_op->getResultId();
+        builder_->getBuildPoint()->addInstruction(std::move(phi_op));
+        return phi_result;
+      }
+    }
+    // TODO(Triang3l): Implement the rest of instructions.
+  }
+
+  /* assert_unhandled_case(instr.scalar_opcode);
+  EmitTranslationError("Unknown ALU scalar operation"); */
+  return spv::NoResult;
+}
+
 }  // namespace gpu
 }  // namespace xe