diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 7ab242724..7830174db 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -1057,8 +1057,11 @@ class DxbcShaderTranslator : public ShaderTranslator { // cubemap coordinate. void ArrayCoordToCubeDirection(uint32_t reg); - void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); - void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); + bool ProcessVectorAluOperation(const ParsedAluInstruction& instr, + bool& replicate_result_x, + bool& predicate_written); + bool ProcessScalarAluOperation(const ParsedAluInstruction& instr, + bool& predicate_written); // Appends a string to a DWORD stream, returns the DWORD-aligned length. static uint32_t AppendString(std::vector& dest, const char* source); @@ -1206,7 +1209,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // eM# in each `alloc export`, or UINT32_MAX if not used. uint32_t system_temps_memexport_data_[kMaxMemExports][5]; - // Vector ALU result/scratch (since Xenos write masks can contain swizzles). + // Vector ALU result or fetch scratch (since Xenos write masks can contain + // swizzles). uint32_t system_temp_pv_; // Temporary register ID for previous scalar result, program counter, // predicate and absolute address register. diff --git a/src/xenia/gpu/dxbc_shader_translator_alu.cc b/src/xenia/gpu/dxbc_shader_translator_alu.cc index b2bf60bec..bba16c647 100644 --- a/src/xenia/gpu/dxbc_shader_translator_alu.cc +++ b/src/xenia/gpu/dxbc_shader_translator_alu.cc @@ -17,29 +17,22 @@ namespace xe { namespace gpu { using namespace ucode; -void DxbcShaderTranslator::ProcessVectorAluInstruction( - const ParsedAluInstruction& instr) { - if (FLAGS_dxbc_source_map) { - instruction_disassembly_buffer_.Reset(); - instr.Disassemble(&instruction_disassembly_buffer_); - // Will be emitted by UpdateInstructionPredication. - } - UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition, - true); - // Whether the instruction has changed the predicate and it needs to be - // checked again later. - bool predicate_written = false; +bool DxbcShaderTranslator::ProcessVectorAluOperation( + const ParsedAluInstruction& instr, bool& replicate_result_x, + bool& predicate_written) { + replicate_result_x = false; + predicate_written = false; - // Whether the result is only in X and all components should be remapped to X - // while storing. - bool replicate_result = false; + if (!instr.has_vector_op) { + return false; + } // A small shortcut, operands of cube are the same, but swizzled. uint32_t operand_count; if (instr.vector_opcode == AluVectorOpcode::kCube) { operand_count = 1; } else { - operand_count = uint32_t(instr.operand_count); + operand_count = uint32_t(instr.vector_operand_count); } DxbcSourceOperand dxbc_operands[3]; // Whether the operand is the same as any previous operand, and thus is loaded @@ -47,9 +40,9 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( bool operands_duplicate[3] = {}; uint32_t operand_length_sums[3]; for (uint32_t i = 0; i < operand_count; ++i) { - const InstructionOperand& operand = instr.operands[i]; + const InstructionOperand& operand = instr.vector_operands[i]; for (uint32_t j = 0; j < i; ++j) { - if (operand == instr.operands[j]) { + if (operand == instr.vector_operands[j]) { operands_duplicate[i] = true; dxbc_operands[i] = dxbc_operands[j]; break; @@ -98,6 +91,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( D3D10_SB_OPCODE_MAX, }; + bool translated = true; switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | @@ -123,7 +117,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( UseDxbcSourceOperand(dxbc_operands[1]); ++stat_.instruction_count; ++stat_.float_instruction_count; - if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { + if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) { // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0), // flushing denormals (must be done using eq - doing bitwise comparison // doesn't flush denormals). @@ -287,7 +281,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( UseDxbcSourceOperand(dxbc_operands[2]); ++stat_.instruction_count; ++stat_.float_instruction_count; - if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { + if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) { // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // If any operand is zero or denormalized, just leave the addition part. uint32_t is_subnormal_temp = PushSystemTemp(); @@ -394,7 +388,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kDp4: case AluVectorOpcode::kDp3: case AluVectorOpcode::kDp2Add: { - if (instr.operands[0].EqualsAbsolute(instr.operands[1])) { + if (instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) { // The operands are the same when calculating vector length, no need to // emulate 0 * anything = 0 in this case. shader_code_.push_back( @@ -858,7 +852,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kMax4: - replicate_result = true; + replicate_result_x = true; // pv.xy = max(src0.xy, src0.zw) shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( @@ -891,7 +885,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGePush: predicate_written = true; - replicate_result = true; + replicate_result_x = true; // pv.xy = (src0.x == 0.0, src0.w == 0.0) shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( @@ -997,7 +991,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kKillGt: case AluVectorOpcode::kKillGe: case AluVectorOpcode::kKillNe: - replicate_result = true; + replicate_result_x = true; // pv = src0 op src1 shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE( kCoreOpcodes[uint32_t(instr.vector_opcode)]) | @@ -1094,7 +1088,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1); ++stat_.instruction_count; ++stat_.float_instruction_count; - if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { + if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) { // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // This is an attenuation calculation function, so infinity is probably // not very unlikely. @@ -1277,8 +1271,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( break; default: - assert_always(); - // Unknown instruction - don't modify pv. + assert_unhandled_case(instr.vector_opcode); + translated = false; break; } @@ -1289,37 +1283,26 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( } } - StoreResult(instr.result, system_temp_pv_, replicate_result, - instr.GetMemExportStreamConstant() != UINT32_MAX); - - if (predicate_written) { - cf_exec_predicate_written_ = true; - CloseInstructionPredication(); - } + return translated; } -void DxbcShaderTranslator::ProcessScalarAluInstruction( - const ParsedAluInstruction& instr) { - if (FLAGS_dxbc_source_map) { - instruction_disassembly_buffer_.Reset(); - instr.Disassemble(&instruction_disassembly_buffer_); - // Will be emitted by UpdateInstructionPredication. +bool DxbcShaderTranslator::ProcessScalarAluOperation( + const ParsedAluInstruction& instr, bool& predicate_written) { + predicate_written = false; + + if (!instr.has_scalar_op) { + return false; } - UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition, - true); - // Whether the instruction has changed the predicate and it needs to be - // checked again later. - bool predicate_written = false; DxbcSourceOperand dxbc_operands[3]; // Whether the operand is the same as any previous operand, and thus is loaded // only once. bool operands_duplicate[3] = {}; uint32_t operand_lengths[3]; - for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { - const InstructionOperand& operand = instr.operands[i]; + for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) { + const InstructionOperand& operand = instr.scalar_operands[i]; for (uint32_t j = 0; j < i; ++j) { - if (operand == instr.operands[j]) { + if (operand == instr.scalar_operands[j]) { operands_duplicate[i] = true; dxbc_operands[i] = dxbc_operands[j]; break; @@ -1385,6 +1368,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( D3D10_SB_OPCODE_SINCOS, }; + bool translated = true; switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: case AluScalarOpcode::kSubs: { @@ -1431,7 +1415,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1); ++stat_.instruction_count; ++stat_.float_instruction_count; - if (instr.operands[0].components[0] != instr.operands[0].components[1]) { + if (instr.scalar_operands[0].components[0] != + instr.scalar_operands[0].components[1]) { // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). uint32_t is_subnormal_temp = PushSystemTemp(); // Get the non-NaN multiplicand closer to zero to check if any of them @@ -1679,7 +1664,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kMaxs: case AluScalarOpcode::kMins: { // max is commonly used as mov. - if (instr.operands[0].components[0] == instr.operands[0].components[1]) { + if (instr.scalar_operands[0].components[0] == + instr.scalar_operands[0].components[1]) { shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + @@ -1990,7 +1976,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( ++stat_.instruction_count; ++stat_.conversion_instruction_count; // The `ps = max(src0.x, src0.y)` part. - if (instr.operands[0].components[0] == instr.operands[0].components[1]) { + if (instr.scalar_operands[0].components[0] == + instr.scalar_operands[0].components[1]) { shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + @@ -2308,7 +2295,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0); ++stat_.instruction_count; ++stat_.float_instruction_count; - if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { + if (!instr.scalar_operands[0].EqualsAbsolute(instr.scalar_operands[1])) { // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). uint32_t is_subnormal_temp = PushSystemTemp(); // Get the non-NaN multiplicand closer to zero to check if any of them @@ -2407,38 +2394,62 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( ++stat_.float_instruction_count; } break; + case AluScalarOpcode::kRetainPrev: + // No changes, but translated successfully (just write the old ps). + break; + default: - // May be retain_prev, in this case the current ps should be written, or - // something invalid that's better to ignore. - assert_true(instr.scalar_opcode == AluScalarOpcode::kRetainPrev); + assert_unhandled_case(instr.scalar_opcode); + translated = false; break; } - for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { - UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]); + for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) { + UnloadDxbcSourceOperand(dxbc_operands[instr.scalar_operand_count - 1 - i]); } - StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true); + return translated; +} - if (predicate_written) { +void DxbcShaderTranslator::ProcessAluInstruction( + const ParsedAluInstruction& instr) { + if (instr.is_nop()) { + return; + } + + if (FLAGS_dxbc_source_map) { + instruction_disassembly_buffer_.Reset(); + instr.Disassemble(&instruction_disassembly_buffer_); + // Will be emitted by UpdateInstructionPredication. + } + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition, + true); + + // Whether the instruction has changed the predicate and it needs to be + // checked again later. + bool predicate_written_vector = false; + // Whether the result is only in X and all components should be remapped to X + // while storing. + bool replicate_vector_x = false; + bool store_vector = ProcessVectorAluOperation(instr, replicate_vector_x, + predicate_written_vector); + bool predicate_written_scalar = false; + bool store_scalar = + ProcessScalarAluOperation(instr, predicate_written_scalar); + + if (store_vector) { + StoreResult(instr.vector_result, system_temp_pv_, replicate_vector_x, + instr.GetMemExportStreamConstant() != UINT32_MAX); + } + if (store_scalar) { + StoreResult(instr.scalar_result, system_temp_ps_pc_p0_a0_, true); + } + + if (predicate_written_vector || predicate_written_scalar) { cf_exec_predicate_written_ = true; CloseInstructionPredication(); } } -void DxbcShaderTranslator::ProcessAluInstruction( - const ParsedAluInstruction& instr) { - switch (instr.type) { - case ParsedAluInstruction::Type::kNop: - break; - case ParsedAluInstruction::Type::kVector: - ProcessVectorAluInstruction(instr); - break; - case ParsedAluInstruction::Type::kScalar: - ProcessScalarAluInstruction(instr); - break; - } -} - } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/glsl_shader_translator.cc b/src/xenia/gpu/glsl_shader_translator.cc index e1e8baff4..f1c334dc5 100644 --- a/src/xenia/gpu/glsl_shader_translator.cc +++ b/src/xenia/gpu/glsl_shader_translator.cc @@ -820,18 +820,36 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction( void GlslShaderTranslator::ProcessAluInstruction( const ParsedAluInstruction& instr) { - EmitSource("// "); + EmitSource("/*\n"); instr.Disassemble(&source_); + EmitSource("*/\n"); - switch (instr.type) { - case ParsedAluInstruction::Type::kNop: - break; - case ParsedAluInstruction::Type::kVector: - ProcessVectorAluInstruction(instr); - break; - case ParsedAluInstruction::Type::kScalar: - ProcessScalarAluInstruction(instr); - break; + if (instr.is_nop()) { + return; + } + + // Emit if statement only if we have a different predicate condition than our + // containing block. + bool conditional = false; + if (instr.is_predicated && + (!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) { + conditional = true; + EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!'); + Indent(); + } + + bool store_vector = ProcessVectorAluOperation(instr); + bool store_scalar = ProcessScalarAluOperation(instr); + if (store_vector) { + EmitStoreVectorResult(instr.vector_result); + } + if (store_scalar) { + EmitStoreScalarResult(instr.scalar_result); + } + + if (conditional) { + Unindent(); + EmitSourceDepth("}\n"); } } @@ -1041,20 +1059,14 @@ void GlslShaderTranslator::EmitStoreResult(const InstructionResult& result, EmitSource(";\n"); } -void GlslShaderTranslator::ProcessVectorAluInstruction( +bool GlslShaderTranslator::ProcessVectorAluOperation( const ParsedAluInstruction& instr) { - // Emit if statement only if we have a different predicate condition than our - // containing block. - bool conditional = false; - if (instr.is_predicated && - (!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) { - conditional = true; - EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!'); - Indent(); + if (!instr.has_vector_op) { + return false; } - for (size_t i = 0; i < instr.operand_count; ++i) { - EmitLoadOperand(i, instr.operands[i]); + for (size_t i = 0; i < instr.vector_operand_count; ++i) { + EmitLoadOperand(i, instr.vector_operands[i]); } switch (instr.vector_opcode) { @@ -1251,26 +1263,17 @@ void GlslShaderTranslator::ProcessVectorAluInstruction( break; } - EmitStoreVectorResult(instr.result); - - if (conditional) { - Unindent(); - EmitSourceDepth("}\n"); - } + return true; } -void GlslShaderTranslator::ProcessScalarAluInstruction( +bool GlslShaderTranslator::ProcessScalarAluOperation( const ParsedAluInstruction& instr) { - bool conditional = false; - if (instr.is_predicated && - (!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) { - conditional = true; - EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!'); - Indent(); + if (!instr.has_scalar_op) { + return false; } - for (size_t i = 0; i < instr.operand_count; ++i) { - EmitLoadOperand(i, instr.operands[i]); + for (size_t i = 0; i < instr.scalar_operand_count; ++i) { + EmitLoadOperand(i, instr.scalar_operands[i]); } switch (instr.scalar_opcode) { @@ -1595,12 +1598,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction( break; } - EmitStoreScalarResult(instr.result); - - if (conditional) { - Unindent(); - EmitSourceDepth("}\n"); - } + return true; } } // namespace gpu diff --git a/src/xenia/gpu/glsl_shader_translator.h b/src/xenia/gpu/glsl_shader_translator.h index a346f1a8d..e014a0dd4 100644 --- a/src/xenia/gpu/glsl_shader_translator.h +++ b/src/xenia/gpu/glsl_shader_translator.h @@ -77,8 +77,8 @@ class GlslShaderTranslator : public ShaderTranslator { bool cf_exec_pred_ = false; bool cf_exec_pred_cond_ = false; - void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); - void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); + bool ProcessVectorAluOperation(const ParsedAluInstruction& instr); + bool ProcessScalarAluOperation(const ParsedAluInstruction& instr); }; } // namespace gpu diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 1ed7ac23e..ed2dcf8fd 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -459,51 +459,62 @@ struct ParsedAluInstruction { // Index into the ucode dword source. uint32_t dword_index = 0; - enum class Type { - kNop, - kVector, - kScalar, - }; - // Type of the instruction. - Type type = Type::kNop; - bool is_nop() const { return type == Type::kNop; } - bool is_vector_type() const { return type == Type::kVector; } - bool is_scalar_type() const { return type == Type::kScalar; } - // Opcode for the instruction if it is a vector type. - ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd; - // Opcode for the instruction if it is a scalar type. - ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds; - // Friendly name of the instruction. - const char* opcode_name = nullptr; + // True if the vector part of the instruction needs to be executed and data + // about it in this structure is valid. + bool has_vector_op = false; + // True if the scalar part of the instruction needs to be executed and data + // about it in this structure is valid. + bool has_scalar_op = false; + bool is_nop() const { return !has_vector_op && !has_scalar_op; } + + // Opcode for the vector part of the instruction. + ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd; + // Opcode for the scalar part of the instruction. + ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds; + // Friendly name of the vector instruction. + const char* vector_opcode_name = nullptr; + // Friendly name of the scalar instruction. + const char* scalar_opcode_name = nullptr; - // True if the instruction is paired with another instruction. - bool is_paired = false; // True if the instruction is predicated on the specified // predicate_condition. bool is_predicated = false; // Expected predication condition value if predicated. bool predicate_condition = false; - // Describes how the instruction result is stored. - InstructionResult result; + // Describes how the vector operation result is stored. + InstructionResult vector_result; + // Describes how the scalar operation result is stored. + InstructionResult scalar_result; + // Both operations must be executed before any result is stored if vector and + // scalar operations are paired. There are cases of vector result being used + // as scalar operand or vice versa (the halo on Avalanche in Halo 3, for + // example), in this case there must be no dependency between the two + // operations. - // Number of source operands. - size_t operand_count = 0; - // Describes each source operand. - InstructionOperand operands[3]; + // Number of source operands of the vector operation. + size_t vector_operand_count = 0; + // Describes each source operand of the vector operation. + InstructionOperand vector_operands[3]; + // Number of source operands of the scalar operation. + size_t scalar_operand_count = 0; + // Describes each source operand of the scalar operation. + InstructionOperand scalar_operands[2]; // If this is a valid eA write (MAD with a stream constant), returns the index // of the stream float constant, otherwise returns UINT32_MAX. uint32_t GetMemExportStreamConstant() const { - if (result.storage_target == InstructionStorageTarget::kExportAddress && - is_vector_type() && vector_opcode == ucode::AluVectorOpcode::kMad && - result.has_all_writes() && - operands[2].storage_source == + if (has_vector_op && + vector_result.storage_target == + InstructionStorageTarget::kExportAddress && + vector_opcode == ucode::AluVectorOpcode::kMad && + vector_result.has_all_writes() && + vector_operands[2].storage_source == InstructionStorageSource::kConstantFloat && - operands[2].storage_addressing_mode == + vector_operands[2].storage_addressing_mode == InstructionStorageAddressingMode::kStatic && - operands[2].is_standard_swizzle()) { - return operands[2].storage_index; + vector_operands[2].is_standard_swizzle()) { + return vector_operands[2].storage_index; } return UINT32_MAX; } diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index f4a15b0ee..a01055f3e 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -1124,28 +1124,19 @@ const ShaderTranslator::AluOpcodeInfo }; void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) { - if (!op.has_vector_op() && !op.has_scalar_op()) { - ParsedAluInstruction instr; - instr.type = ParsedAluInstruction::Type::kNop; - instr.Disassemble(&ucode_disasm_buffer_); - ProcessAluInstruction(instr); - return; - } - ParsedAluInstruction instr; - if (op.has_vector_op()) { - const auto& opcode_info = - alu_vector_opcode_infos_[static_cast(op.vector_opcode())]; - ParseAluVectorInstruction(op, opcode_info, instr); - ProcessAluInstruction(instr); - } - if (op.has_scalar_op()) { - const auto& opcode_info = - alu_scalar_opcode_infos_[static_cast(op.scalar_opcode())]; - ParseAluScalarInstruction(op, opcode_info, instr); - ProcessAluInstruction(instr); - } + instr.dword_index = 0; + + instr.is_predicated = op.is_predicated(); + instr.predicate_condition = op.predicate_condition(); + + ParseAluVectorOperation(op, instr); + ParseAluScalarOperation(op, instr); + + instr.Disassemble(&ucode_disasm_buffer_); + + ProcessAluInstruction(instr); } void ParseAluInstructionOperand(const AluInstruction& op, int i, @@ -1238,62 +1229,64 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op, out_op->components[0] = GetSwizzleFromComponentIndex(a); } -void ShaderTranslator::ParseAluVectorInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info, - ParsedAluInstruction& i) { - i.dword_index = 0; - i.type = ParsedAluInstruction::Type::kVector; +void ShaderTranslator::ParseAluVectorOperation(const AluInstruction& op, + ParsedAluInstruction& i) { + i.has_vector_op = op.has_vector_op(); + if (!i.has_vector_op) { + return; + } i.vector_opcode = op.vector_opcode(); - i.opcode_name = opcode_info.name; - i.is_paired = op.has_scalar_op(); - i.is_predicated = op.is_predicated(); - i.predicate_condition = op.predicate_condition(); + const auto& opcode_info = + alu_vector_opcode_infos_[static_cast(op.vector_opcode())]; + i.vector_opcode_name = opcode_info.name; - i.result.is_export = op.is_export(); - i.result.is_clamped = op.vector_clamp(); - i.result.storage_target = InstructionStorageTarget::kRegister; - i.result.storage_index = 0; + i.vector_result.is_export = op.is_export(); + i.vector_result.is_clamped = op.vector_clamp(); + i.vector_result.storage_target = InstructionStorageTarget::kRegister; + i.vector_result.storage_index = 0; uint32_t dest_num = op.vector_dest(); if (!op.is_export()) { assert_true(dest_num < 32); - i.result.storage_target = InstructionStorageTarget::kRegister; - i.result.storage_index = dest_num; - i.result.storage_addressing_mode = + i.vector_result.storage_target = InstructionStorageTarget::kRegister; + i.vector_result.storage_index = dest_num; + i.vector_result.storage_addressing_mode = op.is_vector_dest_relative() ? InstructionStorageAddressingMode::kAddressRelative : InstructionStorageAddressingMode::kStatic; } else if (is_vertex_shader()) { switch (dest_num) { case 32: - i.result.storage_target = InstructionStorageTarget::kExportAddress; + i.vector_result.storage_target = + InstructionStorageTarget::kExportAddress; break; case 33: case 34: case 35: case 36: case 37: - i.result.storage_index = dest_num - 33; - i.result.storage_target = InstructionStorageTarget::kExportData; + i.vector_result.storage_index = dest_num - 33; + i.vector_result.storage_target = InstructionStorageTarget::kExportData; break; case 62: - i.result.storage_target = InstructionStorageTarget::kPosition; + i.vector_result.storage_target = InstructionStorageTarget::kPosition; break; case 63: - i.result.storage_target = InstructionStorageTarget::kPointSize; + i.vector_result.storage_target = InstructionStorageTarget::kPointSize; break; default: if (dest_num < 16) { - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + i.vector_result.storage_target = + InstructionStorageTarget::kInterpolant; + i.vector_result.storage_index = dest_num; } else { // Unimplemented. // assert_always(); XELOGE( - "ShaderTranslator::ParseAluVectorInstruction: Unsupported write " - "to export %d", + "ShaderTranslator::ParseAluVectorOperation: Unsupported write to " + "export %d", dest_num); - i.result.storage_target = InstructionStorageTarget::kNone; - i.result.storage_index = 0; + i.vector_result.storage_target = InstructionStorageTarget::kNone; + i.vector_result.storage_index = 0; } break; } @@ -1301,42 +1294,43 @@ void ShaderTranslator::ParseAluVectorInstruction( switch (dest_num) { case 0: case 63: // ? masked? - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 0; + i.vector_result.storage_target = InstructionStorageTarget::kColorTarget; + i.vector_result.storage_index = 0; break; case 1: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 1; + i.vector_result.storage_target = InstructionStorageTarget::kColorTarget; + i.vector_result.storage_index = 1; break; case 2: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 2; + i.vector_result.storage_target = InstructionStorageTarget::kColorTarget; + i.vector_result.storage_index = 2; break; case 3: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 3; + i.vector_result.storage_target = InstructionStorageTarget::kColorTarget; + i.vector_result.storage_index = 3; break; case 32: - i.result.storage_target = InstructionStorageTarget::kExportAddress; + i.vector_result.storage_target = + InstructionStorageTarget::kExportAddress; break; case 33: case 34: case 35: case 36: case 37: - i.result.storage_index = dest_num - 33; - i.result.storage_target = InstructionStorageTarget::kExportData; + i.vector_result.storage_index = dest_num - 33; + i.vector_result.storage_target = InstructionStorageTarget::kExportData; break; case 61: - i.result.storage_target = InstructionStorageTarget::kDepth; + i.vector_result.storage_target = InstructionStorageTarget::kDepth; break; default: XELOGE( - "ShaderTranslator::ParseAluVectorInstruction: Unsupported write " - "to export %d", + "ShaderTranslator::ParseAluVectorOperation: Unsupported write to " + "export %d", dest_num); - i.result.storage_target = InstructionStorageTarget::kNone; - i.result.storage_index = 0; + i.vector_result.storage_target = InstructionStorageTarget::kNone; + i.vector_result.storage_index = 0; } } if (op.is_export()) { @@ -1344,22 +1338,22 @@ void ShaderTranslator::ParseAluVectorInstruction( uint32_t const_1_mask = op.scalar_write_mask(); if (!write_mask) { for (int j = 0; j < 4; ++j) { - i.result.write_mask[j] = false; + i.vector_result.write_mask[j] = false; } } else { for (int j = 0; j < 4; ++j, write_mask >>= 1, const_1_mask >>= 1) { - i.result.write_mask[j] = true; + i.vector_result.write_mask[j] = true; if (write_mask & 0x1) { if (const_1_mask & 0x1) { - i.result.components[j] = SwizzleSource::k1; + i.vector_result.components[j] = SwizzleSource::k1; } else { - i.result.components[j] = GetSwizzleFromComponentIndex(j); + i.vector_result.components[j] = GetSwizzleFromComponentIndex(j); } } else { if (op.is_scalar_dest_relative()) { - i.result.components[j] = SwizzleSource::k0; + i.vector_result.components[j] = SwizzleSource::k0; } else { - i.result.write_mask[j] = false; + i.vector_result.write_mask[j] = false; } } } @@ -1367,45 +1361,44 @@ void ShaderTranslator::ParseAluVectorInstruction( } else { uint32_t write_mask = op.vector_write_mask(); for (int j = 0; j < 4; ++j, write_mask >>= 1) { - i.result.write_mask[j] = (write_mask & 0x1) == 0x1; - i.result.components[j] = GetSwizzleFromComponentIndex(j); + i.vector_result.write_mask[j] = (write_mask & 0x1) == 0x1; + i.vector_result.components[j] = GetSwizzleFromComponentIndex(j); } } - i.operand_count = opcode_info.argument_count; - for (int j = 0; j < i.operand_count; ++j) { - ParseAluInstructionOperand( - op, j + 1, opcode_info.src_swizzle_component_count, &i.operands[j]); + i.vector_operand_count = opcode_info.argument_count; + for (int j = 0; j < i.vector_operand_count; ++j) { + ParseAluInstructionOperand(op, j + 1, + opcode_info.src_swizzle_component_count, + &i.vector_operands[j]); // Track constant float register loads. - if (i.operands[j].storage_source == + if (i.vector_operands[j].storage_source == InstructionStorageSource::kConstantFloat) { - if (i.operands[j].storage_addressing_mode != + if (i.vector_operands[j].storage_addressing_mode != InstructionStorageAddressingMode::kStatic) { // Dynamic addressing makes all constants required. std::memset(constant_register_map_.float_bitmap, 0xFF, sizeof(constant_register_map_.float_bitmap)); } else { - auto register_index = i.operands[j].storage_index; + auto register_index = i.vector_operands[j].storage_index; constant_register_map_.float_bitmap[register_index / 64] |= 1ull << (register_index % 64); } } } - - i.Disassemble(&ucode_disasm_buffer_); } -void ShaderTranslator::ParseAluScalarInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info, - ParsedAluInstruction& i) { - i.dword_index = 0; - i.type = ParsedAluInstruction::Type::kScalar; +void ShaderTranslator::ParseAluScalarOperation(const AluInstruction& op, + ParsedAluInstruction& i) { + i.has_scalar_op = op.has_scalar_op(); + if (!i.has_scalar_op) { + return; + } i.scalar_opcode = op.scalar_opcode(); - i.opcode_name = opcode_info.name; - i.is_paired = op.has_vector_op(); - i.is_predicated = op.is_predicated(); - i.predicate_condition = op.predicate_condition(); + const auto& opcode_info = + alu_scalar_opcode_infos_[static_cast(op.scalar_opcode())]; + i.scalar_opcode_name = opcode_info.name; uint32_t dest_num; uint32_t write_mask; @@ -1416,50 +1409,52 @@ void ShaderTranslator::ParseAluScalarInstruction( dest_num = op.scalar_dest(); write_mask = op.scalar_write_mask(); } - i.result.is_export = op.is_export(); - i.result.is_clamped = op.scalar_clamp(); - i.result.storage_target = InstructionStorageTarget::kRegister; - i.result.storage_index = 0; + i.scalar_result.is_export = op.is_export(); + i.scalar_result.is_clamped = op.scalar_clamp(); + i.scalar_result.storage_target = InstructionStorageTarget::kRegister; + i.scalar_result.storage_index = 0; if (!op.is_export()) { assert_true(dest_num < 32); - i.result.storage_target = InstructionStorageTarget::kRegister; - i.result.storage_index = dest_num; - i.result.storage_addressing_mode = + i.scalar_result.storage_target = InstructionStorageTarget::kRegister; + i.scalar_result.storage_index = dest_num; + i.scalar_result.storage_addressing_mode = op.is_scalar_dest_relative() ? InstructionStorageAddressingMode::kAddressRelative : InstructionStorageAddressingMode::kStatic; } else if (is_vertex_shader()) { switch (dest_num) { case 32: - i.result.storage_target = InstructionStorageTarget::kExportAddress; + i.scalar_result.storage_target = + InstructionStorageTarget::kExportAddress; break; case 33: case 34: case 35: case 36: case 37: - i.result.storage_index = dest_num - 33; - i.result.storage_target = InstructionStorageTarget::kExportData; + i.scalar_result.storage_index = dest_num - 33; + i.scalar_result.storage_target = InstructionStorageTarget::kExportData; break; case 62: - i.result.storage_target = InstructionStorageTarget::kPosition; + i.scalar_result.storage_target = InstructionStorageTarget::kPosition; break; case 63: - i.result.storage_target = InstructionStorageTarget::kPointSize; + i.scalar_result.storage_target = InstructionStorageTarget::kPointSize; break; default: if (dest_num < 16) { - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + i.scalar_result.storage_target = + InstructionStorageTarget::kInterpolant; + i.scalar_result.storage_index = dest_num; } else { // Unimplemented. // assert_always(); XELOGE( - "ShaderTranslator::ParseAluScalarInstruction: Unsupported write " - "to export %d", + "ShaderTranslator::ParseAluScalarOperation: Unsupported write to " + "export %d", dest_num); - i.result.storage_target = InstructionStorageTarget::kNone; - i.result.storage_index = 0; + i.scalar_result.storage_target = InstructionStorageTarget::kNone; + i.scalar_result.storage_index = 0; } break; } @@ -1467,46 +1462,47 @@ void ShaderTranslator::ParseAluScalarInstruction( switch (dest_num) { case 0: case 63: // ? masked? - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 0; + i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget; + i.scalar_result.storage_index = 0; break; case 1: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 1; + i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget; + i.scalar_result.storage_index = 1; break; case 2: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 2; + i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget; + i.scalar_result.storage_index = 2; break; case 3: - i.result.storage_target = InstructionStorageTarget::kColorTarget; - i.result.storage_index = 3; + i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget; + i.scalar_result.storage_index = 3; break; case 32: - i.result.storage_target = InstructionStorageTarget::kExportAddress; + i.scalar_result.storage_target = + InstructionStorageTarget::kExportAddress; break; case 33: case 34: case 35: case 36: case 37: - i.result.storage_index = dest_num - 33; - i.result.storage_target = InstructionStorageTarget::kExportData; + i.scalar_result.storage_index = dest_num - 33; + i.scalar_result.storage_target = InstructionStorageTarget::kExportData; break; case 61: - i.result.storage_target = InstructionStorageTarget::kDepth; + i.scalar_result.storage_target = InstructionStorageTarget::kDepth; break; } } for (int j = 0; j < 4; ++j, write_mask >>= 1) { - i.result.write_mask[j] = (write_mask & 0x1) == 0x1; - i.result.components[j] = GetSwizzleFromComponentIndex(j); + i.scalar_result.write_mask[j] = (write_mask & 0x1) == 0x1; + i.scalar_result.components[j] = GetSwizzleFromComponentIndex(j); } - i.operand_count = opcode_info.argument_count; + i.scalar_operand_count = opcode_info.argument_count; if (opcode_info.argument_count == 1) { ParseAluInstructionOperand(op, 3, opcode_info.src_swizzle_component_count, - &i.operands[0]); + &i.scalar_operands[0]); } else { uint32_t src3_swizzle = op.src_swizzle(3); uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3; @@ -1518,19 +1514,19 @@ void ShaderTranslator::ParseAluScalarInstruction( ParseAluInstructionOperandSpecial( op, InstructionStorageSource::kConstantFloat, op.src_reg(3), - op.src_negate(3), 0, swiz_a, &i.operands[0]); + op.src_negate(3), 0, swiz_a, &i.scalar_operands[0]); ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister, reg2, op.src_negate(3), const_slot, - swiz_b, &i.operands[1]); + swiz_b, &i.scalar_operands[1]); } // Track constant float register loads - in either case, a float constant may // be used in operand 0. - if (i.operands[0].storage_source == + if (i.scalar_operands[0].storage_source == InstructionStorageSource::kConstantFloat) { - auto register_index = i.operands[0].storage_index; - if (i.operands[0].storage_addressing_mode != + auto register_index = i.scalar_operands[0].storage_index; + if (i.scalar_operands[0].storage_addressing_mode != InstructionStorageAddressingMode::kStatic) { // Dynamic addressing makes all constants required. std::memset(constant_register_map_.float_bitmap, 0xFF, @@ -1540,8 +1536,6 @@ void ShaderTranslator::ParseAluScalarInstruction( 1ull << (register_index % 64); } } - - i.Disassemble(&ucode_disasm_buffer_); } } // namespace gpu diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 48775ade9..292c96bed 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -208,12 +208,10 @@ class ShaderTranslator { ParsedTextureFetchInstruction* out_instr); void TranslateAluInstruction(const ucode::AluInstruction& op); - void ParseAluVectorInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info, - ParsedAluInstruction& instr); - void ParseAluScalarInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info, - ParsedAluInstruction& instr); + void ParseAluVectorOperation(const ucode::AluInstruction& op, + ParsedAluInstruction& instr); + void ParseAluScalarOperation(const ucode::AluInstruction& op, + ParsedAluInstruction& instr); // Input shader metadata and microcode. ShaderType shader_type_; diff --git a/src/xenia/gpu/shader_translator_disasm.cc b/src/xenia/gpu/shader_translator_disasm.cc index ab180ccf4..1de31efb1 100644 --- a/src/xenia/gpu/shader_translator_disasm.cc +++ b/src/xenia/gpu/shader_translator_disasm.cc @@ -454,29 +454,44 @@ void ParsedAluInstruction::Disassemble(StringBuffer* out) const { out->Append(" nop\n"); return; } - if (is_scalar_type() && is_paired) { - out->Append(" + "); - } else { + if (has_vector_op) { out->Append(" "); + if (is_predicated) { + out->Append(predicate_condition ? " (p0) " : "(!p0) "); + } else { + out->Append(" "); + } + out->Append(vector_opcode_name); + if (vector_result.is_clamped) { + out->Append("_sat"); + } + out->Append(' '); + DisassembleResultOperand(vector_result, out); + for (int i = 0; i < vector_operand_count; ++i) { + out->Append(", "); + DisassembleSourceOperand(vector_operands[i], out); + } + out->Append('\n'); } - if (is_predicated) { - out->Append(predicate_condition ? " (p0) " : "(!p0) "); - } else { - out->Append(" "); + if (has_scalar_op) { + out->Append(has_vector_op ? " + " : " "); + if (is_predicated) { + out->Append(predicate_condition ? " (p0) " : "(!p0) "); + } else { + out->Append(" "); + } + out->Append(scalar_opcode_name); + if (scalar_result.is_clamped) { + out->Append("_sat"); + } + out->Append(' '); + DisassembleResultOperand(scalar_result, out); + for (int i = 0; i < scalar_operand_count; ++i) { + out->Append(", "); + DisassembleSourceOperand(scalar_operands[i], out); + } + out->Append('\n'); } - out->Append(opcode_name); - if (result.is_clamped) { - out->Append("_sat"); - } - out->Append(' '); - - DisassembleResultOperand(result, out); - - for (int i = 0; i < operand_count; ++i) { - out->Append(", "); - DisassembleSourceOperand(operands[i], out); - } - out->Append('\n'); } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a539f0c81..2ed777ee5 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2000,17 +2000,60 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( void SpirvShaderTranslator::ProcessAluInstruction( const ParsedAluInstruction& instr) { + if (instr.is_nop()) { + return; + } + auto& b = *builder_; - switch (instr.type) { - case ParsedAluInstruction::Type::kNop: - b.createNoResultOp(spv::Op::OpNop); - break; - case ParsedAluInstruction::Type::kVector: - ProcessVectorAluInstruction(instr); - break; - case ParsedAluInstruction::Type::kScalar: - ProcessScalarAluInstruction(instr); - break; + + // Close the open predicated block if this instr isn't predicated or the + // conditions do not match. + if (open_predicated_block_ && + (!instr.is_predicated || + instr.predicate_condition != predicated_block_cond_)) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + } + + if (!open_predicated_block_ && instr.is_predicated) { + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + auto block = &b.makeNewBlock(); + open_predicated_block_ = true; + predicated_block_cond_ = instr.predicate_condition; + predicated_block_end_ = &b.makeNewBlock(); + + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); + b.createConditionalBranch(pred_cond, block, predicated_block_end_); + b.setBuildPoint(block); + } + + bool close_predicated_block_vector = false; + bool store_vector = + ProcessVectorAluOperation(instr, close_predicated_block_vector); + bool close_predicated_block_scalar = false; + bool store_scalar = + ProcessScalarAluOperation(instr, close_predicated_block_scalar); + + if (store_vector) { + StoreToResult(b.createLoad(pv_), instr.vector_result); + } + if (store_scalar) { + StoreToResult(b.createLoad(ps_), instr.scalar_result); + } + + if ((close_predicated_block_vector || close_predicated_block_scalar) && + open_predicated_block_) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; } } @@ -2202,45 +2245,23 @@ spv::Function* SpirvShaderTranslator::CreateCubeFunction() { return function; } -void SpirvShaderTranslator::ProcessVectorAluInstruction( - const ParsedAluInstruction& instr) { +bool SpirvShaderTranslator::ProcessVectorAluOperation( + const ParsedAluInstruction& instr, bool& close_predicated_block) { + close_predicated_block = false; + + if (!instr.has_vector_op) { + return false; + } + auto& b = *builder_; - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. - if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - // TODO: If we have identical operands, reuse previous one. Id sources[3] = {0}; Id dest = vec4_float_zero_; - for (size_t i = 0; i < instr.operand_count; i++) { - sources[i] = LoadFromOperand(instr.operands[i]); + for (size_t i = 0; i < instr.vector_operand_count; i++) { + sources[i] = LoadFromOperand(instr.vector_operands[i]); } - bool close_predicated_block = false; switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: { dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], @@ -2603,58 +2624,30 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( assert_true(b.getTypeId(dest) == vec4_float_type_); if (dest) { b.createStore(dest, pv_); - StoreToResult(dest, instr.result); - } - - if (close_predicated_block && open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; + return true; } + return false; } -void SpirvShaderTranslator::ProcessScalarAluInstruction( - const ParsedAluInstruction& instr) { +bool SpirvShaderTranslator::ProcessScalarAluOperation( + const ParsedAluInstruction& instr, bool& close_predicated_block) { + close_predicated_block = false; + + if (!instr.has_scalar_op) { + return false; + } + auto& b = *builder_; - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. - if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - // TODO: If we have identical operands, reuse previous one. Id sources[3] = {0}; Id dest = b.makeFloatConstant(0); - for (size_t i = 0, x = 0; i < instr.operand_count; i++) { - auto src = LoadFromOperand(instr.operands[i]); + for (size_t i = 0, x = 0; i < instr.scalar_operand_count; i++) { + auto src = LoadFromOperand(instr.scalar_operands[i]); // Pull components out of the vector operands and use them as sources. - if (instr.operands[i].component_count > 1) { - for (int j = 0; j < instr.operands[i].component_count; j++) { + if (instr.scalar_operands[i].component_count > 1) { + for (int j = 0; j < instr.scalar_operands[i].component_count; j++) { sources[x++] = b.createCompositeExtract(src, float_type_, j); } } else { @@ -2662,7 +2655,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } } - bool close_predicated_block = false; switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: case AluScalarOpcode::kAddsc0: @@ -3073,16 +3065,9 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( assert_true(b.getTypeId(dest) == float_type_); if (dest) { b.createStore(dest, ps_); - StoreToResult(dest, instr.result); - } - - if (close_predicated_block && open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; + return true; } + return false; } Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 5a5d1de41..044dea019 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -88,8 +88,10 @@ class SpirvShaderTranslator : public ShaderTranslator { private: spv::Function* CreateCubeFunction(); - void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); - void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); + bool ProcessVectorAluOperation(const ParsedAluInstruction& instr, + bool& close_predicate_block); + bool ProcessScalarAluOperation(const ParsedAluInstruction& instr, + bool& close_predicate_block); spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed, uint32_t offset, uint32_t count);