[GPU] Shader ALU refactoring + documentation

Mainly move instruction info from the ShaderTranslator to xe::gpu::ucode for future use in the CPU shader interpreter
This commit is contained in:
Triang3l 2022-04-27 20:52:20 +03:00
parent df9a37f798
commit b42680abf7
6 changed files with 229 additions and 193 deletions

View File

@ -28,7 +28,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
uint32_t used_result_components =
instr.vector_and_constant_result.GetUsedResultComponents();
if (!used_result_components &&
!AluVectorOpHasSideEffects(instr.vector_opcode)) {
!ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) {
return;
}

View File

@ -561,12 +561,12 @@ struct ParsedAluInstruction {
// instruction even if only constants are being exported. The XNA disassembler
// falls back to displaying the whole vector operation, even if only constant
// components are written, if the scalar operation is a nop or if the vector
// operation has side effects (but if the scalar operation isn't nop, it
// outputs the entire constant mask in the scalar operation destination).
// Normally the XNA disassembler outputs the constant mask in both vector and
// scalar operations, but that's not required by assembler, so it doesn't
// really matter whether it's specified in the vector operation, in the scalar
// operation, or in both.
// operation changes a0, p0 or kills pixels (but if the scalar operation isn't
// nop, it outputs the entire constant mask in the scalar operation
// destination). Normally the XNA disassembler outputs the constant mask in
// both vector and scalar operations, but that's not required by assembler, so
// it doesn't really matter whether it's specified in the vector operation, in
// the scalar operation, or in both.
InstructionResult vector_and_constant_result;
// Describes how the scalar operation result is stored.
InstructionResult scalar_result;
@ -591,8 +591,8 @@ struct ParsedAluInstruction {
// will result in the same microcode (since instructions with just an empty
// write mask may have different values in other fields).
// This is for disassembly! Translators should use the write masks and
// AluVectorOpHasSideEffects to skip operations, as this only covers one very
// specific nop format!
// the changed state bits in the opcode info to skip operations, as this only
// covers one very specific nop format!
bool IsVectorOpDefaultNop() const;
// Whether the scalar part of the instruction is the same as if it was omitted
// in the assembly (if compiled or assembled with the Xbox 360 shader

View File

@ -370,9 +370,12 @@ void Shader::GatherAluInstructionInformation(
ParseAluInstruction(op, type(), instr);
instr.Disassemble(&ucode_disasm_buffer);
kills_pixels_ = kills_pixels_ ||
ucode::AluVectorOpcodeIsKill(op.vector_opcode()) ||
ucode::AluScalarOpcodeIsKill(op.scalar_opcode());
kills_pixels_ =
kills_pixels_ ||
(ucode::GetAluVectorOpcodeInfo(op.vector_opcode()).changed_state &
ucode::kAluOpChangedStatePixelKill) ||
(ucode::GetAluScalarOpcodeInfo(op.scalar_opcode()).changed_state &
ucode::kAluOpChangedStatePixelKill);
GatherAluResultInformation(instr.vector_and_constant_result,
memexport_alloc_current_count);
@ -1055,99 +1058,6 @@ uint32_t ParsedTextureFetchInstruction::GetNonZeroResultComponents() const {
return result.GetUsedResultComponents() & components;
}
struct AluOpcodeInfo {
const char* name;
uint32_t argument_count;
uint32_t src_swizzle_component_count;
};
static const AluOpcodeInfo alu_vector_opcode_infos[0x20] = {
{"add", 2, 4}, // 0
{"mul", 2, 4}, // 1
{"max", 2, 4}, // 2
{"min", 2, 4}, // 3
{"seq", 2, 4}, // 4
{"sgt", 2, 4}, // 5
{"sge", 2, 4}, // 6
{"sne", 2, 4}, // 7
{"frc", 1, 4}, // 8
{"trunc", 1, 4}, // 9
{"floor", 1, 4}, // 10
{"mad", 3, 4}, // 11
{"cndeq", 3, 4}, // 12
{"cndge", 3, 4}, // 13
{"cndgt", 3, 4}, // 14
{"dp4", 2, 4}, // 15
{"dp3", 2, 4}, // 16
{"dp2add", 3, 4}, // 17
{"cube", 2, 4}, // 18
{"max4", 1, 4}, // 19
{"setp_eq_push", 2, 4}, // 20
{"setp_ne_push", 2, 4}, // 21
{"setp_gt_push", 2, 4}, // 22
{"setp_ge_push", 2, 4}, // 23
{"kill_eq", 2, 4}, // 24
{"kill_gt", 2, 4}, // 25
{"kill_ge", 2, 4}, // 26
{"kill_ne", 2, 4}, // 27
{"dst", 2, 4}, // 28
{"maxa", 2, 4}, // 29
};
static const AluOpcodeInfo alu_scalar_opcode_infos[0x40] = {
{"adds", 1, 2}, // 0
{"adds_prev", 1, 1}, // 1
{"muls", 1, 2}, // 2
{"muls_prev", 1, 1}, // 3
{"muls_prev2", 1, 2}, // 4
{"maxs", 1, 2}, // 5
{"mins", 1, 2}, // 6
{"seqs", 1, 1}, // 7
{"sgts", 1, 1}, // 8
{"sges", 1, 1}, // 9
{"snes", 1, 1}, // 10
{"frcs", 1, 1}, // 11
{"truncs", 1, 1}, // 12
{"floors", 1, 1}, // 13
{"exp", 1, 1}, // 14
{"logc", 1, 1}, // 15
{"log", 1, 1}, // 16
{"rcpc", 1, 1}, // 17
{"rcpf", 1, 1}, // 18
{"rcp", 1, 1}, // 19
{"rsqc", 1, 1}, // 20
{"rsqf", 1, 1}, // 21
{"rsq", 1, 1}, // 22
{"maxas", 1, 2}, // 23
{"maxasf", 1, 2}, // 24
{"subs", 1, 2}, // 25
{"subs_prev", 1, 1}, // 26
{"setp_eq", 1, 1}, // 27
{"setp_ne", 1, 1}, // 28
{"setp_gt", 1, 1}, // 29
{"setp_ge", 1, 1}, // 30
{"setp_inv", 1, 1}, // 31
{"setp_pop", 1, 1}, // 32
{"setp_clr", 0, 0}, // 33
{"setp_rstr", 1, 1}, // 34
{"kills_eq", 1, 1}, // 35
{"kills_gt", 1, 1}, // 36
{"kills_ge", 1, 1}, // 37
{"kills_ne", 1, 1}, // 38
{"kills_one", 1, 1}, // 39
{"sqrt", 1, 1}, // 40
{"UNKNOWN", 0, 0}, // 41
{"mulsc", 2, 1}, // 42
{"mulsc", 2, 1}, // 43
{"addsc", 2, 1}, // 44
{"addsc", 2, 1}, // 45
{"subsc", 2, 1}, // 46
{"subsc", 2, 1}, // 47
{"sin", 1, 1}, // 48
{"cos", 1, 1}, // 49
{"retain_prev", 0, 0}, // 50
};
static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i,
uint32_t swizzle_component_count,
InstructionOperand& out_op) {
@ -1290,9 +1200,10 @@ void ParseAluInstruction(const AluInstruction& op,
// Vector operation and constant 0/1 writes.
instr.vector_opcode = op.vector_opcode();
const auto& vector_opcode_info =
alu_vector_opcode_infos[uint32_t(instr.vector_opcode)];
ucode::AluVectorOpcode vector_opcode = op.vector_opcode();
instr.vector_opcode = vector_opcode;
const ucode::AluVectorOpcodeInfo& vector_opcode_info =
ucode::GetAluVectorOpcodeInfo(vector_opcode);
instr.vector_opcode_name = vector_opcode_info.name;
instr.vector_and_constant_result.storage_target = storage_target;
@ -1322,19 +1233,18 @@ void ParseAluInstruction(const AluInstruction& op,
instr.vector_and_constant_result.components[i] = component;
}
instr.vector_operand_count = vector_opcode_info.argument_count;
instr.vector_operand_count = vector_opcode_info.GetOperandCount();
for (uint32_t i = 0; i < instr.vector_operand_count; ++i) {
InstructionOperand& vector_operand = instr.vector_operands[i];
ParseAluInstructionOperand(op, i + 1,
vector_opcode_info.src_swizzle_component_count,
vector_operand);
ParseAluInstructionOperand(op, i + 1, 4, vector_operand);
}
// Scalar operation.
instr.scalar_opcode = op.scalar_opcode();
const auto& scalar_opcode_info =
alu_scalar_opcode_infos[uint32_t(instr.scalar_opcode)];
ucode::AluScalarOpcode scalar_opcode = op.scalar_opcode();
instr.scalar_opcode = scalar_opcode;
const ucode::AluScalarOpcodeInfo& scalar_opcode_info =
ucode::GetAluScalarOpcodeInfo(scalar_opcode);
instr.scalar_opcode_name = scalar_opcode_info.name;
instr.scalar_result.storage_target = storage_target;
@ -1355,12 +1265,12 @@ void ParseAluInstruction(const AluInstruction& op,
instr.scalar_result.components[i] = GetSwizzleFromComponentIndex(i);
}
instr.scalar_operand_count = scalar_opcode_info.argument_count;
instr.scalar_operand_count = scalar_opcode_info.operand_count;
if (instr.scalar_operand_count) {
if (instr.scalar_operand_count == 1) {
ParseAluInstructionOperand(op, 3,
scalar_opcode_info.src_swizzle_component_count,
instr.scalar_operands[0]);
ParseAluInstructionOperand(
op, 3, scalar_opcode_info.single_operand_is_two_component ? 2 : 1,
instr.scalar_operands[0]);
} else {
// Constant and temporary register.
@ -1393,7 +1303,7 @@ void ParseAluInstruction(const AluInstruction& op,
temp_op.is_negated = src3_negate;
temp_op.is_absolute_value = op.abs_constants();
temp_op.storage_source = InstructionStorageSource::kRegister;
temp_op.storage_index = op.scalar_const_op_src_temp_reg();
temp_op.storage_index = op.scalar_const_reg_op_src_temp_reg();
temp_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAbsolute;
temp_op.component_count = 1;
@ -1423,7 +1333,7 @@ bool ParsedAluInstruction::IsNop() const {
return scalar_opcode == ucode::AluScalarOpcode::kRetainPrev &&
!scalar_result.GetUsedWriteMask() &&
!vector_and_constant_result.GetUsedWriteMask() &&
!ucode::AluVectorOpHasSideEffects(vector_opcode);
!ucode::GetAluVectorOpcodeInfo(vector_opcode).changed_state;
}
uint32_t ParsedAluInstruction::GetMemExportStreamConstant() const {

View File

@ -2264,7 +2264,7 @@ bool SpirvShaderTranslator::ProcessVectorAluOperation(
close_predicated_block = false;
if (!instr.vector_and_constant_result.GetUsedWriteMask() &&
!AluVectorOpHasSideEffects(instr.vector_opcode)) {
!ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) {
return false;
}

120
src/xenia/gpu/ucode.cc Normal file
View File

@ -0,0 +1,120 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/ucode.h"
namespace xe {
namespace gpu {
namespace ucode {
// Static information about each scalar ALU opcode, indexed by the numeric value
// of AluScalarOpcode (this is how GetAluScalarOpcodeInfo reads the table), so
// the order of the entries must match the opcode numbering. The trailing
// comment on each entry is its index.
// Fields of each entry, in order: name, operand_count,
// single_operand_is_two_component, changed_state.
// Entries named "opcode_N" (41 and 51-63) are placeholders with no operands and
// no state changes, present so that every value of the 6-bit opcode field has
// an entry.
const AluScalarOpcodeInfo kAluScalarOpcodeInfos[64] = {
{"adds", 1, true, kAluOpChangedStateNone},  // 0
{"adds_prev", 1, false, kAluOpChangedStateNone},  // 1
{"muls", 1, true, kAluOpChangedStateNone},  // 2
{"muls_prev", 1, false, kAluOpChangedStateNone},  // 3
{"muls_prev2", 1, true, kAluOpChangedStateNone},  // 4
{"maxs", 1, true, kAluOpChangedStateNone},  // 5
{"mins", 1, true, kAluOpChangedStateNone},  // 6
{"seqs", 1, false, kAluOpChangedStateNone},  // 7
{"sgts", 1, false, kAluOpChangedStateNone},  // 8
{"sges", 1, false, kAluOpChangedStateNone},  // 9
{"snes", 1, false, kAluOpChangedStateNone},  // 10
{"frcs", 1, false, kAluOpChangedStateNone},  // 11
{"truncs", 1, false, kAluOpChangedStateNone},  // 12
{"floors", 1, false, kAluOpChangedStateNone},  // 13
{"exp", 1, false, kAluOpChangedStateNone},  // 14
{"logc", 1, false, kAluOpChangedStateNone},  // 15
{"log", 1, false, kAluOpChangedStateNone},  // 16
{"rcpc", 1, false, kAluOpChangedStateNone},  // 17
{"rcpf", 1, false, kAluOpChangedStateNone},  // 18
{"rcp", 1, false, kAluOpChangedStateNone},  // 19
{"rsqc", 1, false, kAluOpChangedStateNone},  // 20
{"rsqf", 1, false, kAluOpChangedStateNone},  // 21
{"rsq", 1, false, kAluOpChangedStateNone},  // 22
{"maxas", 1, true, kAluOpChangedStateAddressRegister},  // 23
{"maxasf", 1, true, kAluOpChangedStateAddressRegister},  // 24
{"subs", 1, true, kAluOpChangedStateNone},  // 25
{"subs_prev", 1, false, kAluOpChangedStateNone},  // 26
{"setp_eq", 1, false, kAluOpChangedStatePredicate},  // 27
{"setp_ne", 1, false, kAluOpChangedStatePredicate},  // 28
{"setp_gt", 1, false, kAluOpChangedStatePredicate},  // 29
{"setp_ge", 1, false, kAluOpChangedStatePredicate},  // 30
{"setp_inv", 1, false, kAluOpChangedStatePredicate},  // 31
{"setp_pop", 1, false, kAluOpChangedStatePredicate},  // 32
{"setp_clr", 0, false, kAluOpChangedStatePredicate},  // 33
{"setp_rstr", 1, false, kAluOpChangedStatePredicate},  // 34
{"kills_eq", 1, false, kAluOpChangedStatePixelKill},  // 35
{"kills_gt", 1, false, kAluOpChangedStatePixelKill},  // 36
{"kills_ge", 1, false, kAluOpChangedStatePixelKill},  // 37
{"kills_ne", 1, false, kAluOpChangedStatePixelKill},  // 38
{"kills_one", 1, false, kAluOpChangedStatePixelKill},  // 39
{"sqrt", 1, false, kAluOpChangedStateNone},  // 40
{"opcode_41", 0, false, kAluOpChangedStateNone},  // 41
// Each of mulsc, addsc and subsc occupies two consecutive opcode values.
{"mulsc", 2, false, kAluOpChangedStateNone},  // 42
{"mulsc", 2, false, kAluOpChangedStateNone},  // 43
{"addsc", 2, false, kAluOpChangedStateNone},  // 44
{"addsc", 2, false, kAluOpChangedStateNone},  // 45
{"subsc", 2, false, kAluOpChangedStateNone},  // 46
{"subsc", 2, false, kAluOpChangedStateNone},  // 47
{"sin", 1, false, kAluOpChangedStateNone},  // 48
{"cos", 1, false, kAluOpChangedStateNone},  // 49
{"retain_prev", 0, false, kAluOpChangedStateNone},  // 50
{"opcode_51", 0, false, kAluOpChangedStateNone},  // 51
{"opcode_52", 0, false, kAluOpChangedStateNone},  // 52
{"opcode_53", 0, false, kAluOpChangedStateNone},  // 53
{"opcode_54", 0, false, kAluOpChangedStateNone},  // 54
{"opcode_55", 0, false, kAluOpChangedStateNone},  // 55
{"opcode_56", 0, false, kAluOpChangedStateNone},  // 56
{"opcode_57", 0, false, kAluOpChangedStateNone},  // 57
{"opcode_58", 0, false, kAluOpChangedStateNone},  // 58
{"opcode_59", 0, false, kAluOpChangedStateNone},  // 59
{"opcode_60", 0, false, kAluOpChangedStateNone},  // 60
{"opcode_61", 0, false, kAluOpChangedStateNone},  // 61
{"opcode_62", 0, false, kAluOpChangedStateNone},  // 62
{"opcode_63", 0, false, kAluOpChangedStateNone},  // 63
};
// Static information about each vector ALU opcode, indexed by the numeric value
// of AluVectorOpcode (this is how GetAluVectorOpcodeInfo reads the table), so
// the order of the entries must match the opcode numbering. The trailing
// comment on each entry is its index.
// Fields of each entry, in order: name, operand_components_used (one component
// mask per source operand; masks omitted from the initializer are zero, which
// is what GetOperandCount relies on to derive the operand count),
// changed_state.
// NOTE(review): the masks presumably have X in bit 0 and W in bit 3 (dp3 uses
// 0b0111) - confirm against the users of operand_components_used.
// Entries named "opcode_N" (30 and 31) are placeholders with no operands and no
// state changes, present so that every value of the 5-bit opcode field has an
// entry.
const AluVectorOpcodeInfo kAluVectorOpcodeInfos[32] = {
{"add", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 0
{"mul", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 1
{"max", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 2
{"min", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 3
{"seq", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 4
{"sgt", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 5
{"sge", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 6
{"sne", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 7
{"frc", {0b1111}, kAluOpChangedStateNone},  // 8
{"trunc", {0b1111}, kAluOpChangedStateNone},  // 9
{"floor", {0b1111}, kAluOpChangedStateNone},  // 10
{"mad", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 11
{"cndeq", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 12
{"cndge", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 13
{"cndgt", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 14
{"dp4", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 15
{"dp3", {0b0111, 0b0111}, kAluOpChangedStateNone},  // 16
{"dp2add", {0b0011, 0b0011, 0b0001}, kAluOpChangedStateNone},  // 17
{"cube", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 18
{"max4", {0b1111}, kAluOpChangedStateNone},  // 19
{"setp_eq_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 20
{"setp_ne_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 21
{"setp_gt_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 22
{"setp_ge_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 23
{"kill_eq", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 24
{"kill_gt", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 25
{"kill_ge", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 26
{"kill_ne", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 27
{"dst", {0b0110, 0b1010}, kAluOpChangedStateNone},  // 28
{"maxa", {0b1111, 0b1111}, kAluOpChangedStateAddressRegister},  // 29
{"opcode_30", {}, kAluOpChangedStateNone},  // 30
{"opcode_31", {}, kAluOpChangedStateNone},  // 31
};
} // namespace ucode
} // namespace gpu
} // namespace xe

View File

@ -13,6 +13,7 @@
#include <cstdint>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/platform.h"
#include "xenia/gpu/xenos.h"
@ -900,8 +901,9 @@ static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
// Conventions:
// - All temporary registers are vec4s.
// - Most scalar ALU operations work with one or two components of the source
// register passed as the third operand of the whole co-issued ALU operation,
// denoted by `a` (the left-hand operand) and `b` (the right-hand operand).
// register or the float constant passed as the third operand of the whole
// co-issued ALU operation, denoted by `a` (the left-hand operand) and `b`
// (the right-hand operand).
// `a` is the [(3 + src3_swizzle[6:7]) & 3] component (W - alpha).
// `b` is the [(0 + src3_swizzle[0:1]) & 3] component (X - red).
// - mulsc, addsc, subsc scalar ALU operations accept two operands - a float
@ -948,6 +950,14 @@ static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
// use instructions that may be interpreted by the host GPU as fused
// multiply-add.
// Bit flags naming the state, other than the destination of the result, that
// an ALU operation modifies. Intended for shader analysis and for deciding
// whether an instruction that writes nothing may be skipped.
enum AluOpChangedState {
  // The operation changes none of the states listed below.
  kAluOpChangedStateNone = 0b000,
  // The operation changes the address register (a0).
  kAluOpChangedStateAddressRegister = 0b001,
  // The operation changes the predicate (p0).
  kAluOpChangedStatePredicate = 0b010,
  // The operation may kill pixels.
  kAluOpChangedStatePixelKill = 0b100,
};
enum class AluScalarOpcode : uint32_t {
// Floating-Point Add
// adds/ADDs dest, src0.ab
@ -1277,17 +1287,28 @@ enum class AluScalarOpcode : uint32_t {
kRetainPrev = 50,
};
constexpr bool AluScalarOpcodeIsKill(AluScalarOpcode scalar_opcode) {
switch (scalar_opcode) {
case AluScalarOpcode::kKillsEq:
case AluScalarOpcode::kKillsGt:
case AluScalarOpcode::kKillsGe:
case AluScalarOpcode::kKillsNe:
case AluScalarOpcode::kKillsOne:
return true;
default:
return false;
}
// Static description of one scalar ALU opcode. Instances are aggregate-
// initialized positionally in kAluScalarOpcodeInfos, so the order of the fields
// must not be changed without updating that table.
struct AluScalarOpcodeInfo {
// Name of the operation (used as the scalar opcode name of parsed
// instructions).
const char* name;
// Number of source operands:
// 0 - no operands.
// 1 - one single-component (W) or two-component (WX) r# or c#.
// 2 - c#.w and r#.x.
uint32_t operand_count;
// If operand_count is 1, whether both W and X of the operand are used rather
// than only W.
bool single_operand_is_two_component;
// State other than the result that the operation changes.
// Note that all scalar instructions except for retain_prev modify the
// previous scalar register, so they must be executed even if they don't write
// any result and don't perform any other state changes.
AluOpChangedState changed_state;
};
// One entry for every value of the 6-bit scalar opcode field (64 in total).
extern const AluScalarOpcodeInfo kAluScalarOpcodeInfos[64];
// Returns the table entry describing the given scalar opcode. The numeric
// value of the opcode is used directly as the index into the table, and is
// asserted to be within the bounds of the table.
inline const AluScalarOpcodeInfo& GetAluScalarOpcodeInfo(
    AluScalarOpcode opcode) {
  const uint32_t opcode_index = uint32_t(opcode);
  assert_true(opcode_index < xe::countof(kAluScalarOpcodeInfos));
  return kAluScalarOpcodeInfos[opcode_index];
}
enum class AluVectorOpcode : uint32_t {
@ -1385,6 +1406,9 @@ enum class AluVectorOpcode : uint32_t {
// dest.y = src0.y * src1.y + src2.y;
// dest.z = src0.z * src1.z + src2.z;
// dest.w = src0.w * src1.w + src2.w;
// According to SQ_ALU::multiply_add (used in the isHardwareAccurate case)
// from IPR2015-00325 sq_alu, this is FMA - rounding to single-precision only
// after the addition.
kMad = 11,
// Per-Component Floating-Point Conditional Move If Equal
@ -1490,6 +1514,17 @@ enum class AluVectorOpcode : uint32_t {
// } else {
// dest.xyzw = src0.w;
// }
// However, the comparisons may be >= actually - the XNA documentation on
// MSDN, as well as R600 and GCN documentation, describe `max` as being
// implemented via >= rather than >. `max4` is documented vaguely, without the
// exact calculations for each component - MSDN describes it as max(xyzw), and
// in the R600 documentation it's max(wzyx). A case more similar to `max4` that
// also has a discrepancy, between IPR2015-00325 sq_alu and the GCN
// documentation, is `cube`: it performs a max3 in zyx priority order, for
// which the GCN uses a >= comparison, while IPR2015-00325 sq_alu implements it
// via >. It's possible that an early version of the R400 used >, and that this
// was later changed to >=, but this is merely a guess.
kMax4 = 19,
// Floating-Point Predicate Counter Increment If Equal
@ -1627,60 +1662,32 @@ enum class AluVectorOpcode : uint32_t {
kMaxA = 29,
};
constexpr bool AluVectorOpcodeIsKill(AluVectorOpcode vector_opcode) {
switch (vector_opcode) {
case AluVectorOpcode::kKillEq:
case AluVectorOpcode::kKillGt:
case AluVectorOpcode::kKillGe:
case AluVectorOpcode::kKillNe:
return true;
default:
return false;
}
}
struct AluVectorOpcodeInfo {
const char* name;
uint32_t operand_components_used[3];
AluOpChangedState changed_state;
// Whether the vector instruction has side effects such as discarding a pixel or
// setting the predicate and can't be ignored even if it doesn't write to
// anywhere. Note that all scalar operations except for retain_prev have a side
// effect of modifying the previous scalar result register, so they must always
// be executed even if not writing.
constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
if (AluVectorOpcodeIsKill(vector_opcode)) {
return true;
// Returns the number of source operands of the operation (0 to 3), which is
// the 1-based position of the last operand with a non-zero used component
// mask.
uint32_t GetOperandCount() const {
  for (uint32_t operand_count = 3; operand_count != 0; --operand_count) {
    if (operand_components_used[operand_count - 1]) {
      return operand_count;
    }
  }
  return 0;
}
switch (vector_opcode) {
case AluVectorOpcode::kSetpEqPush:
case AluVectorOpcode::kSetpNePush:
case AluVectorOpcode::kSetpGtPush:
case AluVectorOpcode::kSetpGePush:
case AluVectorOpcode::kMaxA:
return true;
default:
return false;
}
}
};
// Whether each component of a source operand is used at all in the instruction
// (doesn't check the operand count though).
constexpr uint32_t GetAluVectorOpUsedSourceComponents(
AluVectorOpcode vector_opcode, uint32_t src_index) {
assert_not_zero(src_index);
switch (vector_opcode) {
case AluVectorOpcode::kDp3:
return 0b0111;
case AluVectorOpcode::kDp2Add:
return src_index == 3 ? 0b0001 : 0b0011;
case AluVectorOpcode::kSetpEqPush:
case AluVectorOpcode::kSetpNePush:
case AluVectorOpcode::kSetpGtPush:
case AluVectorOpcode::kSetpGePush:
return 0b1001;
case AluVectorOpcode::kDst:
return src_index == 2 ? 0b1010 : 0b0110;
default:
break;
}
return 0b1111;
// One entry for every value of the 5-bit vector opcode field (32 in total).
extern const AluVectorOpcodeInfo kAluVectorOpcodeInfos[32];
// Returns the table entry describing the given vector opcode. The numeric
// value of the opcode is used directly as the index into the table, and is
// asserted to be within the bounds of the table.
inline const AluVectorOpcodeInfo& GetAluVectorOpcodeInfo(
    AluVectorOpcode opcode) {
  const uint32_t opcode_index = uint32_t(opcode);
  assert_true(opcode_index < xe::countof(kAluVectorOpcodeInfos));
  return kAluVectorOpcodeInfos[opcode_index];
}
// Whether each component of a source operand is needed for the instruction if
@ -1688,7 +1695,7 @@ constexpr uint32_t GetAluVectorOpUsedSourceComponents(
// undefined in translation. For per-component operations, for example, only the
// components specified in the write mask are needed, but there are instructions
// with special behavior for certain components.
constexpr uint32_t GetAluVectorOpNeededSourceComponents(
inline uint32_t GetAluVectorOpNeededSourceComponents(
AluVectorOpcode vector_opcode, uint32_t src_index,
uint32_t used_result_components) {
assert_not_zero(src_index);
@ -1721,8 +1728,8 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
case AluVectorOpcode::kKillNe:
components = 0b1111;
break;
// kDst is per-component, but not all components are used -
// GetAluVectorOpUsedSourceComponents will filter out the unused ones.
// kDst is per-component, but not all components are used.
// operand_components_used will filter out the unused ones.
case AluVectorOpcode::kMaxA:
if (src_index == 1) {
components |= 0b1000;
@ -1731,8 +1738,8 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
default:
break;
}
return components &
GetAluVectorOpUsedSourceComponents(vector_opcode, src_index);
return components & GetAluVectorOpcodeInfo(vector_opcode)
.operand_components_used[src_index - 1];
}
enum class ExportRegister : uint32_t {
@ -1787,7 +1794,6 @@ struct alignas(uint32_t) AluInstruction {
// Whether data is being exported (or written to local registers).
bool is_export() const { return data_.export_data == 1; }
bool export_write_mask() const { return data_.scalar_dest_rel == 1; }
// Whether the instruction is predicated (or conditional).
bool is_predicated() const { return data_.is_predicated; }
@ -1921,7 +1927,7 @@ struct alignas(uint32_t) AluInstruction {
}
}
uint32_t scalar_const_op_src_temp_reg() const {
// Index of the temporary register operand of a scalar operation that takes
// both a constant and a temporary register. The index is assembled from
// several instruction fields: bit 0 comes from the lowest bit of the scalar
// opcode, bit 1 from src3_sel, and bits 2-5 from the corresponding bits of
// src3_swiz (mask 0x3C).
uint32_t scalar_const_reg_op_src_temp_reg() const {
return (uint32_t(data_.scalar_opc) & 1) | (data_.src3_sel << 1) |
(data_.src3_swiz & 0x3C);
}