[GPU] Shader ALU refactoring + documentation
Mainly move instruction info from the ShaderTranslator to xe::gpu::ucode for future use in the CPU shader interpreter
This commit is contained in:
parent
df9a37f798
commit
b42680abf7
|
@ -28,7 +28,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
|||
uint32_t used_result_components =
|
||||
instr.vector_and_constant_result.GetUsedResultComponents();
|
||||
if (!used_result_components &&
|
||||
!AluVectorOpHasSideEffects(instr.vector_opcode)) {
|
||||
!ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -561,12 +561,12 @@ struct ParsedAluInstruction {
|
|||
// instruction even if only constants are being exported. The XNA disassembler
|
||||
// falls back to displaying the whole vector operation, even if only constant
|
||||
// components are written, if the scalar operation is a nop or if the vector
|
||||
// operation has side effects (but if the scalar operation isn't nop, it
|
||||
// outputs the entire constant mask in the scalar operation destination).
|
||||
// Normally the XNA disassembler outputs the constant mask in both vector and
|
||||
// scalar operations, but that's not required by assembler, so it doesn't
|
||||
// really matter whether it's specified in the vector operation, in the scalar
|
||||
// operation, or in both.
|
||||
// operation changes a0, p0 or kills pixels (but if the scalar operation isn't
|
||||
// nop, it outputs the entire constant mask in the scalar operation
|
||||
// destination). Normally the XNA disassembler outputs the constant mask in
|
||||
// both vector and scalar operations, but that's not required by assembler, so
|
||||
// it doesn't really matter whether it's specified in the vector operation, in
|
||||
// the scalar operation, or in both.
|
||||
InstructionResult vector_and_constant_result;
|
||||
// Describes how the scalar operation result is stored.
|
||||
InstructionResult scalar_result;
|
||||
|
@ -591,8 +591,8 @@ struct ParsedAluInstruction {
|
|||
// will result in the same microcode (since instructions with just an empty
|
||||
// write mask may have different values in other fields).
|
||||
// This is for disassembly! Translators should use the write masks and
|
||||
// AluVectorOpHasSideEffects to skip operations, as this only covers one very
|
||||
// specific nop format!
|
||||
// the changed state bits in the opcode info to skip operations, as this only
|
||||
// covers one very specific nop format!
|
||||
bool IsVectorOpDefaultNop() const;
|
||||
// Whether the scalar part of the instruction is the same as if it was omitted
|
||||
// in the assembly (if compiled or assembled with the Xbox 360 shader
|
||||
|
|
|
@ -370,9 +370,12 @@ void Shader::GatherAluInstructionInformation(
|
|||
ParseAluInstruction(op, type(), instr);
|
||||
instr.Disassemble(&ucode_disasm_buffer);
|
||||
|
||||
kills_pixels_ = kills_pixels_ ||
|
||||
ucode::AluVectorOpcodeIsKill(op.vector_opcode()) ||
|
||||
ucode::AluScalarOpcodeIsKill(op.scalar_opcode());
|
||||
kills_pixels_ =
|
||||
kills_pixels_ ||
|
||||
(ucode::GetAluVectorOpcodeInfo(op.vector_opcode()).changed_state &
|
||||
ucode::kAluOpChangedStatePixelKill) ||
|
||||
(ucode::GetAluScalarOpcodeInfo(op.scalar_opcode()).changed_state &
|
||||
ucode::kAluOpChangedStatePixelKill);
|
||||
|
||||
GatherAluResultInformation(instr.vector_and_constant_result,
|
||||
memexport_alloc_current_count);
|
||||
|
@ -1055,99 +1058,6 @@ uint32_t ParsedTextureFetchInstruction::GetNonZeroResultComponents() const {
|
|||
return result.GetUsedResultComponents() & components;
|
||||
}
|
||||
|
||||
struct AluOpcodeInfo {
|
||||
const char* name;
|
||||
uint32_t argument_count;
|
||||
uint32_t src_swizzle_component_count;
|
||||
};
|
||||
|
||||
static const AluOpcodeInfo alu_vector_opcode_infos[0x20] = {
|
||||
{"add", 2, 4}, // 0
|
||||
{"mul", 2, 4}, // 1
|
||||
{"max", 2, 4}, // 2
|
||||
{"min", 2, 4}, // 3
|
||||
{"seq", 2, 4}, // 4
|
||||
{"sgt", 2, 4}, // 5
|
||||
{"sge", 2, 4}, // 6
|
||||
{"sne", 2, 4}, // 7
|
||||
{"frc", 1, 4}, // 8
|
||||
{"trunc", 1, 4}, // 9
|
||||
{"floor", 1, 4}, // 10
|
||||
{"mad", 3, 4}, // 11
|
||||
{"cndeq", 3, 4}, // 12
|
||||
{"cndge", 3, 4}, // 13
|
||||
{"cndgt", 3, 4}, // 14
|
||||
{"dp4", 2, 4}, // 15
|
||||
{"dp3", 2, 4}, // 16
|
||||
{"dp2add", 3, 4}, // 17
|
||||
{"cube", 2, 4}, // 18
|
||||
{"max4", 1, 4}, // 19
|
||||
{"setp_eq_push", 2, 4}, // 20
|
||||
{"setp_ne_push", 2, 4}, // 21
|
||||
{"setp_gt_push", 2, 4}, // 22
|
||||
{"setp_ge_push", 2, 4}, // 23
|
||||
{"kill_eq", 2, 4}, // 24
|
||||
{"kill_gt", 2, 4}, // 25
|
||||
{"kill_ge", 2, 4}, // 26
|
||||
{"kill_ne", 2, 4}, // 27
|
||||
{"dst", 2, 4}, // 28
|
||||
{"maxa", 2, 4}, // 29
|
||||
};
|
||||
|
||||
static const AluOpcodeInfo alu_scalar_opcode_infos[0x40] = {
|
||||
{"adds", 1, 2}, // 0
|
||||
{"adds_prev", 1, 1}, // 1
|
||||
{"muls", 1, 2}, // 2
|
||||
{"muls_prev", 1, 1}, // 3
|
||||
{"muls_prev2", 1, 2}, // 4
|
||||
{"maxs", 1, 2}, // 5
|
||||
{"mins", 1, 2}, // 6
|
||||
{"seqs", 1, 1}, // 7
|
||||
{"sgts", 1, 1}, // 8
|
||||
{"sges", 1, 1}, // 9
|
||||
{"snes", 1, 1}, // 10
|
||||
{"frcs", 1, 1}, // 11
|
||||
{"truncs", 1, 1}, // 12
|
||||
{"floors", 1, 1}, // 13
|
||||
{"exp", 1, 1}, // 14
|
||||
{"logc", 1, 1}, // 15
|
||||
{"log", 1, 1}, // 16
|
||||
{"rcpc", 1, 1}, // 17
|
||||
{"rcpf", 1, 1}, // 18
|
||||
{"rcp", 1, 1}, // 19
|
||||
{"rsqc", 1, 1}, // 20
|
||||
{"rsqf", 1, 1}, // 21
|
||||
{"rsq", 1, 1}, // 22
|
||||
{"maxas", 1, 2}, // 23
|
||||
{"maxasf", 1, 2}, // 24
|
||||
{"subs", 1, 2}, // 25
|
||||
{"subs_prev", 1, 1}, // 26
|
||||
{"setp_eq", 1, 1}, // 27
|
||||
{"setp_ne", 1, 1}, // 28
|
||||
{"setp_gt", 1, 1}, // 29
|
||||
{"setp_ge", 1, 1}, // 30
|
||||
{"setp_inv", 1, 1}, // 31
|
||||
{"setp_pop", 1, 1}, // 32
|
||||
{"setp_clr", 0, 0}, // 33
|
||||
{"setp_rstr", 1, 1}, // 34
|
||||
{"kills_eq", 1, 1}, // 35
|
||||
{"kills_gt", 1, 1}, // 36
|
||||
{"kills_ge", 1, 1}, // 37
|
||||
{"kills_ne", 1, 1}, // 38
|
||||
{"kills_one", 1, 1}, // 39
|
||||
{"sqrt", 1, 1}, // 40
|
||||
{"UNKNOWN", 0, 0}, // 41
|
||||
{"mulsc", 2, 1}, // 42
|
||||
{"mulsc", 2, 1}, // 43
|
||||
{"addsc", 2, 1}, // 44
|
||||
{"addsc", 2, 1}, // 45
|
||||
{"subsc", 2, 1}, // 46
|
||||
{"subsc", 2, 1}, // 47
|
||||
{"sin", 1, 1}, // 48
|
||||
{"cos", 1, 1}, // 49
|
||||
{"retain_prev", 0, 0}, // 50
|
||||
};
|
||||
|
||||
static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i,
|
||||
uint32_t swizzle_component_count,
|
||||
InstructionOperand& out_op) {
|
||||
|
@ -1290,9 +1200,10 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
|
||||
// Vector operation and constant 0/1 writes.
|
||||
|
||||
instr.vector_opcode = op.vector_opcode();
|
||||
const auto& vector_opcode_info =
|
||||
alu_vector_opcode_infos[uint32_t(instr.vector_opcode)];
|
||||
ucode::AluVectorOpcode vector_opcode = op.vector_opcode();
|
||||
instr.vector_opcode = vector_opcode;
|
||||
const ucode::AluVectorOpcodeInfo& vector_opcode_info =
|
||||
ucode::GetAluVectorOpcodeInfo(vector_opcode);
|
||||
instr.vector_opcode_name = vector_opcode_info.name;
|
||||
|
||||
instr.vector_and_constant_result.storage_target = storage_target;
|
||||
|
@ -1322,19 +1233,18 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
instr.vector_and_constant_result.components[i] = component;
|
||||
}
|
||||
|
||||
instr.vector_operand_count = vector_opcode_info.argument_count;
|
||||
instr.vector_operand_count = vector_opcode_info.GetOperandCount();
|
||||
for (uint32_t i = 0; i < instr.vector_operand_count; ++i) {
|
||||
InstructionOperand& vector_operand = instr.vector_operands[i];
|
||||
ParseAluInstructionOperand(op, i + 1,
|
||||
vector_opcode_info.src_swizzle_component_count,
|
||||
vector_operand);
|
||||
ParseAluInstructionOperand(op, i + 1, 4, vector_operand);
|
||||
}
|
||||
|
||||
// Scalar operation.
|
||||
|
||||
instr.scalar_opcode = op.scalar_opcode();
|
||||
const auto& scalar_opcode_info =
|
||||
alu_scalar_opcode_infos[uint32_t(instr.scalar_opcode)];
|
||||
ucode::AluScalarOpcode scalar_opcode = op.scalar_opcode();
|
||||
instr.scalar_opcode = scalar_opcode;
|
||||
const ucode::AluScalarOpcodeInfo& scalar_opcode_info =
|
||||
ucode::GetAluScalarOpcodeInfo(scalar_opcode);
|
||||
instr.scalar_opcode_name = scalar_opcode_info.name;
|
||||
|
||||
instr.scalar_result.storage_target = storage_target;
|
||||
|
@ -1355,11 +1265,11 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
instr.scalar_result.components[i] = GetSwizzleFromComponentIndex(i);
|
||||
}
|
||||
|
||||
instr.scalar_operand_count = scalar_opcode_info.argument_count;
|
||||
instr.scalar_operand_count = scalar_opcode_info.operand_count;
|
||||
if (instr.scalar_operand_count) {
|
||||
if (instr.scalar_operand_count == 1) {
|
||||
ParseAluInstructionOperand(op, 3,
|
||||
scalar_opcode_info.src_swizzle_component_count,
|
||||
ParseAluInstructionOperand(
|
||||
op, 3, scalar_opcode_info.single_operand_is_two_component ? 2 : 1,
|
||||
instr.scalar_operands[0]);
|
||||
} else {
|
||||
// Constant and temporary register.
|
||||
|
@ -1393,7 +1303,7 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
temp_op.is_negated = src3_negate;
|
||||
temp_op.is_absolute_value = op.abs_constants();
|
||||
temp_op.storage_source = InstructionStorageSource::kRegister;
|
||||
temp_op.storage_index = op.scalar_const_op_src_temp_reg();
|
||||
temp_op.storage_index = op.scalar_const_reg_op_src_temp_reg();
|
||||
temp_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
temp_op.component_count = 1;
|
||||
|
@ -1423,7 +1333,7 @@ bool ParsedAluInstruction::IsNop() const {
|
|||
return scalar_opcode == ucode::AluScalarOpcode::kRetainPrev &&
|
||||
!scalar_result.GetUsedWriteMask() &&
|
||||
!vector_and_constant_result.GetUsedWriteMask() &&
|
||||
!ucode::AluVectorOpHasSideEffects(vector_opcode);
|
||||
!ucode::GetAluVectorOpcodeInfo(vector_opcode).changed_state;
|
||||
}
|
||||
|
||||
uint32_t ParsedAluInstruction::GetMemExportStreamConstant() const {
|
||||
|
|
|
@ -2264,7 +2264,7 @@ bool SpirvShaderTranslator::ProcessVectorAluOperation(
|
|||
close_predicated_block = false;
|
||||
|
||||
if (!instr.vector_and_constant_result.GetUsedWriteMask() &&
|
||||
!AluVectorOpHasSideEffects(instr.vector_opcode)) {
|
||||
!ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/ucode.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace ucode {
|
||||
|
||||
// Static description of every scalar ALU opcode, indexed by the numeric value
// of AluScalarOpcode (GetAluScalarOpcodeInfo casts the opcode to uint32_t and
// uses it as the index). All 64 slots are populated so any opcode value that
// fits in the table has an entry; values with no known operation carry the
// placeholder name "opcode_N".
// Entry layout: {name, operand_count, single_operand_is_two_component,
// changed_state}.
// mulsc, addsc and subsc each occupy two consecutive opcode values with
// identical entries - presumably because the low opcode bit is reused as part
// of the temporary register index (see scalar_const_reg_op_src_temp_reg) -
// TODO confirm.
const AluScalarOpcodeInfo kAluScalarOpcodeInfos[64] = {
|
||||
{"adds", 1, true, kAluOpChangedStateNone},  // 0
|
||||
{"adds_prev", 1, false, kAluOpChangedStateNone},  // 1
|
||||
{"muls", 1, true, kAluOpChangedStateNone},  // 2
|
||||
{"muls_prev", 1, false, kAluOpChangedStateNone},  // 3
|
||||
{"muls_prev2", 1, true, kAluOpChangedStateNone},  // 4
|
||||
{"maxs", 1, true, kAluOpChangedStateNone},  // 5
|
||||
{"mins", 1, true, kAluOpChangedStateNone},  // 6
|
||||
{"seqs", 1, false, kAluOpChangedStateNone},  // 7
|
||||
{"sgts", 1, false, kAluOpChangedStateNone},  // 8
|
||||
{"sges", 1, false, kAluOpChangedStateNone},  // 9
|
||||
{"snes", 1, false, kAluOpChangedStateNone},  // 10
|
||||
{"frcs", 1, false, kAluOpChangedStateNone},  // 11
|
||||
{"truncs", 1, false, kAluOpChangedStateNone},  // 12
|
||||
{"floors", 1, false, kAluOpChangedStateNone},  // 13
|
||||
{"exp", 1, false, kAluOpChangedStateNone},  // 14
|
||||
{"logc", 1, false, kAluOpChangedStateNone},  // 15
|
||||
{"log", 1, false, kAluOpChangedStateNone},  // 16
|
||||
{"rcpc", 1, false, kAluOpChangedStateNone},  // 17
|
||||
{"rcpf", 1, false, kAluOpChangedStateNone},  // 18
|
||||
{"rcp", 1, false, kAluOpChangedStateNone},  // 19
|
||||
{"rsqc", 1, false, kAluOpChangedStateNone},  // 20
|
||||
{"rsqf", 1, false, kAluOpChangedStateNone},  // 21
|
||||
{"rsq", 1, false, kAluOpChangedStateNone},  // 22
|
||||
{"maxas", 1, true, kAluOpChangedStateAddressRegister},  // 23
|
||||
{"maxasf", 1, true, kAluOpChangedStateAddressRegister},  // 24
|
||||
{"subs", 1, true, kAluOpChangedStateNone},  // 25
|
||||
{"subs_prev", 1, false, kAluOpChangedStateNone},  // 26
|
||||
{"setp_eq", 1, false, kAluOpChangedStatePredicate},  // 27
|
||||
{"setp_ne", 1, false, kAluOpChangedStatePredicate},  // 28
|
||||
{"setp_gt", 1, false, kAluOpChangedStatePredicate},  // 29
|
||||
{"setp_ge", 1, false, kAluOpChangedStatePredicate},  // 30
|
||||
{"setp_inv", 1, false, kAluOpChangedStatePredicate},  // 31
|
||||
{"setp_pop", 1, false, kAluOpChangedStatePredicate},  // 32
|
||||
{"setp_clr", 0, false, kAluOpChangedStatePredicate},  // 33
|
||||
{"setp_rstr", 1, false, kAluOpChangedStatePredicate},  // 34
|
||||
{"kills_eq", 1, false, kAluOpChangedStatePixelKill},  // 35
|
||||
{"kills_gt", 1, false, kAluOpChangedStatePixelKill},  // 36
|
||||
{"kills_ge", 1, false, kAluOpChangedStatePixelKill},  // 37
|
||||
{"kills_ne", 1, false, kAluOpChangedStatePixelKill},  // 38
|
||||
{"kills_one", 1, false, kAluOpChangedStatePixelKill},  // 39
|
||||
{"sqrt", 1, false, kAluOpChangedStateNone},  // 40
|
||||
{"opcode_41", 0, false, kAluOpChangedStateNone},  // 41
|
||||
{"mulsc", 2, false, kAluOpChangedStateNone},  // 42
|
||||
{"mulsc", 2, false, kAluOpChangedStateNone},  // 43
|
||||
{"addsc", 2, false, kAluOpChangedStateNone},  // 44
|
||||
{"addsc", 2, false, kAluOpChangedStateNone},  // 45
|
||||
{"subsc", 2, false, kAluOpChangedStateNone},  // 46
|
||||
{"subsc", 2, false, kAluOpChangedStateNone},  // 47
|
||||
{"sin", 1, false, kAluOpChangedStateNone},  // 48
|
||||
{"cos", 1, false, kAluOpChangedStateNone},  // 49
|
||||
{"retain_prev", 0, false, kAluOpChangedStateNone},  // 50
|
||||
{"opcode_51", 0, false, kAluOpChangedStateNone},  // 51
|
||||
{"opcode_52", 0, false, kAluOpChangedStateNone},  // 52
|
||||
{"opcode_53", 0, false, kAluOpChangedStateNone},  // 53
|
||||
{"opcode_54", 0, false, kAluOpChangedStateNone},  // 54
|
||||
{"opcode_55", 0, false, kAluOpChangedStateNone},  // 55
|
||||
{"opcode_56", 0, false, kAluOpChangedStateNone},  // 56
|
||||
{"opcode_57", 0, false, kAluOpChangedStateNone},  // 57
|
||||
{"opcode_58", 0, false, kAluOpChangedStateNone},  // 58
|
||||
{"opcode_59", 0, false, kAluOpChangedStateNone},  // 59
|
||||
{"opcode_60", 0, false, kAluOpChangedStateNone},  // 60
|
||||
{"opcode_61", 0, false, kAluOpChangedStateNone},  // 61
|
||||
{"opcode_62", 0, false, kAluOpChangedStateNone},  // 62
|
||||
{"opcode_63", 0, false, kAluOpChangedStateNone},  // 63
|
||||
};
|
||||
|
||||
// Static description of every vector ALU opcode, indexed by the numeric value
// of AluVectorOpcode (GetAluVectorOpcodeInfo casts the opcode to uint32_t and
// uses it as the index). All 32 slots are populated; values with no known
// operation carry the placeholder name "opcode_N" and no operands.
// Entry layout: {name, {operand_components_used[0..2]}, changed_state}.
// Each mask lists the source components the opcode reads from that operand.
// Masks omitted from the braces are zero-initialized, and
// AluVectorOpcodeInfo::GetOperandCount derives the operand count from which
// masks are non-zero, so the number of masks written here is significant.
// NOTE(review): bit 0 of a mask appears to be X (dp3 uses 0b0111, dp2add's
// third operand uses 0b0001) - confirm against the users of the mask.
const AluVectorOpcodeInfo kAluVectorOpcodeInfos[32] = {
|
||||
{"add", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 0
|
||||
{"mul", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 1
|
||||
{"max", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 2
|
||||
{"min", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 3
|
||||
{"seq", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 4
|
||||
{"sgt", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 5
|
||||
{"sge", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 6
|
||||
{"sne", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 7
|
||||
{"frc", {0b1111}, kAluOpChangedStateNone},  // 8
|
||||
{"trunc", {0b1111}, kAluOpChangedStateNone},  // 9
|
||||
{"floor", {0b1111}, kAluOpChangedStateNone},  // 10
|
||||
{"mad", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 11
|
||||
{"cndeq", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 12
|
||||
{"cndge", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 13
|
||||
{"cndgt", {0b1111, 0b1111, 0b1111}, kAluOpChangedStateNone},  // 14
|
||||
{"dp4", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 15
|
||||
{"dp3", {0b0111, 0b0111}, kAluOpChangedStateNone},  // 16
|
||||
{"dp2add", {0b0011, 0b0011, 0b0001}, kAluOpChangedStateNone},  // 17
|
||||
{"cube", {0b1111, 0b1111}, kAluOpChangedStateNone},  // 18
|
||||
{"max4", {0b1111}, kAluOpChangedStateNone},  // 19
|
||||
{"setp_eq_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 20
|
||||
{"setp_ne_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 21
|
||||
{"setp_gt_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 22
|
||||
{"setp_ge_push", {0b1001, 0b1001}, kAluOpChangedStatePredicate},  // 23
|
||||
{"kill_eq", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 24
|
||||
{"kill_gt", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 25
|
||||
{"kill_ge", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 26
|
||||
{"kill_ne", {0b1111, 0b1111}, kAluOpChangedStatePixelKill},  // 27
|
||||
{"dst", {0b0110, 0b1010}, kAluOpChangedStateNone},  // 28
|
||||
{"maxa", {0b1111, 0b1111}, kAluOpChangedStateAddressRegister},  // 29
|
||||
{"opcode_30", {}, kAluOpChangedStateNone},  // 30
|
||||
{"opcode_31", {}, kAluOpChangedStateNone},  // 31
|
||||
};
|
||||
|
||||
} // namespace ucode
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -13,6 +13,7 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
|
@ -900,8 +901,9 @@ static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
|
|||
// Conventions:
|
||||
// - All temporary registers are vec4s.
|
||||
// - Most scalar ALU operations work with one or two components of the source
|
||||
// register passed as the third operand of the whole co-issued ALU operation,
|
||||
// denoted by `a` (the left-hand operand) and `b` (the right-hand operand).
|
||||
// register or the float constant passed as the third operand of the whole
|
||||
// co-issued ALU operation, denoted by `a` (the left-hand operand) and `b`
|
||||
// (the right-hand operand).
|
||||
// `a` is the [(3 + src3_swizzle[6:7]) & 3] component (W - alpha).
|
||||
// `b` is the [(0 + src3_swizzle[0:1]) & 3] component (X - red).
|
||||
// - mulsc, addsc, subsc scalar ALU operations accept two operands - a float
|
||||
|
@ -948,6 +950,14 @@ static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
|
|||
// use instructions that may be interpreted by the host GPU as fused
|
||||
// multiply-add.
|
||||
|
||||
// For analysis of shaders and skipping instructions that write nothing.
// Bit flags describing which state, besides the instruction's destination, an
// ALU opcode modifies. The per-opcode value is stored in the opcode info
// tables; callers either test the whole value for non-zero (a vector operation
// with no used result components may only be skipped when it is zero) or mask
// out a single flag with & (for example to detect pixel kills).
|
||||
enum AluOpChangedState {
|
||||
// The opcode changes no state other than its result.
kAluOpChangedStateNone = 0,
|
||||
// Set in the opcode tables for maxa, maxas and maxasf.
kAluOpChangedStateAddressRegister = 1 << 0,
|
||||
// Set in the opcode tables for the setp_* opcodes (scalar and vector _push).
kAluOpChangedStatePredicate = 1 << 1,
|
||||
// Set in the opcode tables for the kill_* and kills_* opcodes.
kAluOpChangedStatePixelKill = 1 << 2,
|
||||
};
|
||||
|
||||
enum class AluScalarOpcode : uint32_t {
|
||||
// Floating-Point Add
|
||||
// adds/ADDs dest, src0.ab
|
||||
|
@ -1277,17 +1287,28 @@ enum class AluScalarOpcode : uint32_t {
|
|||
kRetainPrev = 50,
|
||||
};
|
||||
|
||||
constexpr bool AluScalarOpcodeIsKill(AluScalarOpcode scalar_opcode) {
|
||||
switch (scalar_opcode) {
|
||||
case AluScalarOpcode::kKillsEq:
|
||||
case AluScalarOpcode::kKillsGt:
|
||||
case AluScalarOpcode::kKillsGe:
|
||||
case AluScalarOpcode::kKillsNe:
|
||||
case AluScalarOpcode::kKillsOne:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
// Static per-opcode description of a scalar ALU operation. One instance exists
// for every slot of kAluScalarOpcodeInfos; obtain it via
// GetAluScalarOpcodeInfo.
struct AluScalarOpcodeInfo {
|
||||
// Mnemonic of the operation; ParseAluInstruction stores it as the parsed
// instruction's scalar_opcode_name.
const char* name;
|
||||
// 0 - no operands.
|
||||
// 1 - one single-component (W) or two-component (WX) r# or c#.
|
||||
// 2 - c#.w and r#.x.
|
||||
uint32_t operand_count;
|
||||
// If operand_count is 1, whether both W and X of the operand are used rather
|
||||
// than only W.
|
||||
bool single_operand_is_two_component;
|
||||
// Note that all scalar instructions except for retain_prev modify the
|
||||
// previous scalar register, so they must be executed even if they don't write
|
||||
// any result and don't perform any other state changes.
|
||||
AluOpChangedState changed_state;
|
||||
};
|
||||
|
||||
// 6 scalar opcode bits - 64 entries.
|
||||
extern const AluScalarOpcodeInfo kAluScalarOpcodeInfos[64];
|
||||
|
||||
// Returns the static description of a scalar ALU opcode.
// The opcode's numeric value is used directly as the index into
// kAluScalarOpcodeInfos, and the returned reference points into that table.
inline const AluScalarOpcodeInfo& GetAluScalarOpcodeInfo(
|
||||
AluScalarOpcode opcode) {
|
||||
// The table has 64 entries (one per 6-bit opcode value), so this only trips
// for a value that could not have come from the instruction's opcode field.
assert_true(uint32_t(opcode) < xe::countof(kAluScalarOpcodeInfos));
|
||||
return kAluScalarOpcodeInfos[uint32_t(opcode)];
|
||||
}
|
||||
|
||||
enum class AluVectorOpcode : uint32_t {
|
||||
|
@ -1385,6 +1406,9 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// dest.y = src0.y * src1.y + src2.y;
|
||||
// dest.z = src0.z * src1.z + src2.z;
|
||||
// dest.w = src0.w * src1.w + src2.w;
|
||||
// According to SQ_ALU::multiply_add (used in the isHardwareAccurate case)
|
||||
// from IPR2015-00325 sq_alu, this is FMA - rounding to single-precision only
|
||||
// after the addition.
|
||||
kMad = 11,
|
||||
|
||||
// Per-Component Floating-Point Conditional Move If Equal
|
||||
|
@ -1490,6 +1514,17 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// } else {
|
||||
// dest.xyzw = src0.w;
|
||||
// }
|
||||
// However, the comparisons may be >= actually - the XNA documentation on
|
||||
// MSDN, as well as R600 and GCN documentation, describe `max` as being
|
||||
// implemented via >= rather than >. `max4` is documented vaguely, without the
|
||||
// exact calculations for each component - MSDN describes it as max(xyzw), and
|
||||
// in the R600 documentation it's max(wzyx). There's also a case more similar
|
||||
// to `max4` where there also is a discrepancy between IPR2015-00325 sq_alu
|
||||
// and the GCN documentation - `cube` has max3 in zyx priority order, and a >=
|
||||
// comparison is used for this purpose on the GCN, but in IPR2015-00325 sq_alu
|
||||
// it's implemented via >. It's possible that in an early version of the R400,
|
||||
// the comparison was >, but was later changed to >=, but this is merely a
|
||||
// guess.
|
||||
kMax4 = 19,
|
||||
|
||||
// Floating-Point Predicate Counter Increment If Equal
|
||||
|
@ -1627,60 +1662,32 @@ enum class AluVectorOpcode : uint32_t {
|
|||
kMaxA = 29,
|
||||
};
|
||||
|
||||
constexpr bool AluVectorOpcodeIsKill(AluVectorOpcode vector_opcode) {
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kKillEq:
|
||||
case AluVectorOpcode::kKillGt:
|
||||
case AluVectorOpcode::kKillGe:
|
||||
case AluVectorOpcode::kKillNe:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
struct AluVectorOpcodeInfo {
|
||||
const char* name;
|
||||
uint32_t operand_components_used[3];
|
||||
AluOpChangedState changed_state;
|
||||
|
||||
// Whether the vector instruction has side effects such as discarding a pixel or
|
||||
// setting the predicate and can't be ignored even if it doesn't write to
|
||||
// anywhere. Note that all scalar operations except for retain_prev have a side
|
||||
// effect of modifying the previous scalar result register, so they must always
|
||||
// be executed even if not writing.
|
||||
constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
|
||||
if (AluVectorOpcodeIsKill(vector_opcode)) {
|
||||
return true;
|
||||
uint32_t GetOperandCount() const {
|
||||
if (!operand_components_used[2]) {
|
||||
if (!operand_components_used[1]) {
|
||||
if (!operand_components_used[0]) {
|
||||
return 0;
|
||||
}
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kSetpEqPush:
|
||||
case AluVectorOpcode::kSetpNePush:
|
||||
case AluVectorOpcode::kSetpGtPush:
|
||||
case AluVectorOpcode::kSetpGePush:
|
||||
case AluVectorOpcode::kMaxA:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
};
|
||||
|
||||
// Whether each component of a source operand is used at all in the instruction
|
||||
// (doesn't check the operand count though).
|
||||
constexpr uint32_t GetAluVectorOpUsedSourceComponents(
|
||||
AluVectorOpcode vector_opcode, uint32_t src_index) {
|
||||
assert_not_zero(src_index);
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kDp3:
|
||||
return 0b0111;
|
||||
case AluVectorOpcode::kDp2Add:
|
||||
return src_index == 3 ? 0b0001 : 0b0011;
|
||||
case AluVectorOpcode::kSetpEqPush:
|
||||
case AluVectorOpcode::kSetpNePush:
|
||||
case AluVectorOpcode::kSetpGtPush:
|
||||
case AluVectorOpcode::kSetpGePush:
|
||||
return 0b1001;
|
||||
case AluVectorOpcode::kDst:
|
||||
return src_index == 2 ? 0b1010 : 0b0110;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0b1111;
|
||||
// 5 vector opcode bits - 32 entries.
|
||||
extern const AluVectorOpcodeInfo kAluVectorOpcodeInfos[32];
|
||||
|
||||
// Returns the static description of a vector ALU opcode.
// The opcode's numeric value is used directly as the index into
// kAluVectorOpcodeInfos, and the returned reference points into that table.
inline const AluVectorOpcodeInfo& GetAluVectorOpcodeInfo(
|
||||
AluVectorOpcode opcode) {
|
||||
// The table has 32 entries (one per 5-bit opcode value), so this only trips
// for a value that could not have come from the instruction's opcode field.
assert_true(uint32_t(opcode) < xe::countof(kAluVectorOpcodeInfos));
|
||||
return kAluVectorOpcodeInfos[uint32_t(opcode)];
|
||||
}
|
||||
|
||||
// Whether each component of a source operand is needed for the instruction if
|
||||
|
@ -1688,7 +1695,7 @@ constexpr uint32_t GetAluVectorOpUsedSourceComponents(
|
|||
// undefined in translation. For per-component operations, for example, only the
|
||||
// components specified in the write mask are needed, but there are instructions
|
||||
// with special behavior for certain components.
|
||||
constexpr uint32_t GetAluVectorOpNeededSourceComponents(
|
||||
inline uint32_t GetAluVectorOpNeededSourceComponents(
|
||||
AluVectorOpcode vector_opcode, uint32_t src_index,
|
||||
uint32_t used_result_components) {
|
||||
assert_not_zero(src_index);
|
||||
|
@ -1721,8 +1728,8 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
|
|||
case AluVectorOpcode::kKillNe:
|
||||
components = 0b1111;
|
||||
break;
|
||||
// kDst is per-component, but not all components are used -
|
||||
// GetAluVectorOpUsedSourceComponents will filter out the unused ones.
|
||||
// kDst is per-component, but not all components are used.
|
||||
// operand_components_used will filter out the unused ones.
|
||||
case AluVectorOpcode::kMaxA:
|
||||
if (src_index == 1) {
|
||||
components |= 0b1000;
|
||||
|
@ -1731,8 +1738,8 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
|
|||
default:
|
||||
break;
|
||||
}
|
||||
return components &
|
||||
GetAluVectorOpUsedSourceComponents(vector_opcode, src_index);
|
||||
return components & GetAluVectorOpcodeInfo(vector_opcode)
|
||||
.operand_components_used[src_index - 1];
|
||||
}
|
||||
|
||||
enum class ExportRegister : uint32_t {
|
||||
|
@ -1787,7 +1794,6 @@ struct alignas(uint32_t) AluInstruction {
|
|||
|
||||
// Whether data is being exported (or written to local registers).
|
||||
bool is_export() const { return data_.export_data == 1; }
|
||||
bool export_write_mask() const { return data_.scalar_dest_rel == 1; }
|
||||
|
||||
// Whether the jump is predicated (or conditional).
|
||||
bool is_predicated() const { return data_.is_predicated; }
|
||||
|
@ -1921,7 +1927,7 @@ struct alignas(uint32_t) AluInstruction {
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t scalar_const_op_src_temp_reg() const {
|
||||
uint32_t scalar_const_reg_op_src_temp_reg() const {
|
||||
return (uint32_t(data_.scalar_opc) & 1) | (data_.src3_sel << 1) |
|
||||
(data_.src3_swiz & 0x3C);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue