[GPU] Fix scalar c[#+aL], shader docs/refactoring

This commit is contained in:
Triang3l 2022-04-13 23:08:19 +03:00
parent 1f324bebcd
commit fea430f1f9
6 changed files with 395 additions and 201 deletions

View File

@ -1331,12 +1331,12 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
dxbc::Index index(operand.storage_index);
switch (operand.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
case InstructionStorageAddressingMode::kAbsolute:
break;
case InstructionStorageAddressingMode::kAddressAbsolute:
case InstructionStorageAddressingMode::kAddressRegisterRelative:
index = dxbc::Index(system_temp_ps_pc_p0_a0_, 3, operand.storage_index);
break;
case InstructionStorageAddressingMode::kAddressRelative:
case InstructionStorageAddressingMode::kLoopRelative:
index = dxbc::Index(system_temp_aL_, 0, operand.storage_index);
break;
}
@ -1365,7 +1365,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
src = dxbc::Src::R(temp);
} else {
assert_true(operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic);
InstructionStorageAddressingMode::kAbsolute);
src = dxbc::Src::R(index.index_);
}
} break;
@ -1376,7 +1376,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
const Shader::ConstantRegisterMap& constant_register_map =
current_shader().constant_register_map();
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
uint32_t float_constant_index =
constant_register_map.GetPackedFloatConstantIndex(
operand.storage_index);
@ -1429,13 +1429,13 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
if (current_shader().uses_register_dynamic_addressing()) {
dxbc::Index register_index(result.storage_index);
switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
case InstructionStorageAddressingMode::kAbsolute:
break;
case InstructionStorageAddressingMode::kAddressAbsolute:
case InstructionStorageAddressingMode::kAddressRegisterRelative:
register_index =
dxbc::Index(system_temp_ps_pc_p0_a0_, 3, result.storage_index);
break;
case InstructionStorageAddressingMode::kAddressRelative:
case InstructionStorageAddressingMode::kLoopRelative:
register_index =
dxbc::Index(system_temp_aL_, 0, result.storage_index);
break;
@ -1443,7 +1443,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
dest = dxbc::Dest::X(0, register_index);
} else {
assert_true(result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic);
InstructionStorageAddressingMode::kAbsolute);
dest = dxbc::Dest::R(result.storage_index);
}
break;

View File

@ -44,7 +44,7 @@ namespace gpu {
enum class InstructionStorageTarget {
// Result is not stored.
kNone,
// Result is stored to a temporary register indexed by storage_index [0-31].
// Result is stored to a temporary register indexed by storage_index [0-63].
kRegister,
// Result is stored into a vertex shader interpolator export [0-15].
kInterpolator,
@ -85,11 +85,13 @@ constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
enum class InstructionStorageAddressingMode {
// The storage index is not dynamically addressed.
kStatic,
kAbsolute,
// The storage index is addressed by a0.
kAddressAbsolute,
// Float constants only.
kAddressRegisterRelative,
// The storage index is addressed by aL.
kAddressRelative,
// Float constants and temporary registers only.
kLoopRelative,
};
// Describes the source value of a particular component.
@ -111,6 +113,12 @@ enum class SwizzleSource {
// Maps a component index (0-3) to the corresponding SwizzleSource.
// NOTE(review): the raw cast assumes SwizzleSource's first four enumerators
// are kX, kY, kZ, kW in that order - confirm against the enum definition.
constexpr SwizzleSource GetSwizzleFromComponentIndex(uint32_t i) {
return static_cast<SwizzleSource>(i);
}
// Returns the swizzle source selected for component `component_index` of an
// ALU operand with the given source swizzle. ALU source swizzle components
// are encoded relative to the component's own index (unlike fetch
// destination swizzles, which are absolute);
// AluInstruction::GetSwizzledComponentIndex performs that decoding.
constexpr SwizzleSource GetSwizzledAluSourceComponent(
uint32_t swizzle, uint32_t component_index) {
return GetSwizzleFromComponentIndex(
ucode::AluInstruction::GetSwizzledComponentIndex(swizzle,
component_index));
}
inline char GetCharForComponentIndex(uint32_t i) {
const static char kChars[] = {'x', 'y', 'z', 'w'};
return kChars[i];
@ -127,7 +135,7 @@ struct InstructionResult {
uint32_t storage_index = 0;
// How the storage index is dynamically addressed, if it is.
InstructionStorageAddressingMode storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
InstructionStorageAddressingMode::kAbsolute;
// True to clamp the result value to [0-1].
bool is_clamped = false;
// Defines whether each output component is written, though this is from the
@ -191,9 +199,9 @@ struct InstructionResult {
};
enum class InstructionStorageSource {
// Source is stored in a temporary register indexed by storage_index [0-31].
// Source is stored in a temporary register indexed by storage_index [0-63].
kRegister,
// Source is stored in a float constant indexed by storage_index [0-511].
// Source is stored in a float constant indexed by storage_index [0-255].
kConstantFloat,
// Source is stored in a vertex fetch constant indexed by storage_index
// [0-95].
@ -210,7 +218,7 @@ struct InstructionOperand {
uint32_t storage_index = 0;
// How the storage index is dynamically addressed, if it is.
InstructionStorageAddressingMode storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
InstructionStorageAddressingMode::kAbsolute;
// True to negate the operand value.
bool is_negated = false;
// True to take the absolute value of the source (before any negation).

View File

@ -247,22 +247,18 @@ void Shader::GatherExecInformation(
if (sequence & 0b10) {
ucode_disasm_buffer.Append(" serialize\n ");
}
const uint32_t* op_ptr = ucode_data_.data() + instr_offset * 3;
if (sequence & 0b01) {
auto fetch_opcode = FetchOpcode(ucode_data_[instr_offset * 3] & 0x1F);
if (fetch_opcode == FetchOpcode::kVertexFetch) {
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
ucode_data_.data() + instr_offset * 3);
GatherVertexFetchInformation(op, previous_vfetch_full,
auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
if (op.opcode() == FetchOpcode::kVertexFetch) {
GatherVertexFetchInformation(op.vertex_fetch(), previous_vfetch_full,
ucode_disasm_buffer);
} else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
ucode_data_.data() + instr_offset * 3);
GatherTextureFetchInformation(op, unique_texture_bindings,
ucode_disasm_buffer);
GatherTextureFetchInformation(
op.texture_fetch(), unique_texture_bindings, ucode_disasm_buffer);
}
} else {
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_data_.data() +
instr_offset * 3);
auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
GatherAluInstructionInformation(op, memexport_alloc_current_count,
memexport_eA_written,
ucode_disasm_buffer);
@ -420,7 +416,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
switch (operand.storage_source) {
case InstructionStorageSource::kRegister:
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ =
std::max(register_static_address_bound_,
operand.storage_index + uint32_t(1));
@ -430,7 +426,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
break;
case InstructionStorageSource::kConstantFloat:
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
// Store used float constants before translating so the
// translator can use tightly packed indices if not dynamically
// indexed.
@ -457,7 +453,7 @@ void Shader::GatherFetchResultInformation(const InstructionResult& result) {
// operand.
assert_true(result.storage_target == InstructionStorageTarget::kRegister);
if (result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ = std::max(
register_static_address_bound_, result.storage_index + uint32_t(1));
} else {
@ -473,7 +469,7 @@ void Shader::GatherAluResultInformation(
switch (result.storage_target) {
case InstructionStorageTarget::kRegister:
if (result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ = std::max(
register_static_address_bound_, result.storage_index + uint32_t(1));
} else {
@ -789,28 +785,24 @@ void ShaderTranslator::TranslateExecInstructions(
for (uint32_t instr_offset = instr.instruction_address;
instr_offset < instr.instruction_address + instr.instruction_count;
++instr_offset, sequence >>= 2) {
const uint32_t* op_ptr = ucode_dwords + instr_offset * 3;
if (sequence & 0b01) {
auto fetch_opcode =
static_cast<FetchOpcode>(ucode_dwords[instr_offset * 3] & 0x1F);
if (fetch_opcode == FetchOpcode::kVertexFetch) {
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
ucode_dwords + instr_offset * 3);
auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
if (op.opcode() == FetchOpcode::kVertexFetch) {
const VertexFetchInstruction& vfetch_op = op.vertex_fetch();
ParsedVertexFetchInstruction vfetch_instr;
if (ParseVertexFetchInstruction(op, previous_vfetch_full_,
if (ParseVertexFetchInstruction(vfetch_op, previous_vfetch_full_,
vfetch_instr)) {
previous_vfetch_full_ = op;
previous_vfetch_full_ = vfetch_op;
}
ProcessVertexFetchInstruction(vfetch_instr);
} else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
ucode_dwords + instr_offset * 3);
ParsedTextureFetchInstruction tfetch_instr;
ParseTextureFetchInstruction(op, tfetch_instr);
ParseTextureFetchInstruction(op.texture_fetch(), tfetch_instr);
ProcessTextureFetchInstruction(tfetch_instr);
}
} else {
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords +
instr_offset * 3);
auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
ParsedAluInstruction alu_instr;
ParseAluInstruction(op, current_shader().type(), alu_instr);
ProcessAluInstruction(alu_instr);
@ -826,25 +818,40 @@ static void ParseFetchInstructionResult(uint32_t dest, uint32_t swizzle,
result.storage_index = dest;
result.is_clamped = false;
result.storage_addressing_mode =
is_relative ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
is_relative ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kAbsolute;
result.original_write_mask = 0b1111;
for (int i = 0; i < 4; ++i) {
switch (swizzle & 0x7) {
case 4:
case 6:
result.components[i] = SwizzleSource::k0;
SwizzleSource component_source = SwizzleSource::k0;
ucode::FetchDestinationSwizzle component_swizzle =
ucode::GetFetchDestinationComponentSwizzle(swizzle, i);
switch (component_swizzle) {
case ucode::FetchDestinationSwizzle::kX:
component_source = SwizzleSource::kX;
break;
case 5:
result.components[i] = SwizzleSource::k1;
case ucode::FetchDestinationSwizzle::kY:
component_source = SwizzleSource::kY;
break;
case 7:
result.original_write_mask &= ~uint32_t(1 << i);
case ucode::FetchDestinationSwizzle::kZ:
component_source = SwizzleSource::kZ;
break;
case ucode::FetchDestinationSwizzle::kW:
component_source = SwizzleSource::kW;
break;
case ucode::FetchDestinationSwizzle::k1:
component_source = SwizzleSource::k1;
break;
case ucode::FetchDestinationSwizzle::kKeep:
result.original_write_mask &= ~(UINT32_C(1) << i);
break;
default:
result.components[i] = GetSwizzleFromComponentIndex(swizzle & 0x3);
// ucode::FetchDestinationSwizzle::k0 or the invalid swizzle 6.
// TODO(Triang3l): Find the correct handling of the invalid swizzle 6.
assert_true(component_swizzle == ucode::FetchDestinationSwizzle::k0);
component_source = SwizzleSource::k0;
break;
}
swizzle >>= 3;
result.components[i] = component_source;
}
}
@ -867,8 +874,8 @@ bool ParseVertexFetchInstruction(const VertexFetchInstruction& op,
src_op.storage_index = full_op.src();
src_op.storage_addressing_mode =
full_op.is_src_relative()
? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kAbsolute;
src_op.is_negated = false;
src_op.is_absolute_value = false;
src_op.component_count = 1;
@ -962,8 +969,8 @@ void ParseTextureFetchInstruction(const TextureFetchInstruction& op,
src_op.storage_source = InstructionStorageSource::kRegister;
src_op.storage_index = op.src();
src_op.storage_addressing_mode =
op.is_src_relative() ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
op.is_src_relative() ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kAbsolute;
src_op.is_negated = false;
src_op.is_absolute_value = false;
src_op.component_count =
@ -1144,91 +1151,51 @@ static const AluOpcodeInfo alu_scalar_opcode_infos[0x40] = {
static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i,
uint32_t swizzle_component_count,
InstructionOperand& out_op) {
int const_slot = 0;
switch (i) {
case 2:
const_slot = op.src_is_temp(1) ? 0 : 1;
break;
case 3:
const_slot = op.src_is_temp(1) && op.src_is_temp(2) ? 0 : 1;
break;
}
out_op.is_negated = op.src_negate(i);
uint32_t reg = op.src_reg(i);
if (op.src_is_temp(i)) {
out_op.storage_source = InstructionStorageSource::kRegister;
out_op.storage_index = reg & 0x1F;
out_op.is_absolute_value = (reg & 0x80) == 0x80;
out_op.storage_index = AluInstruction::src_temp_reg(reg);
out_op.is_absolute_value = AluInstruction::is_src_temp_value_absolute(reg);
out_op.storage_addressing_mode =
(reg & 0x40) ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
AluInstruction::is_src_temp_relative(reg)
? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kAbsolute;
} else {
out_op.storage_source = InstructionStorageSource::kConstantFloat;
out_op.storage_index = reg;
if ((const_slot == 0 && op.is_const_0_addressed()) ||
(const_slot == 1 && op.is_const_1_addressed())) {
if (op.is_address_relative()) {
if (op.src_const_is_addressed(i)) {
if (op.is_const_address_register_relative()) {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressAbsolute;
InstructionStorageAddressingMode::kAddressRegisterRelative;
} else {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
InstructionStorageAddressingMode::kLoopRelative;
}
} else {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
InstructionStorageAddressingMode::kAbsolute;
}
out_op.is_absolute_value = op.abs_constants();
}
out_op.component_count = swizzle_component_count;
uint32_t swizzle = op.src_swizzle(i);
if (swizzle_component_count == 1) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
out_op.components[0] = GetSwizzleFromComponentIndex(a);
// Scalar `a` (W).
out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
} else if (swizzle_component_count == 2) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
uint32_t b = ((swizzle >> 0) + 0) & 0x3;
out_op.components[0] = GetSwizzleFromComponentIndex(a);
out_op.components[1] = GetSwizzleFromComponentIndex(b);
// Scalar left-hand `a` (W) and right-hand `b` (X).
out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
out_op.components[1] = GetSwizzledAluSourceComponent(swizzle, 0);
} else if (swizzle_component_count == 3) {
assert_always();
} else if (swizzle_component_count == 4) {
for (uint32_t j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) {
out_op.components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3);
for (uint32_t j = 0; j < swizzle_component_count; ++j) {
out_op.components[j] = GetSwizzledAluSourceComponent(swizzle, j);
}
}
}
static void ParseAluInstructionOperandSpecial(
const AluInstruction& op, InstructionStorageSource storage_source,
uint32_t reg, bool negate, int const_slot, uint32_t component_index,
InstructionOperand& out_op) {
out_op.is_negated = negate;
out_op.is_absolute_value = op.abs_constants();
out_op.storage_source = storage_source;
if (storage_source == InstructionStorageSource::kRegister) {
out_op.storage_index = reg & 0x7F;
out_op.storage_addressing_mode = InstructionStorageAddressingMode::kStatic;
} else {
out_op.storage_index = reg;
if ((const_slot == 0 && op.is_const_0_addressed()) ||
(const_slot == 1 && op.is_const_1_addressed())) {
if (op.is_address_relative()) {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressAbsolute;
} else {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
}
} else {
out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
}
}
out_op.component_count = 1;
out_op.components[0] = GetSwizzleFromComponentIndex(component_index);
}
bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
if (vector_opcode != ucode::AluVectorOpcode::kMax ||
vector_and_constant_result.original_write_mask ||
@ -1237,14 +1204,14 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
InstructionStorageSource::kRegister ||
vector_operands[0].storage_index != 0 ||
vector_operands[0].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic ||
InstructionStorageAddressingMode::kAbsolute ||
vector_operands[0].is_negated || vector_operands[0].is_absolute_value ||
!vector_operands[0].IsStandardSwizzle() ||
vector_operands[1].storage_source !=
InstructionStorageSource::kRegister ||
vector_operands[1].storage_index != 0 ||
vector_operands[1].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic ||
InstructionStorageAddressingMode::kAbsolute ||
vector_operands[1].is_negated || vector_operands[1].is_absolute_value ||
!vector_operands[1].IsStandardSwizzle()) {
return false;
@ -1253,7 +1220,7 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
InstructionStorageTarget::kRegister) {
if (vector_and_constant_result.storage_index != 0 ||
vector_and_constant_result.storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
return false;
}
} else {
@ -1330,14 +1297,14 @@ void ParseAluInstruction(const AluInstruction& op,
instr.vector_and_constant_result.storage_target = storage_target;
instr.vector_and_constant_result.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
InstructionStorageAddressingMode::kAbsolute;
if (is_export) {
instr.vector_and_constant_result.storage_index = storage_index_export;
} else {
instr.vector_and_constant_result.storage_index = op.vector_dest();
if (op.is_vector_dest_relative()) {
instr.vector_and_constant_result.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
InstructionStorageAddressingMode::kLoopRelative;
}
}
instr.vector_and_constant_result.is_clamped = op.vector_clamp();
@ -1372,14 +1339,14 @@ void ParseAluInstruction(const AluInstruction& op,
instr.scalar_result.storage_target = storage_target;
instr.scalar_result.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
InstructionStorageAddressingMode::kAbsolute;
if (is_export) {
instr.scalar_result.storage_index = storage_index_export;
} else {
instr.scalar_result.storage_index = op.scalar_dest();
if (op.is_scalar_dest_relative()) {
instr.scalar_result.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
InstructionStorageAddressingMode::kLoopRelative;
}
}
instr.scalar_result.is_clamped = op.scalar_clamp();
@ -1395,20 +1362,42 @@ void ParseAluInstruction(const AluInstruction& op,
scalar_opcode_info.src_swizzle_component_count,
instr.scalar_operands[0]);
} else {
// Constant and temporary register.
bool src3_negate = op.src_negate(3);
uint32_t src3_swizzle = op.src_swizzle(3);
uint32_t component_a = ((src3_swizzle >> 6) + 3) & 0x3;
uint32_t component_b = ((src3_swizzle >> 0) + 0) & 0x3;
uint32_t reg2 = (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1) |
(static_cast<int>(op.scalar_opcode()) & 1);
int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0;
ParseAluInstructionOperandSpecial(
op, InstructionStorageSource::kConstantFloat, op.src_reg(3),
op.src_negate(3), 0, component_a, instr.scalar_operands[0]);
// Left-hand constant operand (`a` - W swizzle).
InstructionOperand& const_op = instr.scalar_operands[0];
const_op.is_negated = src3_negate;
const_op.is_absolute_value = op.abs_constants();
const_op.storage_source = InstructionStorageSource::kConstantFloat;
const_op.storage_index = op.src_reg(3);
if (op.src_const_is_addressed(3)) {
if (op.is_const_address_register_relative()) {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRegisterRelative;
} else {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kLoopRelative;
}
} else {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAbsolute;
}
const_op.component_count = 1;
const_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 3);
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
reg2, op.src_negate(3), const_slot,
component_b, instr.scalar_operands[1]);
// Right-hand temporary register operand (`b` - X swizzle).
InstructionOperand& temp_op = instr.scalar_operands[1];
temp_op.is_negated = src3_negate;
temp_op.is_absolute_value = op.abs_constants();
temp_op.storage_source = InstructionStorageSource::kRegister;
temp_op.storage_index = op.scalar_const_op_src_temp_reg();
temp_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAbsolute;
temp_op.component_count = 1;
temp_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 0);
}
}
}
@ -1421,7 +1410,7 @@ bool ParsedAluInstruction::IsScalarOpDefaultNop() const {
if (scalar_result.storage_target == InstructionStorageTarget::kRegister) {
if (scalar_result.storage_index != 0 ||
scalar_result.storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) {
InstructionStorageAddressingMode::kAbsolute) {
return false;
}
}
@ -1446,7 +1435,7 @@ uint32_t ParsedAluInstruction::GetMemExportStreamConstant() const {
vector_operands[2].storage_source ==
InstructionStorageSource::kConstantFloat &&
vector_operands[2].storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic &&
InstructionStorageAddressingMode::kAbsolute &&
vector_operands[2].IsStandardSwizzle() &&
!vector_operands[2].is_negated && !vector_operands[2].is_absolute_value) {
return vector_operands[2].storage_index;

View File

@ -57,13 +57,13 @@ void DisassembleResultOperand(const InstructionResult& result,
}
if (uses_storage_index) {
switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
case InstructionStorageAddressingMode::kAbsolute:
out->AppendFormat("{}", result.storage_index);
break;
case InstructionStorageAddressingMode::kAddressAbsolute:
case InstructionStorageAddressingMode::kAddressRegisterRelative:
out->AppendFormat("[{}+a0]", result.storage_index);
break;
case InstructionStorageAddressingMode::kAddressRelative:
case InstructionStorageAddressingMode::kLoopRelative:
out->AppendFormat("[{}+aL]", result.storage_index);
break;
}
@ -109,17 +109,17 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
out->Append("_abs");
}
switch (op.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
case InstructionStorageAddressingMode::kAbsolute:
if (op.is_absolute_value) {
out->AppendFormat("[{}]", op.storage_index);
} else {
out->AppendFormat("{}", op.storage_index);
}
break;
case InstructionStorageAddressingMode::kAddressAbsolute:
case InstructionStorageAddressingMode::kAddressRegisterRelative:
out->AppendFormat("[{}+a0]", op.storage_index);
break;
case InstructionStorageAddressingMode::kAddressRelative:
case InstructionStorageAddressingMode::kLoopRelative:
out->AppendFormat("[{}+aL]", op.storage_index);
break;
}

View File

@ -3110,16 +3110,16 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
}
switch (op.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: {
case InstructionStorageAddressingMode::kAbsolute: {
storage_index = b.makeUintConstant(storage_base + op.storage_index);
} break;
case InstructionStorageAddressingMode::kAddressAbsolute: {
case InstructionStorageAddressingMode::kAddressRegisterRelative: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(storage_base + op.storage_index));
} break;
case InstructionStorageAddressingMode::kAddressRelative: {
case InstructionStorageAddressingMode::kLoopRelative: {
// storage_index + aL.x
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
storage_index =
@ -3269,16 +3269,16 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage
switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: {
case InstructionStorageAddressingMode::kAbsolute: {
storage_index = b.makeUintConstant(result.storage_index);
} break;
case InstructionStorageAddressingMode::kAddressAbsolute: {
case InstructionStorageAddressingMode::kAddressRegisterRelative: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(result.storage_index));
} break;
case InstructionStorageAddressingMode::kAddressRelative: {
case InstructionStorageAddressingMode::kLoopRelative: {
// storage_index + aL.x
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx,

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -16,11 +16,45 @@
#include "xenia/base/platform.h"
#include "xenia/gpu/xenos.h"
// Closest AMD doc:
// The XNA Game Studio 3.1 contains Graphics.ShaderCompiler.AssembleFromSource,
// which, for TargetPlatform.Xbox360, can validate and assemble Xbox 360 shader
// microcode from Xbox 360 and Direct3D 9 shader assembly, returning the binary,
// as well as validation warnings and errors and the disassembly via the warning
// output. It is the primary source of information about the binary encoding of
// the instructions, as well as valid usage of instruction parameters and
// sequences.
// https://www.microsoft.com/en-us/download/details.aspx?id=39
// (XNAGS31_setup.exe)
// Xenia provides a tool, tools/shader-playground, that invokes the assembler,
// displays the binary and the disassembly from the official assembler, and also
// shows the disassembly generated by Xenia, and passes it back to the assembler
// to validate Xenia's microcode parsing and disassembly by checking if
// reassembling the disassembly results in the same binary.
//
// The behavior and the parameters of some of the instructions were previously
// documented on MSDN in the XNA Game Studio programming guide:
// http://web.archive.org/web/20081211005537/http://msdn.microsoft.com/en-us/library/bb313877.aspx
//
// A great amount of documentation, such as the R400 sequencer specification and
// the official emulator code, was made available during the LG Electronics,
// Inc. v. ATI Technologies ULC "Multi-thread Graphics Processing System" patent
// dispute IPR2015-00325, with the motion to seal having been denied due to "a
// strong public policy interest in making all information filed in an inter
// partes review publicly available". Most of the documents attached, however,
// cover early versions - the development process - of the R400 architecture, so
// there are some differences from the final Xenos GPU (DOT2ADDv is defined
// differently, for example, and MUL/ADD/SUB_CONST are missing).
// https://portal.unifiedpatents.com/ptab/case/IPR2015-00325
//
// Also, the R600, while having a different 5-scalar, as opposed to vec4|scalar,
// parallelism model and instruction encodings and targeting Direct3D 10 rather
// than 9, inherits a lot of instructions and architectural concepts from the
// R400.
// https://www.x.org/docs/AMD/old/r600isa.pdf
// https://developer.amd.com/wordpress/media/2012/10/r600isa.pdf
// https://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
// Microcode format differs, but most fields/enums are the same.
// This code comes from the freedreno project:
// Parts of this code also come from the freedreno project:
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
@ -156,7 +190,8 @@ struct ControlFlowExecInstruction {
uint32_t address() const { return address_; }
// Number of instructions being executed.
uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
// Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; }
// Whether to reset the current predicate.
bool clean() const { return clean_ == 1; }
@ -189,7 +224,8 @@ struct ControlFlowCondExecInstruction {
uint32_t address() const { return address_; }
// Number of instructions being executed.
uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
// Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; }
// Constant index used as the conditional.
uint32_t bool_address() const { return bool_address_; }
@ -224,7 +260,8 @@ struct ControlFlowCondExecPredInstruction {
uint32_t address() const { return address_; }
// Number of instructions being executed.
uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
// Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; }
// Whether to reset the current predicate.
bool clean() const { return clean_ == 1; }
@ -591,6 +628,24 @@ enum class FetchOpcode : uint32_t {
kSetTextureGradientsVert = 26,
};
// Per-component destination swizzle encodings for fetch instructions.
enum class FetchDestinationSwizzle {
  // Unlike ALU source swizzles, these are absolute component indices, not
  // offsets relative to the destination component itself.
  kX = 0,
  kY = 1,
  kZ = 2,
  kW = 3,
  k0 = 4,
  k1 = 5,
  // Note: 6 is deliberately left without an enumerator - the parser treats
  // it as an invalid encoding.
  // Don't write the component - preserve the destination register's value.
  kKeep = 7,
};

// Extracts the 3-bit destination swizzle of one component (0-3) of a fetch
// instruction from the packed 12-bit destination swizzle field.
constexpr FetchDestinationSwizzle GetFetchDestinationComponentSwizzle(
    uint32_t swizzle, uint32_t component) {
  return static_cast<FetchDestinationSwizzle>((swizzle >> (component * 3)) &
                                              0b111);
}
struct alignas(uint32_t) VertexFetchInstruction {
FetchOpcode opcode() const { return data_.opcode_value; }
@ -614,29 +669,6 @@ struct alignas(uint32_t) VertexFetchInstruction {
uint32_t src_swizzle() const { return data_.src_swiz; }
bool is_src_relative() const { return data_.src_reg_am; }
// Returns true if the fetch actually fetches data.
// This may be false if it's used only to populate constants.
bool fetches_any_data() const {
  uint32_t swizzle = data_.dst_swiz;
  for (int component = 0; component < 4; ++component, swizzle >>= 3) {
    switch (swizzle & 0x7) {
      case 4:  // Constant 0.0.
      case 5:  // Constant 1.0.
      case 6:  // Unclear encoding.
      case 7:  // Previous register value kept.
        break;
      default:
        // Swizzles 0-3 select components of the fetched data.
        return true;
    }
  }
  return false;
}
uint32_t prefetch_count() const { return data_.prefetch_count; }
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
@ -676,6 +708,7 @@ struct alignas(uint32_t) VertexFetchInstruction {
uint32_t const_index_sel : 2;
// Prefetch count minus 1.
uint32_t prefetch_count : 3;
// Absolute, one component.
uint32_t src_swiz : 2;
};
struct {
@ -769,10 +802,11 @@ struct alignas(uint32_t) TextureFetchInstruction {
uint32_t fetch_valid_only : 1;
uint32_t const_index : 5;
uint32_t tx_coord_denorm : 1;
uint32_t src_swiz : 6; // xyz
// Absolute, three components.
uint32_t src_swiz : 6;
};
struct {
uint32_t dst_swiz : 12; // xyzw
uint32_t dst_swiz : 12;
xenos::TextureFilter mag_filter : 2;
xenos::TextureFilter min_filter : 2;
xenos::TextureFilter mip_filter : 2;
@ -801,21 +835,96 @@ struct alignas(uint32_t) TextureFetchInstruction {
};
static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
union alignas(uint32_t) FetchInstruction {
public:
FetchOpcode opcode() const { return data_.opcode_value; }
// Whether the fetch is predicated (conditional).
bool is_predicated() const { return data_.is_predicated; }
// Required condition value of the comparison (true or false).
bool predicate_condition() const { return data_.pred_condition == 1; }
uint32_t dest() const { return data_.dst_reg; }
uint32_t dest_swizzle() const { return data_.dst_swiz; }
bool is_dest_relative() const { return data_.dst_reg_am; }
uint32_t src() const { return data_.src_reg; }
bool is_src_relative() const { return data_.src_reg_am; }
// For FetchOpcode::kVertexFetch.
const VertexFetchInstruction& vertex_fetch() const { return vertex_fetch_; }
// For operations other than FetchOpcode::kVertexFetch.
const TextureFetchInstruction& texture_fetch() const {
return texture_fetch_;
}
private:
struct Data {
struct {
FetchOpcode opcode_value : 5;
uint32_t src_reg : 6;
uint32_t src_reg_am : 1;
uint32_t dst_reg : 6;
uint32_t dst_reg_am : 1;
// Specific to vertex or texture fetch.
uint32_t : 1;
// [0-31], points to one tf# or three vf# constants.
uint32_t const_index : 5;
// Specific to vertex or texture fetch.
uint32_t : 7;
};
struct {
uint32_t dst_swiz : 12;
// Specific to vertex or texture fetch.
uint32_t : 19;
uint32_t is_predicated : 1;
};
struct {
// Specific to vertex or texture fetch.
uint32_t : 31;
uint32_t pred_condition : 1;
};
};
Data data_;
VertexFetchInstruction vertex_fetch_;
TextureFetchInstruction texture_fetch_;
};
static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
// What follows is largely a mash up of the microcode assembly naming and the
// R600 docs that have a near 1:1 with the instructions available in the xenos
// R600 docs that have a near 1:1 with the instructions available in the Xenos
// GPU, and Adreno 2xx instruction names found in Freedreno. Some of the
// behavior has been experimentally verified. Some has been guessed.
// Docs: https://www.x.org/docs/AMD/old/r600isa.pdf
// behavior has been experimentally verified. Some has been guessed. Some
// instructions are implemented in the Exhibit 2092 - sq_alu of IPR2015-00325,
// however, the code provided there is early and incomplete.
//
// Conventions:
// - All temporary registers are vec4s.
// - Scalar ops swizzle out a single component of their source registers denoted
// by 'a' or 'b'. src0.a means 'the first component specified for src0' and
// src0.ab means 'two components specified for src0, in order'.
// - Scalar ops write the result to the entire destination register.
// - pv and ps are the previous results of a vector or scalar ALU operation.
// Both are valid only within the current ALU clause. They are not modified
// when the instruction that would write them fails its predication check.
// - Most scalar ALU operations work with one or two components of the source
// register passed as the third operand of the whole co-issued ALU operation,
// denoted by `a` (the left-hand operand) and `b` (the right-hand operand).
// `a` is the [(3 + src3_swizzle[6:7]) & 3] component (W - alpha).
// `b` is the [(0 + src3_swizzle[0:1]) & 3] component (X - red).
// - mulsc, addsc, subsc scalar ALU operations accept two operands - a float
// constant with the `a` (W) swizzle (addressed by the third operand index and
// addressing mode) being the left-hand operand, and a temporary register with
// the `b` (X) swizzle with the index constructed from:
// - [0:0] = scalar_opcode[0:0]
// - [1:1] = src3_sel[0:0]
// - [2:5] = src3_swizzle[2:5]
// abs_constants and third source's negation are applied to both the constant
// and the temporary register.
// - Some scalar ALU instructions don't have operands.
// - Scalar ALU operations replicate the result into all masked components.
// - Overall, the WXYZ order is pretty commonly used in the Exhibit 2092 -
// sq_alu of IPR2015-00325, this is where the AB = WX order of scalar operands
// likely comes from. Vector predicate instructions also involve the W and X
// components, and in IPR2015-00325 sq_alu, individual components in the
// emulated vector instructions are handled in the WXYZ order. However, max4's
// "greater than the rest" check order is RGBA (XYZW) there. dp4, though, sums
// the products in WXYZ order in IPR2015-00325 sq_alu (but in XYZW order on
// MSDN).
// - ps is the previous result of a scalar ALU operation. It is not modified
// when the instruction that would write it fails its predication check.
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
// multiplication (+-0 or denormal * anything = +0) wherever it's present
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
@ -1137,6 +1246,9 @@ enum class AluScalarOpcode : uint32_t {
// dest.xyzw = sqrt(src0.a);
kSqrt = 40,
// 0 and 1 are the same instruction - one bit of the register index is stored
// in the opcode field.
// mulsc/MUL_CONST_0 dest, src0.a, src1.a
kMulsc0 = 42,
// mulsc/MUL_CONST_1 dest, src0.a, src1.a
@ -1303,19 +1415,24 @@ enum class AluVectorOpcode : uint32_t {
// dp4/DOT4v dest, src0, src1
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z +
// src0.w * src1.w;
// Note: only pv.x contains the value.
kDp4 = 15,
// Three-Element Dot Product
// dp3/DOT3v dest, src0, src1
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z;
// Note: only pv.x contains the value.
kDp3 = 16,
// Two-Element Dot Product and Add
// dp2add/DOT2ADDv dest, src0, src1, src2
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src2.x;
// Note: only pv.x contains the value.
// IPR2015-00325 sq_alu may be an outdated and unreliable reference (Sequencer
// Parts Development folder history lists a few changes regarding the swizzle
// in dot2add, sq_alu though implements the instruction as
// src0.x * src1.x + src0.z * src1.z + src2.y, but MSDN specifies the correct
// order as provided in the beginning of this comment, further proven by
// assembling PC shader assembly using XNA, with Shader Model 2 dp2add being
// translated directly into Xenos dp2add without additional swizzling).
// http://web.archive.org/web/20100705150552/http://msdn.microsoft.com/en-us/library/bb313922.aspx
kDp2Add = 17,
// Cube Map
@ -1363,8 +1480,16 @@ enum class AluVectorOpcode : uint32_t {
// Four-Element Maximum
// max4/MAX4v dest, src0
// dest.xyzw = max(src0.x, src0.y, src0.z, src0.w);
// Note: only pv.x contains the value.
// According to IPR2015-00325 sq_alu:
// if (src0.x > src0.y && src0.x > src0.z && src0.x > src0.w) {
// dest.xyzw = src0.x;
// } else if (src0.y > src0.z && src0.y > src0.w) {
// dest.xyzw = src0.y;
// } else if (src0.z > src0.w) {
// dest.xyzw = src0.z;
// } else {
// dest.xyzw = src0.w;
// }
kMax4 = 19,
// Floating-Point Predicate Counter Increment If Equal
@ -1672,7 +1797,9 @@ struct alignas(uint32_t) AluInstruction {
bool abs_constants() const { return data_.abs_constants == 1; }
bool is_const_0_addressed() const { return data_.const_0_rel_abs == 1; }
bool is_const_1_addressed() const { return data_.const_1_rel_abs == 1; }
bool is_address_relative() const { return data_.address_absolute == 1; }
bool is_const_address_register_relative() const {
return data_.const_address_register_relative == 1;
}
AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
uint32_t vector_write_mask() const { return data_.vector_write_mask; }
@ -1686,6 +1813,18 @@ struct alignas(uint32_t) AluInstruction {
bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; }
bool scalar_clamp() const { return data_.scalar_clamp == 1; }
static constexpr uint32_t src_temp_reg(uint32_t src_reg) {
return src_reg & 0x3F;
}
static constexpr bool is_src_temp_relative(uint32_t src_reg) {
return (src_reg & 0x40) != 0;
}
static constexpr bool is_src_temp_value_absolute(uint32_t src_reg) {
return (src_reg & 0x80) != 0;
}
// Full register index for constants, packed structure for temporary
// registers (unpack using src_temp_reg, is_src_temp_relative,
// is_src_temp_value_absolute).
uint32_t src_reg(size_t i) const {
switch (i) {
case 1:
@ -1702,16 +1841,59 @@ struct alignas(uint32_t) AluInstruction {
bool src_is_temp(size_t i) const {
switch (i) {
case 1:
return data_.src1_sel == 1;
return bool(data_.src1_sel);
case 2:
return data_.src2_sel == 1;
return bool(data_.src2_sel);
case 3:
return data_.src3_sel == 1;
return bool(data_.src3_sel);
default:
assert_unhandled_case(i);
return 0;
}
}
// Whether the specified operand is actually a constant is disregarded in this
// function so its scope is limited to just parsing the structure's layout -
// to decide whether to use relative addressing for the operand as a whole,
// check externally whether the operand is actually a constant first.
//
// For the constant operand in mulsc, addsc, subsc, this should be called for
// the operand index 3. Note that the XNA disassembler takes the addressing
// mode for the constant scalar operand unconditionally from const_1_rel_abs,
// and ignores the +aL for it unless the scalar operation is co-issued with a
// vector operation reading from a constant. However, the XNA assembler treats
// the constant scalar operand as a constant in the third operand, and places
// the addressing mode for it in const_0_rel_abs if no other constants are
// used in the whole ALU instruction. The validator also doesn't report
// anything if +aL is used when the constant scalar operand is the only
// constant in the instruction (and explicitly calls it the third constant in
// the error message in case both vector operands are constants, and different
// addressing modes are used for the second vector operand and the constant
// scalar operand). Passing the disassembly produced by XNA back to the
// assembler results in different microcode in this case. This indicates that
// most likely there's a bug in the XNA disassembler, and that the addressing
// mode for the constant scalar operand should actually be taken the same way
// as for the third vector operand - from const_0_rel_abs if there are no
// constant vector operands, or from const_1_rel_abs if there is at least one.
bool src_const_is_addressed(size_t i) const {
// "error X7100: When three constants are used in one instruction, the
// second and third constant must either both be non-relative, or both be
// relative."
// Whether to use const_0_rel_abs or const_1_rel_abs is essentially
// min(sum of whether the previous operands are constants, 1).
switch (i) {
case 1:
return bool(data_.const_0_rel_abs);
case 2:
return bool(src_is_temp(1) ? data_.const_0_rel_abs
: data_.const_1_rel_abs);
case 3:
return bool((src_is_temp(1) && src_is_temp(2)) ? data_.const_0_rel_abs
: data_.const_1_rel_abs);
default:
assert_unhandled_case(i);
return false;
}
}
uint32_t src_swizzle(size_t i) const {
switch (i) {
case 1:
@ -1739,8 +1921,20 @@ struct alignas(uint32_t) AluInstruction {
}
}
uint32_t scalar_const_op_src_temp_reg() const {
return (uint32_t(data_.scalar_opc) & 1) | (data_.src3_sel << 1) |
(data_.src3_swiz & 0x3C);
}
// Helpers.
// Returns the absolute component index calculated from the relative swizzle
// in an ALU instruction.
static constexpr uint32_t GetSwizzledComponentIndex(
uint32_t swizzle, uint32_t component_index) {
return ((swizzle >> (2 * component_index)) + component_index) & 3;
}
// Note that even if the export component is unused (like W of the vertex
// shader misc register, YZW of pixel shader depth), it must still not be
// excluded - that may make disassembly not reassemblable if there are
@ -1803,6 +1997,7 @@ struct alignas(uint32_t) AluInstruction {
AluScalarOpcode scalar_opc : 6;
};
struct {
// Swizzles are component-relative.
uint32_t src3_swiz : 8;
uint32_t src2_swiz : 8;
uint32_t src1_swiz : 8;
@ -1811,7 +2006,9 @@ struct alignas(uint32_t) AluInstruction {
uint32_t src1_reg_negate : 1;
uint32_t pred_condition : 1;
uint32_t is_predicated : 1;
uint32_t address_absolute : 1;
// Temporary registers can have only absolute and aL-relative indices, not
// a0-relative.
uint32_t const_address_register_relative : 1;
uint32_t const_1_rel_abs : 1;
uint32_t const_0_rel_abs : 1;
};