[GPU] Fix scalar c[#+aL], shader docs/refactoring
This commit is contained in:
parent
1f324bebcd
commit
fea430f1f9
|
@ -1331,12 +1331,12 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
|
|||
|
||||
dxbc::Index index(operand.storage_index);
|
||||
switch (operand.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
case InstructionStorageAddressingMode::kAbsolute:
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative:
|
||||
index = dxbc::Index(system_temp_ps_pc_p0_a0_, 3, operand.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
case InstructionStorageAddressingMode::kLoopRelative:
|
||||
index = dxbc::Index(system_temp_aL_, 0, operand.storage_index);
|
||||
break;
|
||||
}
|
||||
|
@ -1365,7 +1365,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
|
|||
src = dxbc::Src::R(temp);
|
||||
} else {
|
||||
assert_true(operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic);
|
||||
InstructionStorageAddressingMode::kAbsolute);
|
||||
src = dxbc::Src::R(index.index_);
|
||||
}
|
||||
} break;
|
||||
|
@ -1376,7 +1376,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
|
|||
const Shader::ConstantRegisterMap& constant_register_map =
|
||||
current_shader().constant_register_map();
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
uint32_t float_constant_index =
|
||||
constant_register_map.GetPackedFloatConstantIndex(
|
||||
operand.storage_index);
|
||||
|
@ -1429,13 +1429,13 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
if (current_shader().uses_register_dynamic_addressing()) {
|
||||
dxbc::Index register_index(result.storage_index);
|
||||
switch (result.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
case InstructionStorageAddressingMode::kAbsolute:
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative:
|
||||
register_index =
|
||||
dxbc::Index(system_temp_ps_pc_p0_a0_, 3, result.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
case InstructionStorageAddressingMode::kLoopRelative:
|
||||
register_index =
|
||||
dxbc::Index(system_temp_aL_, 0, result.storage_index);
|
||||
break;
|
||||
|
@ -1443,7 +1443,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
dest = dxbc::Dest::X(0, register_index);
|
||||
} else {
|
||||
assert_true(result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic);
|
||||
InstructionStorageAddressingMode::kAbsolute);
|
||||
dest = dxbc::Dest::R(result.storage_index);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -44,7 +44,7 @@ namespace gpu {
|
|||
enum class InstructionStorageTarget {
|
||||
// Result is not stored.
|
||||
kNone,
|
||||
// Result is stored to a temporary register indexed by storage_index [0-31].
|
||||
// Result is stored to a temporary register indexed by storage_index [0-63].
|
||||
kRegister,
|
||||
// Result is stored into a vertex shader interpolator export [0-15].
|
||||
kInterpolator,
|
||||
|
@ -85,11 +85,13 @@ constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
|
|||
|
||||
enum class InstructionStorageAddressingMode {
|
||||
// The storage index is not dynamically addressed.
|
||||
kStatic,
|
||||
kAbsolute,
|
||||
// The storage index is addressed by a0.
|
||||
kAddressAbsolute,
|
||||
// Float constants only.
|
||||
kAddressRegisterRelative,
|
||||
// The storage index is addressed by aL.
|
||||
kAddressRelative,
|
||||
// Float constants and temporary registers only.
|
||||
kLoopRelative,
|
||||
};
|
||||
|
||||
// Describes the source value of a particular component.
|
||||
|
@ -111,6 +113,12 @@ enum class SwizzleSource {
|
|||
// Converts the component index `i` to a SwizzleSource by a direct static_cast
// of its value; no range check is performed here.
constexpr SwizzleSource GetSwizzleFromComponentIndex(uint32_t i) {
|
||||
return static_cast<SwizzleSource>(i);
|
||||
}
|
||||
// Returns the SwizzleSource selected for `component_index` by an ALU source
// `swizzle`: the index is resolved through
// ucode::AluInstruction::GetSwizzledComponentIndex and the result is converted
// with GetSwizzleFromComponentIndex.
constexpr SwizzleSource GetSwizzledAluSourceComponent(
|
||||
uint32_t swizzle, uint32_t component_index) {
|
||||
return GetSwizzleFromComponentIndex(
|
||||
ucode::AluInstruction::GetSwizzledComponentIndex(swizzle,
|
||||
component_index));
|
||||
}
|
||||
inline char GetCharForComponentIndex(uint32_t i) {
|
||||
const static char kChars[] = {'x', 'y', 'z', 'w'};
|
||||
return kChars[i];
|
||||
|
@ -127,7 +135,7 @@ struct InstructionResult {
|
|||
uint32_t storage_index = 0;
|
||||
// How the storage index is dynamically addressed, if it is.
|
||||
InstructionStorageAddressingMode storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
// True to clamp the result value to [0-1].
|
||||
bool is_clamped = false;
|
||||
// Defines whether each output component is written, though this is from the
|
||||
|
@ -191,9 +199,9 @@ struct InstructionResult {
|
|||
};
|
||||
|
||||
enum class InstructionStorageSource {
|
||||
// Source is stored in a temporary register indexed by storage_index [0-31].
|
||||
// Source is stored in a temporary register indexed by storage_index [0-63].
|
||||
kRegister,
|
||||
// Source is stored in a float constant indexed by storage_index [0-511].
|
||||
// Source is stored in a float constant indexed by storage_index [0-255].
|
||||
kConstantFloat,
|
||||
// Source is stored in a vertex fetch constant indexed by storage_index
|
||||
// [0-95].
|
||||
|
@ -210,7 +218,7 @@ struct InstructionOperand {
|
|||
uint32_t storage_index = 0;
|
||||
// How the storage index is dynamically addressed, if it is.
|
||||
InstructionStorageAddressingMode storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
// True to negate the operand value.
|
||||
bool is_negated = false;
|
||||
// True to take the absolute value of the source (before any negation).
|
||||
|
|
|
@ -247,22 +247,18 @@ void Shader::GatherExecInformation(
|
|||
if (sequence & 0b10) {
|
||||
ucode_disasm_buffer.Append(" serialize\n ");
|
||||
}
|
||||
const uint32_t* op_ptr = ucode_data_.data() + instr_offset * 3;
|
||||
if (sequence & 0b01) {
|
||||
auto fetch_opcode = FetchOpcode(ucode_data_[instr_offset * 3] & 0x1F);
|
||||
if (fetch_opcode == FetchOpcode::kVertexFetch) {
|
||||
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
|
||||
ucode_data_.data() + instr_offset * 3);
|
||||
GatherVertexFetchInformation(op, previous_vfetch_full,
|
||||
auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
|
||||
if (op.opcode() == FetchOpcode::kVertexFetch) {
|
||||
GatherVertexFetchInformation(op.vertex_fetch(), previous_vfetch_full,
|
||||
ucode_disasm_buffer);
|
||||
} else {
|
||||
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
|
||||
ucode_data_.data() + instr_offset * 3);
|
||||
GatherTextureFetchInformation(op, unique_texture_bindings,
|
||||
ucode_disasm_buffer);
|
||||
GatherTextureFetchInformation(
|
||||
op.texture_fetch(), unique_texture_bindings, ucode_disasm_buffer);
|
||||
}
|
||||
} else {
|
||||
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_data_.data() +
|
||||
instr_offset * 3);
|
||||
auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
|
||||
GatherAluInstructionInformation(op, memexport_alloc_current_count,
|
||||
memexport_eA_written,
|
||||
ucode_disasm_buffer);
|
||||
|
@ -420,7 +416,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
|
|||
switch (operand.storage_source) {
|
||||
case InstructionStorageSource::kRegister:
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
register_static_address_bound_ =
|
||||
std::max(register_static_address_bound_,
|
||||
operand.storage_index + uint32_t(1));
|
||||
|
@ -430,7 +426,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
|
|||
break;
|
||||
case InstructionStorageSource::kConstantFloat:
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
// Store used float constants before translating so the
|
||||
// translator can use tightly packed indices if not dynamically
|
||||
// indexed.
|
||||
|
@ -457,7 +453,7 @@ void Shader::GatherFetchResultInformation(const InstructionResult& result) {
|
|||
// operand.
|
||||
assert_true(result.storage_target == InstructionStorageTarget::kRegister);
|
||||
if (result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
register_static_address_bound_ = std::max(
|
||||
register_static_address_bound_, result.storage_index + uint32_t(1));
|
||||
} else {
|
||||
|
@ -473,7 +469,7 @@ void Shader::GatherAluResultInformation(
|
|||
switch (result.storage_target) {
|
||||
case InstructionStorageTarget::kRegister:
|
||||
if (result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
register_static_address_bound_ = std::max(
|
||||
register_static_address_bound_, result.storage_index + uint32_t(1));
|
||||
} else {
|
||||
|
@ -789,28 +785,24 @@ void ShaderTranslator::TranslateExecInstructions(
|
|||
for (uint32_t instr_offset = instr.instruction_address;
|
||||
instr_offset < instr.instruction_address + instr.instruction_count;
|
||||
++instr_offset, sequence >>= 2) {
|
||||
const uint32_t* op_ptr = ucode_dwords + instr_offset * 3;
|
||||
if (sequence & 0b01) {
|
||||
auto fetch_opcode =
|
||||
static_cast<FetchOpcode>(ucode_dwords[instr_offset * 3] & 0x1F);
|
||||
if (fetch_opcode == FetchOpcode::kVertexFetch) {
|
||||
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
|
||||
ucode_dwords + instr_offset * 3);
|
||||
auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
|
||||
if (op.opcode() == FetchOpcode::kVertexFetch) {
|
||||
const VertexFetchInstruction& vfetch_op = op.vertex_fetch();
|
||||
ParsedVertexFetchInstruction vfetch_instr;
|
||||
if (ParseVertexFetchInstruction(op, previous_vfetch_full_,
|
||||
if (ParseVertexFetchInstruction(vfetch_op, previous_vfetch_full_,
|
||||
vfetch_instr)) {
|
||||
previous_vfetch_full_ = op;
|
||||
previous_vfetch_full_ = vfetch_op;
|
||||
}
|
||||
ProcessVertexFetchInstruction(vfetch_instr);
|
||||
} else {
|
||||
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
|
||||
ucode_dwords + instr_offset * 3);
|
||||
ParsedTextureFetchInstruction tfetch_instr;
|
||||
ParseTextureFetchInstruction(op, tfetch_instr);
|
||||
ParseTextureFetchInstruction(op.texture_fetch(), tfetch_instr);
|
||||
ProcessTextureFetchInstruction(tfetch_instr);
|
||||
}
|
||||
} else {
|
||||
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords +
|
||||
instr_offset * 3);
|
||||
auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
|
||||
ParsedAluInstruction alu_instr;
|
||||
ParseAluInstruction(op, current_shader().type(), alu_instr);
|
||||
ProcessAluInstruction(alu_instr);
|
||||
|
@ -826,25 +818,40 @@ static void ParseFetchInstructionResult(uint32_t dest, uint32_t swizzle,
|
|||
result.storage_index = dest;
|
||||
result.is_clamped = false;
|
||||
result.storage_addressing_mode =
|
||||
is_relative ? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
is_relative ? InstructionStorageAddressingMode::kLoopRelative
|
||||
: InstructionStorageAddressingMode::kAbsolute;
|
||||
result.original_write_mask = 0b1111;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
switch (swizzle & 0x7) {
|
||||
case 4:
|
||||
case 6:
|
||||
result.components[i] = SwizzleSource::k0;
|
||||
SwizzleSource component_source = SwizzleSource::k0;
|
||||
ucode::FetchDestinationSwizzle component_swizzle =
|
||||
ucode::GetFetchDestinationComponentSwizzle(swizzle, i);
|
||||
switch (component_swizzle) {
|
||||
case ucode::FetchDestinationSwizzle::kX:
|
||||
component_source = SwizzleSource::kX;
|
||||
break;
|
||||
case 5:
|
||||
result.components[i] = SwizzleSource::k1;
|
||||
case ucode::FetchDestinationSwizzle::kY:
|
||||
component_source = SwizzleSource::kY;
|
||||
break;
|
||||
case 7:
|
||||
result.original_write_mask &= ~uint32_t(1 << i);
|
||||
case ucode::FetchDestinationSwizzle::kZ:
|
||||
component_source = SwizzleSource::kZ;
|
||||
break;
|
||||
case ucode::FetchDestinationSwizzle::kW:
|
||||
component_source = SwizzleSource::kW;
|
||||
break;
|
||||
case ucode::FetchDestinationSwizzle::k1:
|
||||
component_source = SwizzleSource::k1;
|
||||
break;
|
||||
case ucode::FetchDestinationSwizzle::kKeep:
|
||||
result.original_write_mask &= ~(UINT32_C(1) << i);
|
||||
break;
|
||||
default:
|
||||
result.components[i] = GetSwizzleFromComponentIndex(swizzle & 0x3);
|
||||
// ucode::FetchDestinationSwizzle::k0 or the invalid swizzle 6.
|
||||
// TODO(Triang3l): Find the correct handling of the invalid swizzle 6.
|
||||
assert_true(component_swizzle == ucode::FetchDestinationSwizzle::k0);
|
||||
component_source = SwizzleSource::k0;
|
||||
break;
|
||||
}
|
||||
swizzle >>= 3;
|
||||
result.components[i] = component_source;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -867,8 +874,8 @@ bool ParseVertexFetchInstruction(const VertexFetchInstruction& op,
|
|||
src_op.storage_index = full_op.src();
|
||||
src_op.storage_addressing_mode =
|
||||
full_op.is_src_relative()
|
||||
? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
? InstructionStorageAddressingMode::kLoopRelative
|
||||
: InstructionStorageAddressingMode::kAbsolute;
|
||||
src_op.is_negated = false;
|
||||
src_op.is_absolute_value = false;
|
||||
src_op.component_count = 1;
|
||||
|
@ -962,8 +969,8 @@ void ParseTextureFetchInstruction(const TextureFetchInstruction& op,
|
|||
src_op.storage_source = InstructionStorageSource::kRegister;
|
||||
src_op.storage_index = op.src();
|
||||
src_op.storage_addressing_mode =
|
||||
op.is_src_relative() ? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
op.is_src_relative() ? InstructionStorageAddressingMode::kLoopRelative
|
||||
: InstructionStorageAddressingMode::kAbsolute;
|
||||
src_op.is_negated = false;
|
||||
src_op.is_absolute_value = false;
|
||||
src_op.component_count =
|
||||
|
@ -1144,91 +1151,51 @@ static const AluOpcodeInfo alu_scalar_opcode_infos[0x40] = {
|
|||
static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i,
|
||||
uint32_t swizzle_component_count,
|
||||
InstructionOperand& out_op) {
|
||||
int const_slot = 0;
|
||||
switch (i) {
|
||||
case 2:
|
||||
const_slot = op.src_is_temp(1) ? 0 : 1;
|
||||
break;
|
||||
case 3:
|
||||
const_slot = op.src_is_temp(1) && op.src_is_temp(2) ? 0 : 1;
|
||||
break;
|
||||
}
|
||||
out_op.is_negated = op.src_negate(i);
|
||||
uint32_t reg = op.src_reg(i);
|
||||
if (op.src_is_temp(i)) {
|
||||
out_op.storage_source = InstructionStorageSource::kRegister;
|
||||
out_op.storage_index = reg & 0x1F;
|
||||
out_op.is_absolute_value = (reg & 0x80) == 0x80;
|
||||
out_op.storage_index = AluInstruction::src_temp_reg(reg);
|
||||
out_op.is_absolute_value = AluInstruction::is_src_temp_value_absolute(reg);
|
||||
out_op.storage_addressing_mode =
|
||||
(reg & 0x40) ? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
AluInstruction::is_src_temp_relative(reg)
|
||||
? InstructionStorageAddressingMode::kLoopRelative
|
||||
: InstructionStorageAddressingMode::kAbsolute;
|
||||
} else {
|
||||
out_op.storage_source = InstructionStorageSource::kConstantFloat;
|
||||
out_op.storage_index = reg;
|
||||
if ((const_slot == 0 && op.is_const_0_addressed()) ||
|
||||
(const_slot == 1 && op.is_const_1_addressed())) {
|
||||
if (op.is_address_relative()) {
|
||||
if (op.src_const_is_addressed(i)) {
|
||||
if (op.is_const_address_register_relative()) {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressAbsolute;
|
||||
InstructionStorageAddressingMode::kAddressRegisterRelative;
|
||||
} else {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressRelative;
|
||||
InstructionStorageAddressingMode::kLoopRelative;
|
||||
}
|
||||
} else {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
}
|
||||
out_op.is_absolute_value = op.abs_constants();
|
||||
}
|
||||
out_op.component_count = swizzle_component_count;
|
||||
uint32_t swizzle = op.src_swizzle(i);
|
||||
if (swizzle_component_count == 1) {
|
||||
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
|
||||
out_op.components[0] = GetSwizzleFromComponentIndex(a);
|
||||
// Scalar `a` (W).
|
||||
out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
|
||||
} else if (swizzle_component_count == 2) {
|
||||
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
|
||||
uint32_t b = ((swizzle >> 0) + 0) & 0x3;
|
||||
out_op.components[0] = GetSwizzleFromComponentIndex(a);
|
||||
out_op.components[1] = GetSwizzleFromComponentIndex(b);
|
||||
// Scalar left-hand `a` (W) and right-hand `b` (X).
|
||||
out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
|
||||
out_op.components[1] = GetSwizzledAluSourceComponent(swizzle, 0);
|
||||
} else if (swizzle_component_count == 3) {
|
||||
assert_always();
|
||||
} else if (swizzle_component_count == 4) {
|
||||
for (uint32_t j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) {
|
||||
out_op.components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3);
|
||||
for (uint32_t j = 0; j < swizzle_component_count; ++j) {
|
||||
out_op.components[j] = GetSwizzledAluSourceComponent(swizzle, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ParseAluInstructionOperandSpecial(
|
||||
const AluInstruction& op, InstructionStorageSource storage_source,
|
||||
uint32_t reg, bool negate, int const_slot, uint32_t component_index,
|
||||
InstructionOperand& out_op) {
|
||||
out_op.is_negated = negate;
|
||||
out_op.is_absolute_value = op.abs_constants();
|
||||
out_op.storage_source = storage_source;
|
||||
if (storage_source == InstructionStorageSource::kRegister) {
|
||||
out_op.storage_index = reg & 0x7F;
|
||||
out_op.storage_addressing_mode = InstructionStorageAddressingMode::kStatic;
|
||||
} else {
|
||||
out_op.storage_index = reg;
|
||||
if ((const_slot == 0 && op.is_const_0_addressed()) ||
|
||||
(const_slot == 1 && op.is_const_1_addressed())) {
|
||||
if (op.is_address_relative()) {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressAbsolute;
|
||||
} else {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressRelative;
|
||||
}
|
||||
} else {
|
||||
out_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
}
|
||||
}
|
||||
out_op.component_count = 1;
|
||||
out_op.components[0] = GetSwizzleFromComponentIndex(component_index);
|
||||
}
|
||||
|
||||
bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
|
||||
if (vector_opcode != ucode::AluVectorOpcode::kMax ||
|
||||
vector_and_constant_result.original_write_mask ||
|
||||
|
@ -1237,14 +1204,14 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
|
|||
InstructionStorageSource::kRegister ||
|
||||
vector_operands[0].storage_index != 0 ||
|
||||
vector_operands[0].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic ||
|
||||
InstructionStorageAddressingMode::kAbsolute ||
|
||||
vector_operands[0].is_negated || vector_operands[0].is_absolute_value ||
|
||||
!vector_operands[0].IsStandardSwizzle() ||
|
||||
vector_operands[1].storage_source !=
|
||||
InstructionStorageSource::kRegister ||
|
||||
vector_operands[1].storage_index != 0 ||
|
||||
vector_operands[1].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic ||
|
||||
InstructionStorageAddressingMode::kAbsolute ||
|
||||
vector_operands[1].is_negated || vector_operands[1].is_absolute_value ||
|
||||
!vector_operands[1].IsStandardSwizzle()) {
|
||||
return false;
|
||||
|
@ -1253,7 +1220,7 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
|
|||
InstructionStorageTarget::kRegister) {
|
||||
if (vector_and_constant_result.storage_index != 0 ||
|
||||
vector_and_constant_result.storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
|
@ -1330,14 +1297,14 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
|
||||
instr.vector_and_constant_result.storage_target = storage_target;
|
||||
instr.vector_and_constant_result.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
if (is_export) {
|
||||
instr.vector_and_constant_result.storage_index = storage_index_export;
|
||||
} else {
|
||||
instr.vector_and_constant_result.storage_index = op.vector_dest();
|
||||
if (op.is_vector_dest_relative()) {
|
||||
instr.vector_and_constant_result.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressRelative;
|
||||
InstructionStorageAddressingMode::kLoopRelative;
|
||||
}
|
||||
}
|
||||
instr.vector_and_constant_result.is_clamped = op.vector_clamp();
|
||||
|
@ -1372,14 +1339,14 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
|
||||
instr.scalar_result.storage_target = storage_target;
|
||||
instr.scalar_result.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
if (is_export) {
|
||||
instr.scalar_result.storage_index = storage_index_export;
|
||||
} else {
|
||||
instr.scalar_result.storage_index = op.scalar_dest();
|
||||
if (op.is_scalar_dest_relative()) {
|
||||
instr.scalar_result.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressRelative;
|
||||
InstructionStorageAddressingMode::kLoopRelative;
|
||||
}
|
||||
}
|
||||
instr.scalar_result.is_clamped = op.scalar_clamp();
|
||||
|
@ -1395,20 +1362,42 @@ void ParseAluInstruction(const AluInstruction& op,
|
|||
scalar_opcode_info.src_swizzle_component_count,
|
||||
instr.scalar_operands[0]);
|
||||
} else {
|
||||
// Constant and temporary register.
|
||||
|
||||
bool src3_negate = op.src_negate(3);
|
||||
uint32_t src3_swizzle = op.src_swizzle(3);
|
||||
uint32_t component_a = ((src3_swizzle >> 6) + 3) & 0x3;
|
||||
uint32_t component_b = ((src3_swizzle >> 0) + 0) & 0x3;
|
||||
uint32_t reg2 = (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1) |
|
||||
(static_cast<int>(op.scalar_opcode()) & 1);
|
||||
int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0;
|
||||
|
||||
ParseAluInstructionOperandSpecial(
|
||||
op, InstructionStorageSource::kConstantFloat, op.src_reg(3),
|
||||
op.src_negate(3), 0, component_a, instr.scalar_operands[0]);
|
||||
// Left-hand constant operand (`a` - W swizzle).
|
||||
InstructionOperand& const_op = instr.scalar_operands[0];
|
||||
const_op.is_negated = src3_negate;
|
||||
const_op.is_absolute_value = op.abs_constants();
|
||||
const_op.storage_source = InstructionStorageSource::kConstantFloat;
|
||||
const_op.storage_index = op.src_reg(3);
|
||||
if (op.src_const_is_addressed(3)) {
|
||||
if (op.is_const_address_register_relative()) {
|
||||
const_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAddressRegisterRelative;
|
||||
} else {
|
||||
const_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kLoopRelative;
|
||||
}
|
||||
} else {
|
||||
const_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
}
|
||||
const_op.component_count = 1;
|
||||
const_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 3);
|
||||
|
||||
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
|
||||
reg2, op.src_negate(3), const_slot,
|
||||
component_b, instr.scalar_operands[1]);
|
||||
// Right-hand temporary register operand (`b` - X swizzle).
|
||||
InstructionOperand& temp_op = instr.scalar_operands[1];
|
||||
temp_op.is_negated = src3_negate;
|
||||
temp_op.is_absolute_value = op.abs_constants();
|
||||
temp_op.storage_source = InstructionStorageSource::kRegister;
|
||||
temp_op.storage_index = op.scalar_const_op_src_temp_reg();
|
||||
temp_op.storage_addressing_mode =
|
||||
InstructionStorageAddressingMode::kAbsolute;
|
||||
temp_op.component_count = 1;
|
||||
temp_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1421,7 +1410,7 @@ bool ParsedAluInstruction::IsScalarOpDefaultNop() const {
|
|||
if (scalar_result.storage_target == InstructionStorageTarget::kRegister) {
|
||||
if (scalar_result.storage_index != 0 ||
|
||||
scalar_result.storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
InstructionStorageAddressingMode::kAbsolute) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1446,7 +1435,7 @@ uint32_t ParsedAluInstruction::GetMemExportStreamConstant() const {
|
|||
vector_operands[2].storage_source ==
|
||||
InstructionStorageSource::kConstantFloat &&
|
||||
vector_operands[2].storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic &&
|
||||
InstructionStorageAddressingMode::kAbsolute &&
|
||||
vector_operands[2].IsStandardSwizzle() &&
|
||||
!vector_operands[2].is_negated && !vector_operands[2].is_absolute_value) {
|
||||
return vector_operands[2].storage_index;
|
||||
|
|
|
@ -57,13 +57,13 @@ void DisassembleResultOperand(const InstructionResult& result,
|
|||
}
|
||||
if (uses_storage_index) {
|
||||
switch (result.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
case InstructionStorageAddressingMode::kAbsolute:
|
||||
out->AppendFormat("{}", result.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative:
|
||||
out->AppendFormat("[{}+a0]", result.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
case InstructionStorageAddressingMode::kLoopRelative:
|
||||
out->AppendFormat("[{}+aL]", result.storage_index);
|
||||
break;
|
||||
}
|
||||
|
@ -109,17 +109,17 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
|
|||
out->Append("_abs");
|
||||
}
|
||||
switch (op.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
case InstructionStorageAddressingMode::kAbsolute:
|
||||
if (op.is_absolute_value) {
|
||||
out->AppendFormat("[{}]", op.storage_index);
|
||||
} else {
|
||||
out->AppendFormat("{}", op.storage_index);
|
||||
}
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative:
|
||||
out->AppendFormat("[{}+a0]", op.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
case InstructionStorageAddressingMode::kLoopRelative:
|
||||
out->AppendFormat("[{}+aL]", op.storage_index);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -3110,16 +3110,16 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
|
|||
}
|
||||
|
||||
switch (op.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic: {
|
||||
case InstructionStorageAddressingMode::kAbsolute: {
|
||||
storage_index = b.makeUintConstant(storage_base + op.storage_index);
|
||||
} break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute: {
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative: {
|
||||
// storage_index + a0
|
||||
storage_index =
|
||||
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
|
||||
b.makeUintConstant(storage_base + op.storage_index));
|
||||
} break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative: {
|
||||
case InstructionStorageAddressingMode::kLoopRelative: {
|
||||
// storage_index + aL.x
|
||||
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
|
||||
storage_index =
|
||||
|
@ -3269,16 +3269,16 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
|
|||
std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage
|
||||
|
||||
switch (result.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic: {
|
||||
case InstructionStorageAddressingMode::kAbsolute: {
|
||||
storage_index = b.makeUintConstant(result.storage_index);
|
||||
} break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute: {
|
||||
case InstructionStorageAddressingMode::kAddressRegisterRelative: {
|
||||
// storage_index + a0
|
||||
storage_index =
|
||||
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
|
||||
b.makeUintConstant(result.storage_index));
|
||||
} break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative: {
|
||||
case InstructionStorageAddressingMode::kLoopRelative: {
|
||||
// storage_index + aL.x
|
||||
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
|
||||
storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx,
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -16,11 +16,45 @@
|
|||
#include "xenia/base/platform.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
// Closest AMD doc:
|
||||
// The XNA Game Studio 3.1 contains Graphics.ShaderCompiler.AssembleFromSource,
|
||||
// which, for TargetPlatform.Xbox360, can validate and assemble Xbox 360 shader
|
||||
// microcode from Xbox 360 and Direct3D 9 shader assembly, returning the binary,
|
||||
// as well as validation warnings and errors and the disassembly via the warning
|
||||
// output. It is the primary source of information about the binary encoding of
|
||||
// the instructions, as well as valid usage of instruction parameters and
|
||||
// sequences.
|
||||
// https://www.microsoft.com/en-us/download/details.aspx?id=39
|
||||
// (XNAGS31_setup.exe)
|
||||
// Xenia provides a tool, tools/shader-playground, that invokes the assembler,
|
||||
// displays the binary and the disassembly from the official assembler, and also
|
||||
// shows the disassembly generated by Xenia, and passes it back to the assembler
|
||||
// to validate Xenia's microcode parsing and disassembly by checking if
|
||||
// reassembling the disassembly results in the same binary.
|
||||
//
|
||||
// The behavior and the parameters of some of the instructions were previously
|
||||
// documented on MSDN in the XNA Game Studio programming guide:
|
||||
// http://web.archive.org/web/20081211005537/http://msdn.microsoft.com/en-us/library/bb313877.aspx
|
||||
//
|
||||
// A great amount of documentation, such as the R400 sequencer specification and
|
||||
// the official emulator code, was made available during the LG Electronics,
|
||||
// Inc. v. ATI Technologies ULC "Multi-thread Graphics Processing System" patent
|
||||
// dispute IPR2015-00325, with the motion to seal having been denied due to "a
|
||||
// strong public policy interest in making all information filed in an inter
|
||||
// partes review publicly available". Most of the documents attached, however,
|
||||
// cover early versions - the development process - of the R400 architecture, so
|
||||
// there are some differences from the final Xenos GPU (DOT2ADDv is defined
|
||||
// differently, for example, and MUL/ADD/SUB_CONST are missing).
|
||||
// https://portal.unifiedpatents.com/ptab/case/IPR2015-00325
|
||||
//
|
||||
// Also, the R600, while having a different 5-scalar, as opposed to vec4|scalar,
|
||||
// parallelism model and instruction encodings and targeting Direct3D 10 rather
|
||||
// than 9, inherits a lot of instructions and architectural concepts from the
|
||||
// R400.
|
||||
// https://www.x.org/docs/AMD/old/r600isa.pdf
|
||||
// https://developer.amd.com/wordpress/media/2012/10/r600isa.pdf
|
||||
// https://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
|
||||
// Microcode format differs, but most fields/enums are the same.
|
||||
|
||||
// This code comes from the freedreno project:
|
||||
// Parts of this code also come from the freedreno project:
|
||||
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
|
||||
/*
|
||||
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
|
||||
|
@ -156,7 +190,8 @@ struct ControlFlowExecInstruction {
|
|||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
uint32_t count() const { return count_; }
|
||||
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
|
||||
// Sequence bits, 2 per instruction.
|
||||
// [0] - ALU (0) or fetch (1), [1] - serialize.
|
||||
uint32_t sequence() const { return serialize_; }
|
||||
// Whether to reset the current predicate.
|
||||
bool clean() const { return clean_ == 1; }
|
||||
|
@ -189,7 +224,8 @@ struct ControlFlowCondExecInstruction {
|
|||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
uint32_t count() const { return count_; }
|
||||
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
|
||||
// Sequence bits, 2 per instruction.
|
||||
// [0] - ALU (0) or fetch (1), [1] - serialize.
|
||||
uint32_t sequence() const { return serialize_; }
|
||||
// Constant index used as the conditional.
|
||||
uint32_t bool_address() const { return bool_address_; }
|
||||
|
@ -224,7 +260,8 @@ struct ControlFlowCondExecPredInstruction {
|
|||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
uint32_t count() const { return count_; }
|
||||
// Sequence bits, 2 per instruction, indicating whether ALU or fetch.
|
||||
// Sequence bits, 2 per instruction.
|
||||
// [0] - ALU (0) or fetch (1), [1] - serialize.
|
||||
uint32_t sequence() const { return serialize_; }
|
||||
// Whether to reset the current predicate.
|
||||
bool clean() const { return clean_ == 1; }
|
||||
|
@ -591,6 +628,24 @@ enum class FetchOpcode : uint32_t {
|
|||
kSetTextureGradientsVert = 26,
|
||||
};
|
||||
|
||||
// Selector describing what is written to one component of a fetch destination
// register. Selectors are extracted from the packed destination swizzle as
// 3-bit values (see GetFetchDestinationComponentSwizzle). The value 6 has no
// enumerator assigned here.
enum class FetchDestinationSwizzle {
|
||||
// The component indices are absolute (not relative to the component itself,
|
||||
// unlike in ALU operation sources).
|
||||
kX = 0,
|
||||
kY = 1,
|
||||
kZ = 2,
|
||||
kW = 3,
|
||||
// Constant 0.0.
k0 = 4,
|
||||
// Constant 1.0.
k1 = 5,
|
||||
// Keep the current value of the destination register (don't write).
|
||||
kKeep = 7,
|
||||
};
|
||||
|
||||
// Extracts the destination selector of a single component from a packed fetch
// destination swizzle, which stores one 3-bit selector per component starting
// from the lowest bits (component 0 in bits 0-2, component 1 in bits 3-5, ...).
constexpr FetchDestinationSwizzle GetFetchDestinationComponentSwizzle(
    uint32_t swizzle, uint32_t component) {
  const uint32_t shift = component * 3;
  const uint32_t selector = (swizzle >> shift) & 0x7;
  return FetchDestinationSwizzle(selector);
}
|
||||
|
||||
struct alignas(uint32_t) VertexFetchInstruction {
|
||||
FetchOpcode opcode() const { return data_.opcode_value; }
|
||||
|
||||
|
@ -614,29 +669,6 @@ struct alignas(uint32_t) VertexFetchInstruction {
|
|||
uint32_t src_swizzle() const { return data_.src_swiz; }
|
||||
bool is_src_relative() const { return data_.src_reg_am; }
|
||||
|
||||
// Returns true if the fetch actually fetches data.
|
||||
// This may be false if it's used only to populate constants.
|
||||
bool fetches_any_data() const {
|
||||
uint32_t dst_swiz = data_.dst_swiz;
|
||||
bool fetches_any_data = false;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if ((dst_swiz & 0x7) == 4) {
|
||||
// 0.0
|
||||
} else if ((dst_swiz & 0x7) == 5) {
|
||||
// 1.0
|
||||
} else if ((dst_swiz & 0x7) == 6) {
|
||||
// ?
|
||||
} else if ((dst_swiz & 0x7) == 7) {
|
||||
// Previous register value.
|
||||
} else {
|
||||
fetches_any_data = true;
|
||||
break;
|
||||
}
|
||||
dst_swiz >>= 3;
|
||||
}
|
||||
return fetches_any_data;
|
||||
}
|
||||
|
||||
uint32_t prefetch_count() const { return data_.prefetch_count; }
|
||||
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
|
||||
|
||||
|
@ -676,6 +708,7 @@ struct alignas(uint32_t) VertexFetchInstruction {
|
|||
uint32_t const_index_sel : 2;
|
||||
// Prefetch count minus 1.
|
||||
uint32_t prefetch_count : 3;
|
||||
// Absolute, one component.
|
||||
uint32_t src_swiz : 2;
|
||||
};
|
||||
struct {
|
||||
|
@ -769,10 +802,11 @@ struct alignas(uint32_t) TextureFetchInstruction {
|
|||
uint32_t fetch_valid_only : 1;
|
||||
uint32_t const_index : 5;
|
||||
uint32_t tx_coord_denorm : 1;
|
||||
uint32_t src_swiz : 6; // xyz
|
||||
// Absolute, three components.
|
||||
uint32_t src_swiz : 6;
|
||||
};
|
||||
struct {
|
||||
uint32_t dst_swiz : 12; // xyzw
|
||||
uint32_t dst_swiz : 12;
|
||||
xenos::TextureFilter mag_filter : 2;
|
||||
xenos::TextureFilter min_filter : 2;
|
||||
xenos::TextureFilter mip_filter : 2;
|
||||
|
@ -801,21 +835,96 @@ struct alignas(uint32_t) TextureFetchInstruction {
|
|||
};
|
||||
static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
|
||||
|
||||
// Overlapping views of a single fetch instruction: data_ exposes the fields
// that are read the same way regardless of the opcode, while vertex_fetch_ and
// texture_fetch_ expose the opcode-specific layouts of the same bits. Use
// opcode() to decide which of the specific views is meaningful.
union alignas(uint32_t) FetchInstruction {
|
||||
public:
|
||||
FetchOpcode opcode() const { return data_.opcode_value; }
|
||||

||||
// Whether the fetch is predicated (or conditional).
|
||||
bool is_predicated() const { return data_.is_predicated; }
|
||||
// Required condition value of the comparison (true or false).
|
||||
bool predicate_condition() const { return data_.pred_condition == 1; }
|
||||

||||
// Destination register index, its raw 12-bit packed swizzle, and the value
// of the dst_reg_am bit.
uint32_t dest() const { return data_.dst_reg; }
|
||||
uint32_t dest_swizzle() const { return data_.dst_swiz; }
|
||||
bool is_dest_relative() const { return data_.dst_reg_am; }
|
||||
// Source register index and the value of the src_reg_am bit.
uint32_t src() const { return data_.src_reg; }
|
||||
bool is_src_relative() const { return data_.src_reg_am; }
|
||||

||||
// For FetchOpcode::kVertexFetch.
|
||||
const VertexFetchInstruction& vertex_fetch() const { return vertex_fetch_; }
|
||||
// For operations other than FetchOpcode::kVertexFetch.
|
||||
const TextureFetchInstruction& texture_fetch() const {
|
||||
return texture_fetch_;
|
||||
}
|
||||

||||
private:
|
||||
// Fields common to vertex and texture fetches. Each anonymous struct below
// declares 32 bits of bit-fields; unnamed bit-fields skip over the bits
// whose meaning depends on the kind of the fetch.
struct Data {
|
||||
struct {
|
||||
FetchOpcode opcode_value : 5;
|
||||
uint32_t src_reg : 6;
|
||||
uint32_t src_reg_am : 1;
|
||||
uint32_t dst_reg : 6;
|
||||
uint32_t dst_reg_am : 1;
|
||||
// Specific to vertex or texture fetch.
|
||||
uint32_t : 1;
|
||||
// [0-31], points to one tf# or three vf# constants.
|
||||
uint32_t const_index : 5;
|
||||
// Specific to vertex or texture fetch.
|
||||
uint32_t : 7;
|
||||
};
|
||||
struct {
|
||||
uint32_t dst_swiz : 12;
|
||||
// Specific to vertex or texture fetch.
|
||||
uint32_t : 19;
|
||||
uint32_t is_predicated : 1;
|
||||
};
|
||||
struct {
|
||||
// Specific to vertex or texture fetch.
|
||||
uint32_t : 31;
|
||||
uint32_t pred_condition : 1;
|
||||
};
|
||||
};
|
||||
Data data_;
|
||||
VertexFetchInstruction vertex_fetch_;
|
||||
TextureFetchInstruction texture_fetch_;
|
||||
};
|
||||
static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
|
||||
|
||||
// What follows is largely a mash up of the microcode assembly naming and the
|
||||
// R600 docs that have a near 1:1 with the instructions available in the xenos
|
||||
// R600 docs that have a near 1:1 with the instructions available in the Xenos
|
||||
// GPU, and Adreno 2xx instruction names found in Freedreno. Some of the
|
||||
// behavior has been experimentally verified. Some has been guessed.
|
||||
// Docs: https://www.x.org/docs/AMD/old/r600isa.pdf
|
||||
// behavior has been experimentally verified. Some has been guessed. Some
|
||||
// instructions are implemented in the Exhibit 2092 - sq_alu of IPR2015-00325,
|
||||
// however, the code provided there is early and incomplete.
|
||||
//
|
||||
// Conventions:
|
||||
// - All temporary registers are vec4s.
|
||||
// - Scalar ops swizzle out a single component of their source registers denoted
|
||||
// by 'a' or 'b'. src0.a means 'the first component specified for src0' and
|
||||
// src0.ab means 'two components specified for src0, in order'.
|
||||
// - Scalar ops write the result to the entire destination register.
|
||||
// - pv and ps are the previous results of a vector or scalar ALU operation.
|
||||
// Both are valid only within the current ALU clause. They are not modified
|
||||
// when the instruction that would write them fails its predication check.
|
||||
// - Most scalar ALU operations work with one or two components of the source
|
||||
// register passed as the third operand of the whole co-issued ALU operation,
|
||||
// denoted by `a` (the left-hand operand) and `b` (the right-hand operand).
|
||||
// `a` is the [(3 + src3_swizzle[6:7]) & 3] component (W - alpha).
|
||||
// `b` is the [(0 + src3_swizzle[0:1]) & 3] component (X - red).
|
||||
// - mulsc, addsc, subsc scalar ALU operations accept two operands - a float
|
||||
// constant with the `a` (W) swizzle (addressed by the third operand index and
|
||||
// addressing mode) being the left-hand operand, and a temporary register with
|
||||
// the `b` (X) swizzle with the index constructed from:
|
||||
// - [0:0] = scalar_opcode[0:0]
|
||||
// - [1:1] = src3_sel[0:0]
|
||||
// - [2:5] = src3_swizzle[2:5]
|
||||
// abs_constants and third source's negation are applied to both the constant
|
||||
// and the temporary register.
|
||||
// - Some scalar ALU instructions don't have operands.
|
||||
// - Scalar ALU operations replicate the result into all masked components.
|
||||
// - Overall, the WXYZ order is pretty commonly used in the Exhibit 2092 -
|
||||
// sq_alu of IPR2015-00325, this is where the AB = WX order of scalar operands
|
||||
// likely comes from. Vector predicate instructions also involve the W and X
|
||||
// components, and in IPR2015-00325 sq_alu, individual components in the
|
||||
// emulated vector instructions are handled in the WXYZ order. However, max4's
|
||||
// "greater than the rest" check order is RGBA (XYZW) there. dp4, though, sums
|
||||
// the products in WXYZ order in IPR2015-00325 sq_alu (but in XYZW order on
|
||||
// MSDN).
|
||||
// - ps is the previous result of a scalar ALU operation. It is not modified
|
||||
// when the instruction that would write it fails its predication check.
|
||||
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
|
||||
// multiplication (+-0 or denormal * anything = +0) wherever it's present
|
||||
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
|
||||
|
@ -1137,6 +1246,9 @@ enum class AluScalarOpcode : uint32_t {
|
|||
// dest.xyzw = sqrt(src0.a);
|
||||
kSqrt = 40,
|
||||
|
||||
// 0 and 1 are the same instruction - one bit of the register index is stored
|
||||
// in the opcode field.
|
||||
|
||||
// mulsc/MUL_CONST_0 dest, src0.a, src1.a
|
||||
kMulsc0 = 42,
|
||||
// mulsc/MUL_CONST_1 dest, src0.a, src1.a
|
||||
|
@ -1303,19 +1415,24 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// dp4/DOT4v dest, src0, src1
|
||||
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z +
|
||||
// src0.w * src1.w;
|
||||
// Note: only pv.x contains the value.
|
||||
kDp4 = 15,
|
||||
|
||||
// Three-Element Dot Product
|
||||
// dp3/DOT3v dest, src0, src1
|
||||
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z;
|
||||
// Note: only pv.x contains the value.
|
||||
kDp3 = 16,
|
||||
|
||||
// Two-Element Dot Product and Add
|
||||
// dp2add/DOT2ADDv dest, src0, src1, src2
|
||||
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src2.x;
|
||||
// Note: only pv.x contains the value.
|
||||
// IPR2015-00325 sq_alu may be an outdated and unreliable reference (Sequencer
|
||||
// Parts Development folder history lists a few changes regarding the swizzle
|
||||
// in dot2add, sq_alu though implements the instruction as
|
||||
// src0.x * src1.x + src0.z * src1.z + src2.y, but MSDN specifies the correct
|
||||
// order as provided in the beginning of this comment, further proven by
|
||||
// assembling PC shader assembly using XNA, with Shader Model 2 dp2add being
|
||||
// translated directly into Xenos dp2add without additional swizzling).
|
||||
// http://web.archive.org/web/20100705150552/http://msdn.microsoft.com/en-us/library/bb313922.aspx
|
||||
kDp2Add = 17,
|
||||
|
||||
// Cube Map
|
||||
|
@ -1363,8 +1480,16 @@ enum class AluVectorOpcode : uint32_t {
|
|||
|
||||
// Four-Element Maximum
|
||||
// max4/MAX4v dest, src0
|
||||
// dest.xyzw = max(src0.x, src0.y, src0.z, src0.w);
|
||||
// Note: only pv.x contains the value.
|
||||
// According to IPR2015-00325 sq_alu:
|
||||
// if (src0.x > src0.y && src0.x > src0.z && src0.x > src0.w) {
|
||||
// dest.xyzw = src0.x;
|
||||
// } else if (src0.y > src0.z && src0.y > src0.w) {
|
||||
// dest.xyzw = src0.y;
|
||||
// } else if (src0.z > src0.w) {
|
||||
// dest.xyzw = src0.z;
|
||||
// } else {
|
||||
// dest.xyzw = src0.w;
|
||||
// }
|
||||
kMax4 = 19,
|
||||
|
||||
// Floating-Point Predicate Counter Increment If Equal
|
||||
|
@ -1672,7 +1797,9 @@ struct alignas(uint32_t) AluInstruction {
|
|||
bool abs_constants() const { return data_.abs_constants == 1; }
|
||||
bool is_const_0_addressed() const { return data_.const_0_rel_abs == 1; }
|
||||
bool is_const_1_addressed() const { return data_.const_1_rel_abs == 1; }
|
||||
bool is_address_relative() const { return data_.address_absolute == 1; }
|
||||
bool is_const_address_register_relative() const {
|
||||
return data_.const_address_register_relative == 1;
|
||||
}
|
||||
|
||||
AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
|
||||
uint32_t vector_write_mask() const { return data_.vector_write_mask; }
|
||||
|
@ -1686,6 +1813,18 @@ struct alignas(uint32_t) AluInstruction {
|
|||
bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; }
|
||||
bool scalar_clamp() const { return data_.scalar_clamp == 1; }
|
||||
|
||||
// Extracts the temporary register index, stored in the low 6 bits, from the
// packed source register field of a temporary register operand.
static constexpr uint32_t src_temp_reg(uint32_t src_reg) {
  constexpr uint32_t kTempIndexMask = (uint32_t(1) << 6) - 1;
  return src_reg & kTempIndexMask;
}
|
||||
// Tests the relative addressing flag (bit 6) in the packed source register
// field of a temporary register operand.
static constexpr bool is_src_temp_relative(uint32_t src_reg) {
  return ((src_reg >> 6) & 1) == 1;
}
|
||||
// Tests bit 7 in the packed source register field of a temporary register
// operand.
// NOTE(review): presumably this flag requests the absolute value of the
// operand - confirm against the users of this function.
static constexpr bool is_src_temp_value_absolute(uint32_t src_reg) {
  return ((src_reg >> 7) & 1) == 1;
}
|
||||
// Full register index for constants, packed structure for temporary
|
||||
// registers (unpack using src_temp_reg, is_src_temp_relative,
|
||||
// is_src_temp_value_absolute).
|
||||
uint32_t src_reg(size_t i) const {
|
||||
switch (i) {
|
||||
case 1:
|
||||
|
@ -1702,16 +1841,59 @@ struct alignas(uint32_t) AluInstruction {
|
|||
bool src_is_temp(size_t i) const {
|
||||
switch (i) {
|
||||
case 1:
|
||||
return data_.src1_sel == 1;
|
||||
return bool(data_.src1_sel);
|
||||
case 2:
|
||||
return data_.src2_sel == 1;
|
||||
return bool(data_.src2_sel);
|
||||
case 3:
|
||||
return data_.src3_sel == 1;
|
||||
return bool(data_.src3_sel);
|
||||
default:
|
||||
assert_unhandled_case(i);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
// Whether the specified operand is actually a constant is disregarded in this
|
||||
// function so its scope is limited to just parsing the structure's layout -
|
||||
// to decide whether to use relative addressing for the operand as a whole,
|
||||
// check externally whether the operand is actually a constant first.
|
||||
//
|
||||
// For the constant operand in mulsc, addsc, subsc, this should be called for
|
||||
// the operand index 3. Note that the XNA disassembler takes the addressing
|
||||
// mode for the constant scalar operand unconditionally from const_1_rel_abs,
|
||||
// and ignores the +aL for it unless the scalar operation is co-issued with a
|
||||
// vector operation reading from a constant. However, the XNA assembler treats
|
||||
// the constant scalar operand as a constant in the third operand, and places
|
||||
// the addressing mode for it in const_0_rel_abs if no other constants are
|
||||
// used in the whole ALU instruction. The validator also doesn't report
|
||||
// anything if +aL is used when the constant scalar operand is the only
|
||||
// constant in the instruction (and explicitly calls it the third constant in
|
||||
// the error message in case both vector operands are constants, and different
|
||||
// addressing modes are used for the second vector operand and the constant
|
||||
// scalar operand). Passing the disassembly produced by XNA back to the
|
||||
// assembler results in different microcode in this case. This indicates that
|
||||
// most likely there's a bug in the XNA disassembler, and that the addressing
|
||||
// mode for the constant scalar operand should actually be taken the same way
|
||||
// as for the third vector operand - from const_0_rel_abs if there are no
|
||||
// constant vector operands, or from const_1_rel_abs if there is at least one.
|
||||
bool src_const_is_addressed(size_t i) const {
|
||||
// "error X7100: When three constants are used in one instruction, the
|
||||
// second and third constant must either both be non-relative, or both be
|
||||
// relative."
|
||||
// Whether to use const_0_rel_abs or const_1_rel_abs is essentially
|
||||
// min(sum of whether the previous operands are constants, 1).
|
||||
switch (i) {
|
||||
case 1:
|
||||
return bool(data_.const_0_rel_abs);
|
||||
case 2:
|
||||
return bool(src_is_temp(1) ? data_.const_0_rel_abs
|
||||
: data_.const_1_rel_abs);
|
||||
case 3:
|
||||
return bool((src_is_temp(1) && src_is_temp(2)) ? data_.const_0_rel_abs
|
||||
: data_.const_1_rel_abs);
|
||||
default:
|
||||
assert_unhandled_case(i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
uint32_t src_swizzle(size_t i) const {
|
||||
switch (i) {
|
||||
case 1:
|
||||
|
@ -1739,8 +1921,20 @@ struct alignas(uint32_t) AluInstruction {
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t scalar_const_op_src_temp_reg() const {
|
||||
return (uint32_t(data_.scalar_opc) & 1) | (data_.src3_sel << 1) |
|
||||
(data_.src3_swiz & 0x3C);
|
||||
}
|
||||
|
||||
// Helpers.
|
||||
|
||||
// Converts the component-relative swizzle of an ALU instruction operand into
// the absolute index (0 = X ... 3 = W) of the source component read for the
// given destination component. The swizzle stores a 2-bit offset per
// component, which is added to the component's own index modulo 4.
static constexpr uint32_t GetSwizzledComponentIndex(
    uint32_t swizzle, uint32_t component_index) {
  const uint32_t relative_offset = swizzle >> (component_index * 2);
  // Only the two low bits of the sum matter, so the bits of the other
  // components left in the shifted swizzle are discarded by the mask.
  return (component_index + relative_offset) & 0b11;
}
|
||||
|
||||
// Note that even if the export component is unused (like W of the vertex
|
||||
// shader misc register, YZW of pixel shader depth), it must still not be
|
||||
// excluded - that may make disassembly not reassemblable if there are
|
||||
|
@ -1803,6 +1997,7 @@ struct alignas(uint32_t) AluInstruction {
|
|||
AluScalarOpcode scalar_opc : 6;
|
||||
};
|
||||
struct {
|
||||
// Swizzles are component-relative.
|
||||
uint32_t src3_swiz : 8;
|
||||
uint32_t src2_swiz : 8;
|
||||
uint32_t src1_swiz : 8;
|
||||
|
@ -1811,7 +2006,9 @@ struct alignas(uint32_t) AluInstruction {
|
|||
uint32_t src1_reg_negate : 1;
|
||||
uint32_t pred_condition : 1;
|
||||
uint32_t is_predicated : 1;
|
||||
uint32_t address_absolute : 1;
|
||||
// Temporary registers can have only absolute and aL-relative indices, not
|
||||
// a0-relative.
|
||||
uint32_t const_address_register_relative : 1;
|
||||
uint32_t const_1_rel_abs : 1;
|
||||
uint32_t const_0_rel_abs : 1;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue