[GPU] Store ALU result after both vector and scalar instructions
This commit is contained in:
parent
cd1aadef74
commit
66a9c9d812
|
@ -1057,8 +1057,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// cubemap coordinate.
|
||||
void ArrayCoordToCubeDirection(uint32_t reg);
|
||||
|
||||
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
|
||||
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
|
||||
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& replicate_result_x,
|
||||
bool& predicate_written);
|
||||
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& predicate_written);
|
||||
|
||||
// Appends a string to a DWORD stream, returns the DWORD-aligned length.
|
||||
static uint32_t AppendString(std::vector<uint32_t>& dest, const char* source);
|
||||
|
@ -1206,7 +1209,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// eM# in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
|
||||
|
||||
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
||||
// Vector ALU result or fetch scratch (since Xenos write masks can contain
|
||||
// swizzles).
|
||||
uint32_t system_temp_pv_;
|
||||
// Temporary register ID for previous scalar result, program counter,
|
||||
// predicate and absolute address register.
|
||||
|
|
|
@ -17,29 +17,22 @@ namespace xe {
|
|||
namespace gpu {
|
||||
using namespace ucode;
|
||||
|
||||
void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
if (FLAGS_dxbc_source_map) {
|
||||
instruction_disassembly_buffer_.Reset();
|
||||
instr.Disassemble(&instruction_disassembly_buffer_);
|
||||
// Will be emitted by UpdateInstructionPredication.
|
||||
}
|
||||
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
|
||||
true);
|
||||
// Whether the instruction has changed the predicate and it needs to be
|
||||
// checked again later.
|
||||
bool predicate_written = false;
|
||||
bool DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||
const ParsedAluInstruction& instr, bool& replicate_result_x,
|
||||
bool& predicate_written) {
|
||||
replicate_result_x = false;
|
||||
predicate_written = false;
|
||||
|
||||
// Whether the result is only in X and all components should be remapped to X
|
||||
// while storing.
|
||||
bool replicate_result = false;
|
||||
if (!instr.has_vector_op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A small shortcut, operands of cube are the same, but swizzled.
|
||||
uint32_t operand_count;
|
||||
if (instr.vector_opcode == AluVectorOpcode::kCube) {
|
||||
operand_count = 1;
|
||||
} else {
|
||||
operand_count = uint32_t(instr.operand_count);
|
||||
operand_count = uint32_t(instr.vector_operand_count);
|
||||
}
|
||||
DxbcSourceOperand dxbc_operands[3];
|
||||
// Whether the operand is the same as any previous operand, and thus is loaded
|
||||
|
@ -47,9 +40,9 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
bool operands_duplicate[3] = {};
|
||||
uint32_t operand_length_sums[3];
|
||||
for (uint32_t i = 0; i < operand_count; ++i) {
|
||||
const InstructionOperand& operand = instr.operands[i];
|
||||
const InstructionOperand& operand = instr.vector_operands[i];
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
if (operand == instr.operands[j]) {
|
||||
if (operand == instr.vector_operands[j]) {
|
||||
operands_duplicate[i] = true;
|
||||
dxbc_operands[i] = dxbc_operands[j];
|
||||
break;
|
||||
|
@ -98,6 +91,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
D3D10_SB_OPCODE_MAX,
|
||||
};
|
||||
|
||||
bool translated = true;
|
||||
switch (instr.vector_opcode) {
|
||||
case AluVectorOpcode::kAdd:
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||
|
@ -123,7 +117,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||
if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
|
||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
|
||||
// flushing denormals (must be done using eq - doing bitwise comparison
|
||||
// doesn't flush denormals).
|
||||
|
@ -287,7 +281,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
UseDxbcSourceOperand(dxbc_operands[2]);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||
if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
|
||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||
// If any operand is zero or denormalized, just leave the addition part.
|
||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||
|
@ -394,7 +388,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
case AluVectorOpcode::kDp4:
|
||||
case AluVectorOpcode::kDp3:
|
||||
case AluVectorOpcode::kDp2Add: {
|
||||
if (instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||
if (instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
|
||||
// The operands are the same when calculating vector length, no need to
|
||||
// emulate 0 * anything = 0 in this case.
|
||||
shader_code_.push_back(
|
||||
|
@ -858,7 +852,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
} break;
|
||||
|
||||
case AluVectorOpcode::kMax4:
|
||||
replicate_result = true;
|
||||
replicate_result_x = true;
|
||||
// pv.xy = max(src0.xy, src0.zw)
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||
|
@ -891,7 +885,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
case AluVectorOpcode::kSetpGtPush:
|
||||
case AluVectorOpcode::kSetpGePush:
|
||||
predicate_written = true;
|
||||
replicate_result = true;
|
||||
replicate_result_x = true;
|
||||
// pv.xy = (src0.x == 0.0, src0.w == 0.0)
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||
|
@ -997,7 +991,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
case AluVectorOpcode::kKillGt:
|
||||
case AluVectorOpcode::kKillGe:
|
||||
case AluVectorOpcode::kKillNe:
|
||||
replicate_result = true;
|
||||
replicate_result_x = true;
|
||||
// pv = src0 op src1
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||
|
@ -1094,7 +1088,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||
if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
|
||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||
// This is an attenuation calculation function, so infinity is probably
|
||||
// not very unlikely.
|
||||
|
@ -1277,8 +1271,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
break;
|
||||
|
||||
default:
|
||||
assert_always();
|
||||
// Unknown instruction - don't modify pv.
|
||||
assert_unhandled_case(instr.vector_opcode);
|
||||
translated = false;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1289,37 +1283,26 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
}
|
||||
}
|
||||
|
||||
StoreResult(instr.result, system_temp_pv_, replicate_result,
|
||||
instr.GetMemExportStreamConstant() != UINT32_MAX);
|
||||
|
||||
if (predicate_written) {
|
||||
cf_exec_predicate_written_ = true;
|
||||
CloseInstructionPredication();
|
||||
}
|
||||
return translated;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
if (FLAGS_dxbc_source_map) {
|
||||
instruction_disassembly_buffer_.Reset();
|
||||
instr.Disassemble(&instruction_disassembly_buffer_);
|
||||
// Will be emitted by UpdateInstructionPredication.
|
||||
bool DxbcShaderTranslator::ProcessScalarAluOperation(
|
||||
const ParsedAluInstruction& instr, bool& predicate_written) {
|
||||
predicate_written = false;
|
||||
|
||||
if (!instr.has_scalar_op) {
|
||||
return false;
|
||||
}
|
||||
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
|
||||
true);
|
||||
// Whether the instruction has changed the predicate and it needs to be
|
||||
// checked again later.
|
||||
bool predicate_written = false;
|
||||
|
||||
DxbcSourceOperand dxbc_operands[3];
|
||||
// Whether the operand is the same as any previous operand, and thus is loaded
|
||||
// only once.
|
||||
bool operands_duplicate[3] = {};
|
||||
uint32_t operand_lengths[3];
|
||||
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||
const InstructionOperand& operand = instr.operands[i];
|
||||
for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) {
|
||||
const InstructionOperand& operand = instr.scalar_operands[i];
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
if (operand == instr.operands[j]) {
|
||||
if (operand == instr.scalar_operands[j]) {
|
||||
operands_duplicate[i] = true;
|
||||
dxbc_operands[i] = dxbc_operands[j];
|
||||
break;
|
||||
|
@ -1385,6 +1368,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
D3D10_SB_OPCODE_SINCOS,
|
||||
};
|
||||
|
||||
bool translated = true;
|
||||
switch (instr.scalar_opcode) {
|
||||
case AluScalarOpcode::kAdds:
|
||||
case AluScalarOpcode::kSubs: {
|
||||
|
@ -1431,7 +1415,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
if (instr.operands[0].components[0] != instr.operands[0].components[1]) {
|
||||
if (instr.scalar_operands[0].components[0] !=
|
||||
instr.scalar_operands[0].components[1]) {
|
||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||
|
@ -1679,7 +1664,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
case AluScalarOpcode::kMaxs:
|
||||
case AluScalarOpcode::kMins: {
|
||||
// max is commonly used as mov.
|
||||
if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
|
||||
if (instr.scalar_operands[0].components[0] ==
|
||||
instr.scalar_operands[0].components[1]) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
|
||||
|
@ -1990,7 +1976,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
// The `ps = max(src0.x, src0.y)` part.
|
||||
if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
|
||||
if (instr.scalar_operands[0].components[0] ==
|
||||
instr.scalar_operands[0].components[1]) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
|
||||
|
@ -2308,7 +2295,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
|
||||
if (!instr.scalar_operands[0].EqualsAbsolute(instr.scalar_operands[1])) {
|
||||
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
|
||||
uint32_t is_subnormal_temp = PushSystemTemp();
|
||||
// Get the non-NaN multiplicand closer to zero to check if any of them
|
||||
|
@ -2407,38 +2394,62 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
|||
++stat_.float_instruction_count;
|
||||
} break;
|
||||
|
||||
case AluScalarOpcode::kRetainPrev:
|
||||
// No changes, but translated successfully (just write the old ps).
|
||||
break;
|
||||
|
||||
default:
|
||||
// May be retain_prev, in this case the current ps should be written, or
|
||||
// something invalid that's better to ignore.
|
||||
assert_true(instr.scalar_opcode == AluScalarOpcode::kRetainPrev);
|
||||
assert_unhandled_case(instr.scalar_opcode);
|
||||
translated = false;
|
||||
break;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]);
|
||||
for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) {
|
||||
UnloadDxbcSourceOperand(dxbc_operands[instr.scalar_operand_count - 1 - i]);
|
||||
}
|
||||
|
||||
StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true);
|
||||
return translated;
|
||||
}
|
||||
|
||||
if (predicate_written) {
|
||||
// Translates one parsed ALU instruction, which may carry a vector operation, a
// scalar operation, or both.
// The vector operation is processed first, then the scalar one, and only after
// both have been processed are their results stored, so no StoreResult call
// happens between the two operations.
// instr: the parsed ALU instruction; nothing is emitted when instr.is_nop().
void DxbcShaderTranslator::ProcessAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
// Nothing to translate.
if (instr.is_nop()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// With source mapping enabled, the disassembly of the whole instruction is
// prepared once here, before predication is updated.
if (FLAGS_dxbc_source_map) {
|
||||
instruction_disassembly_buffer_.Reset();
|
||||
instr.Disassemble(&instruction_disassembly_buffer_);
|
||||
// Will be emitted by UpdateInstructionPredication.
|
||||
}
|
||||
// Predication is set up once for the whole instruction, before either
// operation is processed.
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
|
||||
true);
|
||||
|
||||
// Whether the instruction has changed the predicate and it needs to be
|
||||
// checked again later.
|
||||
bool predicate_written_vector = false;
|
||||
// Whether the result is only in X and all components should be remapped to X
|
||||
// while storing.
|
||||
bool replicate_vector_x = false;
|
||||
// Each Process*AluOperation call returns false when the instruction has no
// such operation or its opcode was not translated; in that case the
// corresponding result is not stored below.
bool store_vector = ProcessVectorAluOperation(instr, replicate_vector_x,
|
||||
predicate_written_vector);
|
||||
bool predicate_written_scalar = false;
|
||||
bool store_scalar =
|
||||
ProcessScalarAluOperation(instr, predicate_written_scalar);
|
||||
|
||||
// Stores happen only now, after both operations have been processed.
if (store_vector) {
|
||||
// The last argument is true when GetMemExportStreamConstant() reports a valid
// eA write (a value other than UINT32_MAX).
// NOTE(review): how StoreResult uses that flag is not visible here - confirm.
StoreResult(instr.vector_result, system_temp_pv_, replicate_vector_x,
|
||||
instr.GetMemExportStreamConstant() != UINT32_MAX);
|
||||
}
|
||||
if (store_scalar) {
|
||||
StoreResult(instr.scalar_result, system_temp_ps_pc_p0_a0_, true);
|
||||
}
|
||||
|
||||
// A predicate write by either operation is recorded and the
// instruction-level predication is closed.
if (predicate_written_vector || predicate_written_scalar) {
|
||||
cf_exec_predicate_written_ = true;
|
||||
CloseInstructionPredication();
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches a parsed ALU instruction by instr.type: kNop emits nothing, while
// kVector and kScalar are forwarded to ProcessVectorAluInstruction and
// ProcessScalarAluInstruction respectively.
// NOTE(review): this looks like the pre-change side of the diff - another
// definition of the same function, based on is_nop(), precedes it. Confirm
// which one is current before relying on this body.
void DxbcShaderTranslator::ProcessAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
switch (instr.type) {
|
||||
case ParsedAluInstruction::Type::kNop:
|
||||
// No operation to translate.
break;
|
||||
case ParsedAluInstruction::Type::kVector:
|
||||
ProcessVectorAluInstruction(instr);
|
||||
break;
|
||||
case ParsedAluInstruction::Type::kScalar:
|
||||
ProcessScalarAluInstruction(instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -820,18 +820,36 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
|
|||
|
||||
void GlslShaderTranslator::ProcessAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
EmitSource("// ");
|
||||
EmitSource("/*\n");
|
||||
instr.Disassemble(&source_);
|
||||
EmitSource("*/\n");
|
||||
|
||||
switch (instr.type) {
|
||||
case ParsedAluInstruction::Type::kNop:
|
||||
break;
|
||||
case ParsedAluInstruction::Type::kVector:
|
||||
ProcessVectorAluInstruction(instr);
|
||||
break;
|
||||
case ParsedAluInstruction::Type::kScalar:
|
||||
ProcessScalarAluInstruction(instr);
|
||||
break;
|
||||
if (instr.is_nop()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit if statement only if we have a different predicate condition than our
|
||||
// containing block.
|
||||
bool conditional = false;
|
||||
if (instr.is_predicated &&
|
||||
(!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
|
||||
conditional = true;
|
||||
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
|
||||
Indent();
|
||||
}
|
||||
|
||||
bool store_vector = ProcessVectorAluOperation(instr);
|
||||
bool store_scalar = ProcessScalarAluOperation(instr);
|
||||
if (store_vector) {
|
||||
EmitStoreVectorResult(instr.vector_result);
|
||||
}
|
||||
if (store_scalar) {
|
||||
EmitStoreScalarResult(instr.scalar_result);
|
||||
}
|
||||
|
||||
if (conditional) {
|
||||
Unindent();
|
||||
EmitSourceDepth("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1041,20 +1059,14 @@ void GlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
|
|||
EmitSource(";\n");
|
||||
}
|
||||
|
||||
void GlslShaderTranslator::ProcessVectorAluInstruction(
|
||||
bool GlslShaderTranslator::ProcessVectorAluOperation(
|
||||
const ParsedAluInstruction& instr) {
|
||||
// Emit if statement only if we have a different predicate condition than our
|
||||
// containing block.
|
||||
bool conditional = false;
|
||||
if (instr.is_predicated &&
|
||||
(!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
|
||||
conditional = true;
|
||||
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
|
||||
Indent();
|
||||
if (!instr.has_vector_op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < instr.operand_count; ++i) {
|
||||
EmitLoadOperand(i, instr.operands[i]);
|
||||
for (size_t i = 0; i < instr.vector_operand_count; ++i) {
|
||||
EmitLoadOperand(i, instr.vector_operands[i]);
|
||||
}
|
||||
|
||||
switch (instr.vector_opcode) {
|
||||
|
@ -1251,26 +1263,17 @@ void GlslShaderTranslator::ProcessVectorAluInstruction(
|
|||
break;
|
||||
}
|
||||
|
||||
EmitStoreVectorResult(instr.result);
|
||||
|
||||
if (conditional) {
|
||||
Unindent();
|
||||
EmitSourceDepth("}\n");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||
bool GlslShaderTranslator::ProcessScalarAluOperation(
|
||||
const ParsedAluInstruction& instr) {
|
||||
bool conditional = false;
|
||||
if (instr.is_predicated &&
|
||||
(!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
|
||||
conditional = true;
|
||||
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
|
||||
Indent();
|
||||
if (!instr.has_scalar_op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < instr.operand_count; ++i) {
|
||||
EmitLoadOperand(i, instr.operands[i]);
|
||||
for (size_t i = 0; i < instr.scalar_operand_count; ++i) {
|
||||
EmitLoadOperand(i, instr.scalar_operands[i]);
|
||||
}
|
||||
|
||||
switch (instr.scalar_opcode) {
|
||||
|
@ -1595,12 +1598,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
|||
break;
|
||||
}
|
||||
|
||||
EmitStoreScalarResult(instr.result);
|
||||
|
||||
if (conditional) {
|
||||
Unindent();
|
||||
EmitSourceDepth("}\n");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -77,8 +77,8 @@ class GlslShaderTranslator : public ShaderTranslator {
|
|||
bool cf_exec_pred_ = false;
|
||||
bool cf_exec_pred_cond_ = false;
|
||||
|
||||
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
|
||||
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
|
||||
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr);
|
||||
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -459,51 +459,62 @@ struct ParsedAluInstruction {
|
|||
// Index into the ucode dword source.
|
||||
uint32_t dword_index = 0;
|
||||
|
||||
enum class Type {
|
||||
kNop,
|
||||
kVector,
|
||||
kScalar,
|
||||
};
|
||||
// Type of the instruction.
|
||||
Type type = Type::kNop;
|
||||
bool is_nop() const { return type == Type::kNop; }
|
||||
bool is_vector_type() const { return type == Type::kVector; }
|
||||
bool is_scalar_type() const { return type == Type::kScalar; }
|
||||
// Opcode for the instruction if it is a vector type.
|
||||
ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd;
|
||||
// Opcode for the instruction if it is a scalar type.
|
||||
ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds;
|
||||
// Friendly name of the instruction.
|
||||
const char* opcode_name = nullptr;
|
||||
// True if the vector part of the instruction needs to be executed and data
|
||||
// about it in this structure is valid.
|
||||
bool has_vector_op = false;
|
||||
// True if the scalar part of the instruction needs to be executed and data
|
||||
// about it in this structure is valid.
|
||||
bool has_scalar_op = false;
|
||||
bool is_nop() const { return !has_vector_op && !has_scalar_op; }
|
||||
|
||||
// Opcode for the vector part of the instruction.
|
||||
ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd;
|
||||
// Opcode for the scalar part of the instruction.
|
||||
ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds;
|
||||
// Friendly name of the vector instruction.
|
||||
const char* vector_opcode_name = nullptr;
|
||||
// Friendly name of the scalar instruction.
|
||||
const char* scalar_opcode_name = nullptr;
|
||||
|
||||
// True if the instruction is paired with another instruction.
|
||||
bool is_paired = false;
|
||||
// True if the instruction is predicated on the specified
|
||||
// predicate_condition.
|
||||
bool is_predicated = false;
|
||||
// Expected predication condition value if predicated.
|
||||
bool predicate_condition = false;
|
||||
|
||||
// Describes how the instruction result is stored.
|
||||
InstructionResult result;
|
||||
// Describes how the vector operation result is stored.
|
||||
InstructionResult vector_result;
|
||||
// Describes how the scalar operation result is stored.
|
||||
InstructionResult scalar_result;
|
||||
// Both operations must be executed before any result is stored if vector and
|
||||
// scalar operations are paired. There are cases of the vector result being
|
||||
// used as a scalar operand or vice versa (the halo on Avalanche in Halo 3,
|
||||
// for example); in such cases there must be no dependency between the two
|
||||
// operations.
|
||||
|
||||
// Number of source operands.
|
||||
size_t operand_count = 0;
|
||||
// Describes each source operand.
|
||||
InstructionOperand operands[3];
|
||||
// Number of source operands of the vector operation.
|
||||
size_t vector_operand_count = 0;
|
||||
// Describes each source operand of the vector operation.
|
||||
InstructionOperand vector_operands[3];
|
||||
// Number of source operands of the scalar operation.
|
||||
size_t scalar_operand_count = 0;
|
||||
// Describes each source operand of the scalar operation.
|
||||
InstructionOperand scalar_operands[2];
|
||||
|
||||
// If this is a valid eA write (MAD with a stream constant), returns the index
|
||||
// of the stream float constant, otherwise returns UINT32_MAX.
|
||||
uint32_t GetMemExportStreamConstant() const {
|
||||
if (result.storage_target == InstructionStorageTarget::kExportAddress &&
|
||||
is_vector_type() && vector_opcode == ucode::AluVectorOpcode::kMad &&
|
||||
result.has_all_writes() &&
|
||||
operands[2].storage_source ==
|
||||
if (has_vector_op &&
|
||||
vector_result.storage_target ==
|
||||
InstructionStorageTarget::kExportAddress &&
|
||||
vector_opcode == ucode::AluVectorOpcode::kMad &&
|
||||
vector_result.has_all_writes() &&
|
||||
vector_operands[2].storage_source ==
|
||||
InstructionStorageSource::kConstantFloat &&
|
||||
operands[2].storage_addressing_mode ==
|
||||
vector_operands[2].storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic &&
|
||||
operands[2].is_standard_swizzle()) {
|
||||
return operands[2].storage_index;
|
||||
vector_operands[2].is_standard_swizzle()) {
|
||||
return vector_operands[2].storage_index;
|
||||
}
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
|
|
@ -1124,30 +1124,21 @@ const ShaderTranslator::AluOpcodeInfo
|
|||
};
|
||||
|
||||
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
|
||||
if (!op.has_vector_op() && !op.has_scalar_op()) {
|
||||
ParsedAluInstruction instr;
|
||||
instr.type = ParsedAluInstruction::Type::kNop;
|
||||
|
||||
instr.dword_index = 0;
|
||||
|
||||
instr.is_predicated = op.is_predicated();
|
||||
instr.predicate_condition = op.predicate_condition();
|
||||
|
||||
ParseAluVectorOperation(op, instr);
|
||||
ParseAluScalarOperation(op, instr);
|
||||
|
||||
instr.Disassemble(&ucode_disasm_buffer_);
|
||||
ProcessAluInstruction(instr);
|
||||
return;
|
||||
}
|
||||
|
||||
ParsedAluInstruction instr;
|
||||
if (op.has_vector_op()) {
|
||||
const auto& opcode_info =
|
||||
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
|
||||
ParseAluVectorInstruction(op, opcode_info, instr);
|
||||
ProcessAluInstruction(instr);
|
||||
}
|
||||
|
||||
if (op.has_scalar_op()) {
|
||||
const auto& opcode_info =
|
||||
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
|
||||
ParseAluScalarInstruction(op, opcode_info, instr);
|
||||
ProcessAluInstruction(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void ParseAluInstructionOperand(const AluInstruction& op, int i,
|
||||
int swizzle_component_count,
|
||||
InstructionOperand* out_op) {
|
||||
|
@ -1238,62 +1229,64 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op,
|
|||
out_op->components[0] = GetSwizzleFromComponentIndex(a);
|
||||
}
|
||||
|
||||
void ShaderTranslator::ParseAluVectorInstruction(
|
||||
const AluInstruction& op, const AluOpcodeInfo& opcode_info,
|
||||
void ShaderTranslator::ParseAluVectorOperation(const AluInstruction& op,
|
||||
ParsedAluInstruction& i) {
|
||||
i.dword_index = 0;
|
||||
i.type = ParsedAluInstruction::Type::kVector;
|
||||
i.has_vector_op = op.has_vector_op();
|
||||
if (!i.has_vector_op) {
|
||||
return;
|
||||
}
|
||||
i.vector_opcode = op.vector_opcode();
|
||||
i.opcode_name = opcode_info.name;
|
||||
i.is_paired = op.has_scalar_op();
|
||||
i.is_predicated = op.is_predicated();
|
||||
i.predicate_condition = op.predicate_condition();
|
||||
const auto& opcode_info =
|
||||
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
|
||||
i.vector_opcode_name = opcode_info.name;
|
||||
|
||||
i.result.is_export = op.is_export();
|
||||
i.result.is_clamped = op.vector_clamp();
|
||||
i.result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.result.storage_index = 0;
|
||||
i.vector_result.is_export = op.is_export();
|
||||
i.vector_result.is_clamped = op.vector_clamp();
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.vector_result.storage_index = 0;
|
||||
uint32_t dest_num = op.vector_dest();
|
||||
if (!op.is_export()) {
|
||||
assert_true(dest_num < 32);
|
||||
i.result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.result.storage_index = dest_num;
|
||||
i.result.storage_addressing_mode =
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.vector_result.storage_index = dest_num;
|
||||
i.vector_result.storage_addressing_mode =
|
||||
op.is_vector_dest_relative()
|
||||
? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
} else if (is_vertex_shader()) {
|
||||
switch (dest_num) {
|
||||
case 32:
|
||||
i.result.storage_target = InstructionStorageTarget::kExportAddress;
|
||||
i.vector_result.storage_target =
|
||||
InstructionStorageTarget::kExportAddress;
|
||||
break;
|
||||
case 33:
|
||||
case 34:
|
||||
case 35:
|
||||
case 36:
|
||||
case 37:
|
||||
i.result.storage_index = dest_num - 33;
|
||||
i.result.storage_target = InstructionStorageTarget::kExportData;
|
||||
i.vector_result.storage_index = dest_num - 33;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kExportData;
|
||||
break;
|
||||
case 62:
|
||||
i.result.storage_target = InstructionStorageTarget::kPosition;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kPosition;
|
||||
break;
|
||||
case 63:
|
||||
i.result.storage_target = InstructionStorageTarget::kPointSize;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kPointSize;
|
||||
break;
|
||||
default:
|
||||
if (dest_num < 16) {
|
||||
i.result.storage_target = InstructionStorageTarget::kInterpolant;
|
||||
i.result.storage_index = dest_num;
|
||||
i.vector_result.storage_target =
|
||||
InstructionStorageTarget::kInterpolant;
|
||||
i.vector_result.storage_index = dest_num;
|
||||
} else {
|
||||
// Unimplemented.
|
||||
// assert_always();
|
||||
XELOGE(
|
||||
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write "
|
||||
"to export %d",
|
||||
"ShaderTranslator::ParseAluVectorOperation: Unsupported write to "
|
||||
"export %d",
|
||||
dest_num);
|
||||
i.result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.result.storage_index = 0;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.vector_result.storage_index = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1301,42 +1294,43 @@ void ShaderTranslator::ParseAluVectorInstruction(
|
|||
switch (dest_num) {
|
||||
case 0:
|
||||
case 63: // ? masked?
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 0;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.vector_result.storage_index = 0;
|
||||
break;
|
||||
case 1:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 1;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.vector_result.storage_index = 1;
|
||||
break;
|
||||
case 2:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 2;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.vector_result.storage_index = 2;
|
||||
break;
|
||||
case 3:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 3;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.vector_result.storage_index = 3;
|
||||
break;
|
||||
case 32:
|
||||
i.result.storage_target = InstructionStorageTarget::kExportAddress;
|
||||
i.vector_result.storage_target =
|
||||
InstructionStorageTarget::kExportAddress;
|
||||
break;
|
||||
case 33:
|
||||
case 34:
|
||||
case 35:
|
||||
case 36:
|
||||
case 37:
|
||||
i.result.storage_index = dest_num - 33;
|
||||
i.result.storage_target = InstructionStorageTarget::kExportData;
|
||||
i.vector_result.storage_index = dest_num - 33;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kExportData;
|
||||
break;
|
||||
case 61:
|
||||
i.result.storage_target = InstructionStorageTarget::kDepth;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kDepth;
|
||||
break;
|
||||
default:
|
||||
XELOGE(
|
||||
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write "
|
||||
"to export %d",
|
||||
"ShaderTranslator::ParseAluVectorOperation: Unsupported write to "
|
||||
"export %d",
|
||||
dest_num);
|
||||
i.result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.result.storage_index = 0;
|
||||
i.vector_result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.vector_result.storage_index = 0;
|
||||
}
|
||||
}
|
||||
if (op.is_export()) {
|
||||
|
@ -1344,22 +1338,22 @@ void ShaderTranslator::ParseAluVectorInstruction(
|
|||
uint32_t const_1_mask = op.scalar_write_mask();
|
||||
if (!write_mask) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
i.result.write_mask[j] = false;
|
||||
i.vector_result.write_mask[j] = false;
|
||||
}
|
||||
} else {
|
||||
for (int j = 0; j < 4; ++j, write_mask >>= 1, const_1_mask >>= 1) {
|
||||
i.result.write_mask[j] = true;
|
||||
i.vector_result.write_mask[j] = true;
|
||||
if (write_mask & 0x1) {
|
||||
if (const_1_mask & 0x1) {
|
||||
i.result.components[j] = SwizzleSource::k1;
|
||||
i.vector_result.components[j] = SwizzleSource::k1;
|
||||
} else {
|
||||
i.result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
i.vector_result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
}
|
||||
} else {
|
||||
if (op.is_scalar_dest_relative()) {
|
||||
i.result.components[j] = SwizzleSource::k0;
|
||||
i.vector_result.components[j] = SwizzleSource::k0;
|
||||
} else {
|
||||
i.result.write_mask[j] = false;
|
||||
i.vector_result.write_mask[j] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1367,45 +1361,44 @@ void ShaderTranslator::ParseAluVectorInstruction(
|
|||
} else {
|
||||
uint32_t write_mask = op.vector_write_mask();
|
||||
for (int j = 0; j < 4; ++j, write_mask >>= 1) {
|
||||
i.result.write_mask[j] = (write_mask & 0x1) == 0x1;
|
||||
i.result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
i.vector_result.write_mask[j] = (write_mask & 0x1) == 0x1;
|
||||
i.vector_result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
}
|
||||
}
|
||||
|
||||
i.operand_count = opcode_info.argument_count;
|
||||
for (int j = 0; j < i.operand_count; ++j) {
|
||||
ParseAluInstructionOperand(
|
||||
op, j + 1, opcode_info.src_swizzle_component_count, &i.operands[j]);
|
||||
i.vector_operand_count = opcode_info.argument_count;
|
||||
for (int j = 0; j < i.vector_operand_count; ++j) {
|
||||
ParseAluInstructionOperand(op, j + 1,
|
||||
opcode_info.src_swizzle_component_count,
|
||||
&i.vector_operands[j]);
|
||||
|
||||
// Track constant float register loads.
|
||||
if (i.operands[j].storage_source ==
|
||||
if (i.vector_operands[j].storage_source ==
|
||||
InstructionStorageSource::kConstantFloat) {
|
||||
if (i.operands[j].storage_addressing_mode !=
|
||||
if (i.vector_operands[j].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Dynamic addressing makes all constants required.
|
||||
std::memset(constant_register_map_.float_bitmap, 0xFF,
|
||||
sizeof(constant_register_map_.float_bitmap));
|
||||
} else {
|
||||
auto register_index = i.operands[j].storage_index;
|
||||
auto register_index = i.vector_operands[j].storage_index;
|
||||
constant_register_map_.float_bitmap[register_index / 64] |=
|
||||
1ull << (register_index % 64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i.Disassemble(&ucode_disasm_buffer_);
|
||||
}
|
||||
|
||||
void ShaderTranslator::ParseAluScalarInstruction(
|
||||
const AluInstruction& op, const AluOpcodeInfo& opcode_info,
|
||||
void ShaderTranslator::ParseAluScalarOperation(const AluInstruction& op,
|
||||
ParsedAluInstruction& i) {
|
||||
i.dword_index = 0;
|
||||
i.type = ParsedAluInstruction::Type::kScalar;
|
||||
i.has_scalar_op = op.has_scalar_op();
|
||||
if (!i.has_scalar_op) {
|
||||
return;
|
||||
}
|
||||
i.scalar_opcode = op.scalar_opcode();
|
||||
i.opcode_name = opcode_info.name;
|
||||
i.is_paired = op.has_vector_op();
|
||||
i.is_predicated = op.is_predicated();
|
||||
i.predicate_condition = op.predicate_condition();
|
||||
const auto& opcode_info =
|
||||
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
|
||||
i.scalar_opcode_name = opcode_info.name;
|
||||
|
||||
uint32_t dest_num;
|
||||
uint32_t write_mask;
|
||||
|
@ -1416,50 +1409,52 @@ void ShaderTranslator::ParseAluScalarInstruction(
|
|||
dest_num = op.scalar_dest();
|
||||
write_mask = op.scalar_write_mask();
|
||||
}
|
||||
i.result.is_export = op.is_export();
|
||||
i.result.is_clamped = op.scalar_clamp();
|
||||
i.result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.result.storage_index = 0;
|
||||
i.scalar_result.is_export = op.is_export();
|
||||
i.scalar_result.is_clamped = op.scalar_clamp();
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.scalar_result.storage_index = 0;
|
||||
if (!op.is_export()) {
|
||||
assert_true(dest_num < 32);
|
||||
i.result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.result.storage_index = dest_num;
|
||||
i.result.storage_addressing_mode =
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kRegister;
|
||||
i.scalar_result.storage_index = dest_num;
|
||||
i.scalar_result.storage_addressing_mode =
|
||||
op.is_scalar_dest_relative()
|
||||
? InstructionStorageAddressingMode::kAddressRelative
|
||||
: InstructionStorageAddressingMode::kStatic;
|
||||
} else if (is_vertex_shader()) {
|
||||
switch (dest_num) {
|
||||
case 32:
|
||||
i.result.storage_target = InstructionStorageTarget::kExportAddress;
|
||||
i.scalar_result.storage_target =
|
||||
InstructionStorageTarget::kExportAddress;
|
||||
break;
|
||||
case 33:
|
||||
case 34:
|
||||
case 35:
|
||||
case 36:
|
||||
case 37:
|
||||
i.result.storage_index = dest_num - 33;
|
||||
i.result.storage_target = InstructionStorageTarget::kExportData;
|
||||
i.scalar_result.storage_index = dest_num - 33;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kExportData;
|
||||
break;
|
||||
case 62:
|
||||
i.result.storage_target = InstructionStorageTarget::kPosition;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kPosition;
|
||||
break;
|
||||
case 63:
|
||||
i.result.storage_target = InstructionStorageTarget::kPointSize;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kPointSize;
|
||||
break;
|
||||
default:
|
||||
if (dest_num < 16) {
|
||||
i.result.storage_target = InstructionStorageTarget::kInterpolant;
|
||||
i.result.storage_index = dest_num;
|
||||
i.scalar_result.storage_target =
|
||||
InstructionStorageTarget::kInterpolant;
|
||||
i.scalar_result.storage_index = dest_num;
|
||||
} else {
|
||||
// Unimplemented.
|
||||
// assert_always();
|
||||
XELOGE(
|
||||
"ShaderTranslator::ParseAluScalarInstruction: Unsupported write "
|
||||
"to export %d",
|
||||
"ShaderTranslator::ParseAluScalarOperation: Unsupported write to "
|
||||
"export %d",
|
||||
dest_num);
|
||||
i.result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.result.storage_index = 0;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kNone;
|
||||
i.scalar_result.storage_index = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1467,46 +1462,47 @@ void ShaderTranslator::ParseAluScalarInstruction(
|
|||
switch (dest_num) {
|
||||
case 0:
|
||||
case 63: // ? masked?
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 0;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.scalar_result.storage_index = 0;
|
||||
break;
|
||||
case 1:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 1;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.scalar_result.storage_index = 1;
|
||||
break;
|
||||
case 2:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 2;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.scalar_result.storage_index = 2;
|
||||
break;
|
||||
case 3:
|
||||
i.result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.result.storage_index = 3;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
|
||||
i.scalar_result.storage_index = 3;
|
||||
break;
|
||||
case 32:
|
||||
i.result.storage_target = InstructionStorageTarget::kExportAddress;
|
||||
i.scalar_result.storage_target =
|
||||
InstructionStorageTarget::kExportAddress;
|
||||
break;
|
||||
case 33:
|
||||
case 34:
|
||||
case 35:
|
||||
case 36:
|
||||
case 37:
|
||||
i.result.storage_index = dest_num - 33;
|
||||
i.result.storage_target = InstructionStorageTarget::kExportData;
|
||||
i.scalar_result.storage_index = dest_num - 33;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kExportData;
|
||||
break;
|
||||
case 61:
|
||||
i.result.storage_target = InstructionStorageTarget::kDepth;
|
||||
i.scalar_result.storage_target = InstructionStorageTarget::kDepth;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < 4; ++j, write_mask >>= 1) {
|
||||
i.result.write_mask[j] = (write_mask & 0x1) == 0x1;
|
||||
i.result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
i.scalar_result.write_mask[j] = (write_mask & 0x1) == 0x1;
|
||||
i.scalar_result.components[j] = GetSwizzleFromComponentIndex(j);
|
||||
}
|
||||
|
||||
i.operand_count = opcode_info.argument_count;
|
||||
i.scalar_operand_count = opcode_info.argument_count;
|
||||
if (opcode_info.argument_count == 1) {
|
||||
ParseAluInstructionOperand(op, 3, opcode_info.src_swizzle_component_count,
|
||||
&i.operands[0]);
|
||||
&i.scalar_operands[0]);
|
||||
} else {
|
||||
uint32_t src3_swizzle = op.src_swizzle(3);
|
||||
uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3;
|
||||
|
@ -1518,19 +1514,19 @@ void ShaderTranslator::ParseAluScalarInstruction(
|
|||
|
||||
ParseAluInstructionOperandSpecial(
|
||||
op, InstructionStorageSource::kConstantFloat, op.src_reg(3),
|
||||
op.src_negate(3), 0, swiz_a, &i.operands[0]);
|
||||
op.src_negate(3), 0, swiz_a, &i.scalar_operands[0]);
|
||||
|
||||
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
|
||||
reg2, op.src_negate(3), const_slot,
|
||||
swiz_b, &i.operands[1]);
|
||||
swiz_b, &i.scalar_operands[1]);
|
||||
}
|
||||
|
||||
// Track constant float register loads - in either case, a float constant may
|
||||
// be used in operand 0.
|
||||
if (i.operands[0].storage_source ==
|
||||
if (i.scalar_operands[0].storage_source ==
|
||||
InstructionStorageSource::kConstantFloat) {
|
||||
auto register_index = i.operands[0].storage_index;
|
||||
if (i.operands[0].storage_addressing_mode !=
|
||||
auto register_index = i.scalar_operands[0].storage_index;
|
||||
if (i.scalar_operands[0].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Dynamic addressing makes all constants required.
|
||||
std::memset(constant_register_map_.float_bitmap, 0xFF,
|
||||
|
@ -1540,8 +1536,6 @@ void ShaderTranslator::ParseAluScalarInstruction(
|
|||
1ull << (register_index % 64);
|
||||
}
|
||||
}
|
||||
|
||||
i.Disassemble(&ucode_disasm_buffer_);
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -208,11 +208,9 @@ class ShaderTranslator {
|
|||
ParsedTextureFetchInstruction* out_instr);
|
||||
|
||||
void TranslateAluInstruction(const ucode::AluInstruction& op);
|
||||
void ParseAluVectorInstruction(const ucode::AluInstruction& op,
|
||||
const AluOpcodeInfo& opcode_info,
|
||||
void ParseAluVectorOperation(const ucode::AluInstruction& op,
|
||||
ParsedAluInstruction& instr);
|
||||
void ParseAluScalarInstruction(const ucode::AluInstruction& op,
|
||||
const AluOpcodeInfo& opcode_info,
|
||||
void ParseAluScalarOperation(const ucode::AluInstruction& op,
|
||||
ParsedAluInstruction& instr);
|
||||
|
||||
// Input shader metadata and microcode.
|
||||
|
|
|
@ -454,30 +454,45 @@ void ParsedAluInstruction::Disassemble(StringBuffer* out) const {
|
|||
out->Append(" nop\n");
|
||||
return;
|
||||
}
|
||||
if (is_scalar_type() && is_paired) {
|
||||
out->Append(" + ");
|
||||
} else {
|
||||
if (has_vector_op) {
|
||||
out->Append(" ");
|
||||
}
|
||||
if (is_predicated) {
|
||||
out->Append(predicate_condition ? " (p0) " : "(!p0) ");
|
||||
} else {
|
||||
out->Append(" ");
|
||||
}
|
||||
out->Append(opcode_name);
|
||||
if (result.is_clamped) {
|
||||
out->Append(vector_opcode_name);
|
||||
if (vector_result.is_clamped) {
|
||||
out->Append("_sat");
|
||||
}
|
||||
out->Append(' ');
|
||||
|
||||
DisassembleResultOperand(result, out);
|
||||
|
||||
for (int i = 0; i < operand_count; ++i) {
|
||||
DisassembleResultOperand(vector_result, out);
|
||||
for (int i = 0; i < vector_operand_count; ++i) {
|
||||
out->Append(", ");
|
||||
DisassembleSourceOperand(operands[i], out);
|
||||
DisassembleSourceOperand(vector_operands[i], out);
|
||||
}
|
||||
out->Append('\n');
|
||||
}
|
||||
if (has_scalar_op) {
|
||||
out->Append(has_vector_op ? " + " : " ");
|
||||
if (is_predicated) {
|
||||
out->Append(predicate_condition ? " (p0) " : "(!p0) ");
|
||||
} else {
|
||||
out->Append(" ");
|
||||
}
|
||||
out->Append(scalar_opcode_name);
|
||||
if (scalar_result.is_clamped) {
|
||||
out->Append("_sat");
|
||||
}
|
||||
out->Append(' ');
|
||||
DisassembleResultOperand(scalar_result, out);
|
||||
for (int i = 0; i < scalar_operand_count; ++i) {
|
||||
out->Append(", ");
|
||||
DisassembleSourceOperand(scalar_operands[i], out);
|
||||
}
|
||||
out->Append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -2000,17 +2000,60 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
|
|||
|
||||
void SpirvShaderTranslator::ProcessAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
if (instr.is_nop()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& b = *builder_;
|
||||
switch (instr.type) {
|
||||
case ParsedAluInstruction::Type::kNop:
|
||||
b.createNoResultOp(spv::Op::OpNop);
|
||||
break;
|
||||
case ParsedAluInstruction::Type::kVector:
|
||||
ProcessVectorAluInstruction(instr);
|
||||
break;
|
||||
case ParsedAluInstruction::Type::kScalar:
|
||||
ProcessScalarAluInstruction(instr);
|
||||
break;
|
||||
|
||||
// Close the open predicated block if this instr isn't predicated or the
|
||||
// conditions do not match.
|
||||
if (open_predicated_block_ &&
|
||||
(!instr.is_predicated ||
|
||||
instr.predicate_condition != predicated_block_cond_)) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
}
|
||||
|
||||
if (!open_predicated_block_ && instr.is_predicated) {
|
||||
Id pred_cond =
|
||||
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
|
||||
b.makeBoolConstant(instr.predicate_condition));
|
||||
auto block = &b.makeNewBlock();
|
||||
open_predicated_block_ = true;
|
||||
predicated_block_cond_ = instr.predicate_condition;
|
||||
predicated_block_end_ = &b.makeNewBlock();
|
||||
|
||||
b.createSelectionMerge(predicated_block_end_,
|
||||
spv::SelectionControlMaskNone);
|
||||
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
|
||||
b.setBuildPoint(block);
|
||||
}
|
||||
|
||||
bool close_predicated_block_vector = false;
|
||||
bool store_vector =
|
||||
ProcessVectorAluOperation(instr, close_predicated_block_vector);
|
||||
bool close_predicated_block_scalar = false;
|
||||
bool store_scalar =
|
||||
ProcessScalarAluOperation(instr, close_predicated_block_scalar);
|
||||
|
||||
if (store_vector) {
|
||||
StoreToResult(b.createLoad(pv_), instr.vector_result);
|
||||
}
|
||||
if (store_scalar) {
|
||||
StoreToResult(b.createLoad(ps_), instr.scalar_result);
|
||||
}
|
||||
|
||||
if ((close_predicated_block_vector || close_predicated_block_scalar) &&
|
||||
open_predicated_block_) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2202,45 +2245,23 @@ spv::Function* SpirvShaderTranslator::CreateCubeFunction() {
|
|||
return function;
|
||||
}
|
||||
|
||||
void SpirvShaderTranslator::ProcessVectorAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
bool SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||
const ParsedAluInstruction& instr, bool& close_predicated_block) {
|
||||
close_predicated_block = false;
|
||||
|
||||
if (!instr.has_vector_op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto& b = *builder_;
|
||||
|
||||
// Close the open predicated block if this instr isn't predicated or the
|
||||
// conditions do not match.
|
||||
if (open_predicated_block_ &&
|
||||
(!instr.is_predicated ||
|
||||
instr.predicate_condition != predicated_block_cond_)) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
}
|
||||
|
||||
if (!open_predicated_block_ && instr.is_predicated) {
|
||||
Id pred_cond =
|
||||
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
|
||||
b.makeBoolConstant(instr.predicate_condition));
|
||||
auto block = &b.makeNewBlock();
|
||||
open_predicated_block_ = true;
|
||||
predicated_block_cond_ = instr.predicate_condition;
|
||||
predicated_block_end_ = &b.makeNewBlock();
|
||||
|
||||
b.createSelectionMerge(predicated_block_end_,
|
||||
spv::SelectionControlMaskNone);
|
||||
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
|
||||
b.setBuildPoint(block);
|
||||
}
|
||||
|
||||
// TODO: If we have identical operands, reuse previous one.
|
||||
Id sources[3] = {0};
|
||||
Id dest = vec4_float_zero_;
|
||||
for (size_t i = 0; i < instr.operand_count; i++) {
|
||||
sources[i] = LoadFromOperand(instr.operands[i]);
|
||||
for (size_t i = 0; i < instr.vector_operand_count; i++) {
|
||||
sources[i] = LoadFromOperand(instr.vector_operands[i]);
|
||||
}
|
||||
|
||||
bool close_predicated_block = false;
|
||||
switch (instr.vector_opcode) {
|
||||
case AluVectorOpcode::kAdd: {
|
||||
dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0],
|
||||
|
@ -2603,58 +2624,30 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
|
|||
assert_true(b.getTypeId(dest) == vec4_float_type_);
|
||||
if (dest) {
|
||||
b.createStore(dest, pv_);
|
||||
StoreToResult(dest, instr.result);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (close_predicated_block && open_predicated_block_) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
}
|
||||
bool SpirvShaderTranslator::ProcessScalarAluOperation(
|
||||
const ParsedAluInstruction& instr, bool& close_predicated_block) {
|
||||
close_predicated_block = false;
|
||||
|
||||
if (!instr.has_scalar_op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void SpirvShaderTranslator::ProcessScalarAluInstruction(
|
||||
const ParsedAluInstruction& instr) {
|
||||
auto& b = *builder_;
|
||||
|
||||
// Close the open predicated block if this instr isn't predicated or the
|
||||
// conditions do not match.
|
||||
if (open_predicated_block_ &&
|
||||
(!instr.is_predicated ||
|
||||
instr.predicate_condition != predicated_block_cond_)) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
}
|
||||
|
||||
if (!open_predicated_block_ && instr.is_predicated) {
|
||||
Id pred_cond =
|
||||
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
|
||||
b.makeBoolConstant(instr.predicate_condition));
|
||||
auto block = &b.makeNewBlock();
|
||||
open_predicated_block_ = true;
|
||||
predicated_block_cond_ = instr.predicate_condition;
|
||||
predicated_block_end_ = &b.makeNewBlock();
|
||||
|
||||
b.createSelectionMerge(predicated_block_end_,
|
||||
spv::SelectionControlMaskNone);
|
||||
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
|
||||
b.setBuildPoint(block);
|
||||
}
|
||||
|
||||
// TODO: If we have identical operands, reuse previous one.
|
||||
Id sources[3] = {0};
|
||||
Id dest = b.makeFloatConstant(0);
|
||||
for (size_t i = 0, x = 0; i < instr.operand_count; i++) {
|
||||
auto src = LoadFromOperand(instr.operands[i]);
|
||||
for (size_t i = 0, x = 0; i < instr.scalar_operand_count; i++) {
|
||||
auto src = LoadFromOperand(instr.scalar_operands[i]);
|
||||
|
||||
// Pull components out of the vector operands and use them as sources.
|
||||
if (instr.operands[i].component_count > 1) {
|
||||
for (int j = 0; j < instr.operands[i].component_count; j++) {
|
||||
if (instr.scalar_operands[i].component_count > 1) {
|
||||
for (int j = 0; j < instr.scalar_operands[i].component_count; j++) {
|
||||
sources[x++] = b.createCompositeExtract(src, float_type_, j);
|
||||
}
|
||||
} else {
|
||||
|
@ -2662,7 +2655,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
|
|||
}
|
||||
}
|
||||
|
||||
bool close_predicated_block = false;
|
||||
switch (instr.scalar_opcode) {
|
||||
case AluScalarOpcode::kAdds:
|
||||
case AluScalarOpcode::kAddsc0:
|
||||
|
@ -3073,16 +3065,9 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
|
|||
assert_true(b.getTypeId(dest) == float_type_);
|
||||
if (dest) {
|
||||
b.createStore(dest, ps_);
|
||||
StoreToResult(dest, instr.result);
|
||||
}
|
||||
|
||||
if (close_predicated_block && open_predicated_block_) {
|
||||
b.createBranch(predicated_block_end_);
|
||||
b.setBuildPoint(predicated_block_end_);
|
||||
open_predicated_block_ = false;
|
||||
predicated_block_cond_ = false;
|
||||
predicated_block_end_ = nullptr;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Id SpirvShaderTranslator::CreateGlslStd450InstructionCall(
|
||||
|
|
|
@ -88,8 +88,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
private:
|
||||
spv::Function* CreateCubeFunction();
|
||||
|
||||
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
|
||||
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
|
||||
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& close_predicate_block);
|
||||
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& close_predicate_block);
|
||||
|
||||
spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
|
||||
uint32_t offset, uint32_t count);
|
||||
|
|
Loading…
Reference in New Issue