[GPU] Store ALU result after both vector and scalar instructions

This commit is contained in:
Triang3l 2019-04-20 20:25:27 +03:00
parent cd1aadef74
commit 66a9c9d812
10 changed files with 421 additions and 403 deletions

View File

@ -1057,8 +1057,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
// cubemap coordinate. // cubemap coordinate.
void ArrayCoordToCubeDirection(uint32_t reg); void ArrayCoordToCubeDirection(uint32_t reg);
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); bool& replicate_result_x,
bool& predicate_written);
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
bool& predicate_written);
// Appends a string to a DWORD stream, returns the DWORD-aligned length. // Appends a string to a DWORD stream, returns the DWORD-aligned length.
static uint32_t AppendString(std::vector<uint32_t>& dest, const char* source); static uint32_t AppendString(std::vector<uint32_t>& dest, const char* source);
@ -1206,7 +1209,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// eM# in each `alloc export`, or UINT32_MAX if not used. // eM# in each `alloc export`, or UINT32_MAX if not used.
uint32_t system_temps_memexport_data_[kMaxMemExports][5]; uint32_t system_temps_memexport_data_[kMaxMemExports][5];
// Vector ALU result/scratch (since Xenos write masks can contain swizzles). // Vector ALU result or fetch scratch (since Xenos write masks can contain
// swizzles).
uint32_t system_temp_pv_; uint32_t system_temp_pv_;
// Temporary register ID for previous scalar result, program counter, // Temporary register ID for previous scalar result, program counter,
// predicate and absolute address register. // predicate and absolute address register.

View File

@ -17,29 +17,22 @@ namespace xe {
namespace gpu { namespace gpu {
using namespace ucode; using namespace ucode;
void DxbcShaderTranslator::ProcessVectorAluInstruction( bool DxbcShaderTranslator::ProcessVectorAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr, bool& replicate_result_x,
if (FLAGS_dxbc_source_map) { bool& predicate_written) {
instruction_disassembly_buffer_.Reset(); replicate_result_x = false;
instr.Disassemble(&instruction_disassembly_buffer_); predicate_written = false;
// Will be emitted by UpdateInstructionPredication.
}
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
true);
// Whether the instruction has changed the predicate and it needs to be
// checked again later.
bool predicate_written = false;
// Whether the result is only in X and all components should be remapped to X if (!instr.has_vector_op) {
// while storing. return false;
bool replicate_result = false; }
// A small shortcut, operands of cube are the same, but swizzled. // A small shortcut, operands of cube are the same, but swizzled.
uint32_t operand_count; uint32_t operand_count;
if (instr.vector_opcode == AluVectorOpcode::kCube) { if (instr.vector_opcode == AluVectorOpcode::kCube) {
operand_count = 1; operand_count = 1;
} else { } else {
operand_count = uint32_t(instr.operand_count); operand_count = uint32_t(instr.vector_operand_count);
} }
DxbcSourceOperand dxbc_operands[3]; DxbcSourceOperand dxbc_operands[3];
// Whether the operand is the same as any previous operand, and thus is loaded // Whether the operand is the same as any previous operand, and thus is loaded
@ -47,9 +40,9 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
bool operands_duplicate[3] = {}; bool operands_duplicate[3] = {};
uint32_t operand_length_sums[3]; uint32_t operand_length_sums[3];
for (uint32_t i = 0; i < operand_count; ++i) { for (uint32_t i = 0; i < operand_count; ++i) {
const InstructionOperand& operand = instr.operands[i]; const InstructionOperand& operand = instr.vector_operands[i];
for (uint32_t j = 0; j < i; ++j) { for (uint32_t j = 0; j < i; ++j) {
if (operand == instr.operands[j]) { if (operand == instr.vector_operands[j]) {
operands_duplicate[i] = true; operands_duplicate[i] = true;
dxbc_operands[i] = dxbc_operands[j]; dxbc_operands[i] = dxbc_operands[j];
break; break;
@ -98,6 +91,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
D3D10_SB_OPCODE_MAX, D3D10_SB_OPCODE_MAX,
}; };
bool translated = true;
switch (instr.vector_opcode) { switch (instr.vector_opcode) {
case AluVectorOpcode::kAdd: case AluVectorOpcode::kAdd:
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
@ -123,7 +117,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1]); UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0), // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
// flushing denormals (must be done using eq - doing bitwise comparison // flushing denormals (must be done using eq - doing bitwise comparison
// doesn't flush denormals). // doesn't flush denormals).
@ -287,7 +281,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[2]); UseDxbcSourceOperand(dxbc_operands[2]);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// If any operand is zero or denormalized, just leave the addition part. // If any operand is zero or denormalized, just leave the addition part.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
@ -394,7 +388,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kDp4: case AluVectorOpcode::kDp4:
case AluVectorOpcode::kDp3: case AluVectorOpcode::kDp3:
case AluVectorOpcode::kDp2Add: { case AluVectorOpcode::kDp2Add: {
if (instr.operands[0].EqualsAbsolute(instr.operands[1])) { if (instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
// The operands are the same when calculating vector length, no need to // The operands are the same when calculating vector length, no need to
// emulate 0 * anything = 0 in this case. // emulate 0 * anything = 0 in this case.
shader_code_.push_back( shader_code_.push_back(
@ -858,7 +852,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
} break; } break;
case AluVectorOpcode::kMax4: case AluVectorOpcode::kMax4:
replicate_result = true; replicate_result_x = true;
// pv.xy = max(src0.xy, src0.zw) // pv.xy = max(src0.xy, src0.zw)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -891,7 +885,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGtPush:
case AluVectorOpcode::kSetpGePush: case AluVectorOpcode::kSetpGePush:
predicate_written = true; predicate_written = true;
replicate_result = true; replicate_result_x = true;
// pv.xy = (src0.x == 0.0, src0.w == 0.0) // pv.xy = (src0.x == 0.0, src0.w == 0.0)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -997,7 +991,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kKillGt: case AluVectorOpcode::kKillGt:
case AluVectorOpcode::kKillGe: case AluVectorOpcode::kKillGe:
case AluVectorOpcode::kKillNe: case AluVectorOpcode::kKillNe:
replicate_result = true; replicate_result_x = true;
// pv = src0 op src1 // pv = src0 op src1
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE( shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) | kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
@ -1094,7 +1088,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// This is an attenuation calculation function, so infinity is probably // This is an attenuation calculation function, so infinity is probably
// not very unlikely. // not very unlikely.
@ -1277,8 +1271,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
break; break;
default: default:
assert_always(); assert_unhandled_case(instr.vector_opcode);
// Unknown instruction - don't modify pv. translated = false;
break; break;
} }
@ -1289,37 +1283,26 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
} }
} }
StoreResult(instr.result, system_temp_pv_, replicate_result, return translated;
instr.GetMemExportStreamConstant() != UINT32_MAX);
if (predicate_written) {
cf_exec_predicate_written_ = true;
CloseInstructionPredication();
}
} }
void DxbcShaderTranslator::ProcessScalarAluInstruction( bool DxbcShaderTranslator::ProcessScalarAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr, bool& predicate_written) {
if (FLAGS_dxbc_source_map) { predicate_written = false;
instruction_disassembly_buffer_.Reset();
instr.Disassemble(&instruction_disassembly_buffer_); if (!instr.has_scalar_op) {
// Will be emitted by UpdateInstructionPredication. return false;
} }
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
true);
// Whether the instruction has changed the predicate and it needs to be
// checked again later.
bool predicate_written = false;
DxbcSourceOperand dxbc_operands[3]; DxbcSourceOperand dxbc_operands[3];
// Whether the operand is the same as any previous operand, and thus is loaded // Whether the operand is the same as any previous operand, and thus is loaded
// only once. // only once.
bool operands_duplicate[3] = {}; bool operands_duplicate[3] = {};
uint32_t operand_lengths[3]; uint32_t operand_lengths[3];
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) {
const InstructionOperand& operand = instr.operands[i]; const InstructionOperand& operand = instr.scalar_operands[i];
for (uint32_t j = 0; j < i; ++j) { for (uint32_t j = 0; j < i; ++j) {
if (operand == instr.operands[j]) { if (operand == instr.scalar_operands[j]) {
operands_duplicate[i] = true; operands_duplicate[i] = true;
dxbc_operands[i] = dxbc_operands[j]; dxbc_operands[i] = dxbc_operands[j];
break; break;
@ -1385,6 +1368,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
D3D10_SB_OPCODE_SINCOS, D3D10_SB_OPCODE_SINCOS,
}; };
bool translated = true;
switch (instr.scalar_opcode) { switch (instr.scalar_opcode) {
case AluScalarOpcode::kAdds: case AluScalarOpcode::kAdds:
case AluScalarOpcode::kSubs: { case AluScalarOpcode::kSubs: {
@ -1431,7 +1415,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (instr.operands[0].components[0] != instr.operands[0].components[1]) { if (instr.scalar_operands[0].components[0] !=
instr.scalar_operands[0].components[1]) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them // Get the non-NaN multiplicand closer to zero to check if any of them
@ -1679,7 +1664,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
case AluScalarOpcode::kMaxs: case AluScalarOpcode::kMaxs:
case AluScalarOpcode::kMins: { case AluScalarOpcode::kMins: {
// max is commonly used as mov. // max is commonly used as mov.
if (instr.operands[0].components[0] == instr.operands[0].components[1]) { if (instr.scalar_operands[0].components[0] ==
instr.scalar_operands[0].components[1]) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
@ -1990,7 +1976,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.conversion_instruction_count; ++stat_.conversion_instruction_count;
// The `ps = max(src0.x, src0.y)` part. // The `ps = max(src0.x, src0.y)` part.
if (instr.operands[0].components[0] == instr.operands[0].components[1]) { if (instr.scalar_operands[0].components[0] ==
instr.scalar_operands[0].components[1]) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
@ -2308,7 +2295,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) { if (!instr.scalar_operands[0].EqualsAbsolute(instr.scalar_operands[1])) {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them // Get the non-NaN multiplicand closer to zero to check if any of them
@ -2407,38 +2394,62 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} break; } break;
case AluScalarOpcode::kRetainPrev:
// No changes, but translated successfully (just write the old ps).
break;
default: default:
// May be retain_prev, in this case the current ps should be written, or assert_unhandled_case(instr.scalar_opcode);
// something invalid that's better to ignore. translated = false;
assert_true(instr.scalar_opcode == AluScalarOpcode::kRetainPrev);
break; break;
} }
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) { for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) {
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]); UnloadDxbcSourceOperand(dxbc_operands[instr.scalar_operand_count - 1 - i]);
} }
StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true); return translated;
}
if (predicate_written) { void DxbcShaderTranslator::ProcessAluInstruction(
const ParsedAluInstruction& instr) {
if (instr.is_nop()) {
return;
}
if (FLAGS_dxbc_source_map) {
instruction_disassembly_buffer_.Reset();
instr.Disassemble(&instruction_disassembly_buffer_);
// Will be emitted by UpdateInstructionPredication.
}
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition,
true);
// Whether the instruction has changed the predicate and it needs to be
// checked again later.
bool predicate_written_vector = false;
// Whether the result is only in X and all components should be remapped to X
// while storing.
bool replicate_vector_x = false;
bool store_vector = ProcessVectorAluOperation(instr, replicate_vector_x,
predicate_written_vector);
bool predicate_written_scalar = false;
bool store_scalar =
ProcessScalarAluOperation(instr, predicate_written_scalar);
if (store_vector) {
StoreResult(instr.vector_result, system_temp_pv_, replicate_vector_x,
instr.GetMemExportStreamConstant() != UINT32_MAX);
}
if (store_scalar) {
StoreResult(instr.scalar_result, system_temp_ps_pc_p0_a0_, true);
}
if (predicate_written_vector || predicate_written_scalar) {
cf_exec_predicate_written_ = true; cf_exec_predicate_written_ = true;
CloseInstructionPredication(); CloseInstructionPredication();
} }
} }
void DxbcShaderTranslator::ProcessAluInstruction(
const ParsedAluInstruction& instr) {
switch (instr.type) {
case ParsedAluInstruction::Type::kNop:
break;
case ParsedAluInstruction::Type::kVector:
ProcessVectorAluInstruction(instr);
break;
case ParsedAluInstruction::Type::kScalar:
ProcessScalarAluInstruction(instr);
break;
}
}
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -820,18 +820,36 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
void GlslShaderTranslator::ProcessAluInstruction( void GlslShaderTranslator::ProcessAluInstruction(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
EmitSource("// "); EmitSource("/*\n");
instr.Disassemble(&source_); instr.Disassemble(&source_);
EmitSource("*/\n");
switch (instr.type) { if (instr.is_nop()) {
case ParsedAluInstruction::Type::kNop: return;
break; }
case ParsedAluInstruction::Type::kVector:
ProcessVectorAluInstruction(instr); // Emit if statement only if we have a different predicate condition than our
break; // containing block.
case ParsedAluInstruction::Type::kScalar: bool conditional = false;
ProcessScalarAluInstruction(instr); if (instr.is_predicated &&
break; (!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
conditional = true;
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
Indent();
}
bool store_vector = ProcessVectorAluOperation(instr);
bool store_scalar = ProcessScalarAluOperation(instr);
if (store_vector) {
EmitStoreVectorResult(instr.vector_result);
}
if (store_scalar) {
EmitStoreScalarResult(instr.scalar_result);
}
if (conditional) {
Unindent();
EmitSourceDepth("}\n");
} }
} }
@ -1041,20 +1059,14 @@ void GlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
EmitSource(";\n"); EmitSource(";\n");
} }
void GlslShaderTranslator::ProcessVectorAluInstruction( bool GlslShaderTranslator::ProcessVectorAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
// Emit if statement only if we have a different predicate condition than our if (!instr.has_vector_op) {
// containing block. return false;
bool conditional = false;
if (instr.is_predicated &&
(!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
conditional = true;
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
Indent();
} }
for (size_t i = 0; i < instr.operand_count; ++i) { for (size_t i = 0; i < instr.vector_operand_count; ++i) {
EmitLoadOperand(i, instr.operands[i]); EmitLoadOperand(i, instr.vector_operands[i]);
} }
switch (instr.vector_opcode) { switch (instr.vector_opcode) {
@ -1251,26 +1263,17 @@ void GlslShaderTranslator::ProcessVectorAluInstruction(
break; break;
} }
EmitStoreVectorResult(instr.result); return true;
if (conditional) {
Unindent();
EmitSourceDepth("}\n");
}
} }
void GlslShaderTranslator::ProcessScalarAluInstruction( bool GlslShaderTranslator::ProcessScalarAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
bool conditional = false; if (!instr.has_scalar_op) {
if (instr.is_predicated && return false;
(!cf_exec_pred_ || (cf_exec_pred_cond_ != instr.predicate_condition))) {
conditional = true;
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
Indent();
} }
for (size_t i = 0; i < instr.operand_count; ++i) { for (size_t i = 0; i < instr.scalar_operand_count; ++i) {
EmitLoadOperand(i, instr.operands[i]); EmitLoadOperand(i, instr.scalar_operands[i]);
} }
switch (instr.scalar_opcode) { switch (instr.scalar_opcode) {
@ -1595,12 +1598,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
break; break;
} }
EmitStoreScalarResult(instr.result); return true;
if (conditional) {
Unindent();
EmitSourceDepth("}\n");
}
} }
} // namespace gpu } // namespace gpu

View File

@ -77,8 +77,8 @@ class GlslShaderTranslator : public ShaderTranslator {
bool cf_exec_pred_ = false; bool cf_exec_pred_ = false;
bool cf_exec_pred_cond_ = false; bool cf_exec_pred_cond_ = false;
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); bool ProcessVectorAluOperation(const ParsedAluInstruction& instr);
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); bool ProcessScalarAluOperation(const ParsedAluInstruction& instr);
}; };
} // namespace gpu } // namespace gpu

View File

@ -459,51 +459,62 @@ struct ParsedAluInstruction {
// Index into the ucode dword source. // Index into the ucode dword source.
uint32_t dword_index = 0; uint32_t dword_index = 0;
enum class Type { // True if the vector part of the instruction needs to be executed and data
kNop, // about it in this structure is valid.
kVector, bool has_vector_op = false;
kScalar, // True if the scalar part of the instruction needs to be executed and data
}; // about it in this structure is valid.
// Type of the instruction. bool has_scalar_op = false;
Type type = Type::kNop; bool is_nop() const { return !has_vector_op && !has_scalar_op; }
bool is_nop() const { return type == Type::kNop; }
bool is_vector_type() const { return type == Type::kVector; } // Opcode for the vector part of the instruction.
bool is_scalar_type() const { return type == Type::kScalar; } ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd;
// Opcode for the instruction if it is a vector type. // Opcode for the scalar part of the instruction.
ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd; ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds;
// Opcode for the instruction if it is a scalar type. // Friendly name of the vector instruction.
ucode::AluScalarOpcode scalar_opcode = ucode::AluScalarOpcode::kAdds; const char* vector_opcode_name = nullptr;
// Friendly name of the instruction. // Friendly name of the scalar instruction.
const char* opcode_name = nullptr; const char* scalar_opcode_name = nullptr;
// True if the instruction is paired with another instruction.
bool is_paired = false;
// True if the instruction is predicated on the specified // True if the instruction is predicated on the specified
// predicate_condition. // predicate_condition.
bool is_predicated = false; bool is_predicated = false;
// Expected predication condition value if predicated. // Expected predication condition value if predicated.
bool predicate_condition = false; bool predicate_condition = false;
// Describes how the instruction result is stored. // Describes how the vector operation result is stored.
InstructionResult result; InstructionResult vector_result;
// Describes how the scalar operation result is stored.
InstructionResult scalar_result;
// Both operations must be executed before any result is stored if vector and
// scalar operations are paired. There are cases of a vector result being used
// as a scalar operand or vice versa (the halo on Avalanche in Halo 3, for
// example); in these cases, there must be no dependency between the two
// operations.
// Number of source operands. // Number of source operands of the vector operation.
size_t operand_count = 0; size_t vector_operand_count = 0;
// Describes each source operand. // Describes each source operand of the vector operation.
InstructionOperand operands[3]; InstructionOperand vector_operands[3];
// Number of source operands of the scalar operation.
size_t scalar_operand_count = 0;
// Describes each source operand of the scalar operation.
InstructionOperand scalar_operands[2];
// If this is a valid eA write (MAD with a stream constant), returns the index // If this is a valid eA write (MAD with a stream constant), returns the index
// of the stream float constant, otherwise returns UINT32_MAX. // of the stream float constant, otherwise returns UINT32_MAX.
uint32_t GetMemExportStreamConstant() const { uint32_t GetMemExportStreamConstant() const {
if (result.storage_target == InstructionStorageTarget::kExportAddress && if (has_vector_op &&
is_vector_type() && vector_opcode == ucode::AluVectorOpcode::kMad && vector_result.storage_target ==
result.has_all_writes() && InstructionStorageTarget::kExportAddress &&
operands[2].storage_source == vector_opcode == ucode::AluVectorOpcode::kMad &&
vector_result.has_all_writes() &&
vector_operands[2].storage_source ==
InstructionStorageSource::kConstantFloat && InstructionStorageSource::kConstantFloat &&
operands[2].storage_addressing_mode == vector_operands[2].storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic && InstructionStorageAddressingMode::kStatic &&
operands[2].is_standard_swizzle()) { vector_operands[2].is_standard_swizzle()) {
return operands[2].storage_index; return vector_operands[2].storage_index;
} }
return UINT32_MAX; return UINT32_MAX;
} }

View File

@ -1124,28 +1124,19 @@ const ShaderTranslator::AluOpcodeInfo
}; };
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) { void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
if (!op.has_vector_op() && !op.has_scalar_op()) {
ParsedAluInstruction instr;
instr.type = ParsedAluInstruction::Type::kNop;
instr.Disassemble(&ucode_disasm_buffer_);
ProcessAluInstruction(instr);
return;
}
ParsedAluInstruction instr; ParsedAluInstruction instr;
if (op.has_vector_op()) {
const auto& opcode_info =
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
ParseAluVectorInstruction(op, opcode_info, instr);
ProcessAluInstruction(instr);
}
if (op.has_scalar_op()) { instr.dword_index = 0;
const auto& opcode_info =
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())]; instr.is_predicated = op.is_predicated();
ParseAluScalarInstruction(op, opcode_info, instr); instr.predicate_condition = op.predicate_condition();
ProcessAluInstruction(instr);
} ParseAluVectorOperation(op, instr);
ParseAluScalarOperation(op, instr);
instr.Disassemble(&ucode_disasm_buffer_);
ProcessAluInstruction(instr);
} }
void ParseAluInstructionOperand(const AluInstruction& op, int i, void ParseAluInstructionOperand(const AluInstruction& op, int i,
@ -1238,62 +1229,64 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op,
out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[0] = GetSwizzleFromComponentIndex(a);
} }
void ShaderTranslator::ParseAluVectorInstruction( void ShaderTranslator::ParseAluVectorOperation(const AluInstruction& op,
const AluInstruction& op, const AluOpcodeInfo& opcode_info, ParsedAluInstruction& i) {
ParsedAluInstruction& i) { i.has_vector_op = op.has_vector_op();
i.dword_index = 0; if (!i.has_vector_op) {
i.type = ParsedAluInstruction::Type::kVector; return;
}
i.vector_opcode = op.vector_opcode(); i.vector_opcode = op.vector_opcode();
i.opcode_name = opcode_info.name; const auto& opcode_info =
i.is_paired = op.has_scalar_op(); alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
i.is_predicated = op.is_predicated(); i.vector_opcode_name = opcode_info.name;
i.predicate_condition = op.predicate_condition();
i.result.is_export = op.is_export(); i.vector_result.is_export = op.is_export();
i.result.is_clamped = op.vector_clamp(); i.vector_result.is_clamped = op.vector_clamp();
i.result.storage_target = InstructionStorageTarget::kRegister; i.vector_result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = 0; i.vector_result.storage_index = 0;
uint32_t dest_num = op.vector_dest(); uint32_t dest_num = op.vector_dest();
if (!op.is_export()) { if (!op.is_export()) {
assert_true(dest_num < 32); assert_true(dest_num < 32);
i.result.storage_target = InstructionStorageTarget::kRegister; i.vector_result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = dest_num; i.vector_result.storage_index = dest_num;
i.result.storage_addressing_mode = i.vector_result.storage_addressing_mode =
op.is_vector_dest_relative() op.is_vector_dest_relative()
? InstructionStorageAddressingMode::kAddressRelative ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic; : InstructionStorageAddressingMode::kStatic;
} else if (is_vertex_shader()) { } else if (is_vertex_shader()) {
switch (dest_num) { switch (dest_num) {
case 32: case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress; i.vector_result.storage_target =
InstructionStorageTarget::kExportAddress;
break; break;
case 33: case 33:
case 34: case 34:
case 35: case 35:
case 36: case 36:
case 37: case 37:
i.result.storage_index = dest_num - 33; i.vector_result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData; i.vector_result.storage_target = InstructionStorageTarget::kExportData;
break; break;
case 62: case 62:
i.result.storage_target = InstructionStorageTarget::kPosition; i.vector_result.storage_target = InstructionStorageTarget::kPosition;
break; break;
case 63: case 63:
i.result.storage_target = InstructionStorageTarget::kPointSize; i.vector_result.storage_target = InstructionStorageTarget::kPointSize;
break; break;
default: default:
if (dest_num < 16) { if (dest_num < 16) {
i.result.storage_target = InstructionStorageTarget::kInterpolant; i.vector_result.storage_target =
i.result.storage_index = dest_num; InstructionStorageTarget::kInterpolant;
i.vector_result.storage_index = dest_num;
} else { } else {
// Unimplemented. // Unimplemented.
// assert_always(); // assert_always();
XELOGE( XELOGE(
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write " "ShaderTranslator::ParseAluVectorOperation: Unsupported write to "
"to export %d", "export %d",
dest_num); dest_num);
i.result.storage_target = InstructionStorageTarget::kNone; i.vector_result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0; i.vector_result.storage_index = 0;
} }
break; break;
} }
@ -1301,42 +1294,43 @@ void ShaderTranslator::ParseAluVectorInstruction(
switch (dest_num) { switch (dest_num) {
case 0: case 0:
case 63: // ? masked? case 63: // ? masked?
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 0; i.vector_result.storage_index = 0;
break; break;
case 1: case 1:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 1; i.vector_result.storage_index = 1;
break; break;
case 2: case 2:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 2; i.vector_result.storage_index = 2;
break; break;
case 3: case 3:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.vector_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 3; i.vector_result.storage_index = 3;
break; break;
case 32: case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress; i.vector_result.storage_target =
InstructionStorageTarget::kExportAddress;
break; break;
case 33: case 33:
case 34: case 34:
case 35: case 35:
case 36: case 36:
case 37: case 37:
i.result.storage_index = dest_num - 33; i.vector_result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData; i.vector_result.storage_target = InstructionStorageTarget::kExportData;
break; break;
case 61: case 61:
i.result.storage_target = InstructionStorageTarget::kDepth; i.vector_result.storage_target = InstructionStorageTarget::kDepth;
break; break;
default: default:
XELOGE( XELOGE(
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write " "ShaderTranslator::ParseAluVectorOperation: Unsupported write to "
"to export %d", "export %d",
dest_num); dest_num);
i.result.storage_target = InstructionStorageTarget::kNone; i.vector_result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0; i.vector_result.storage_index = 0;
} }
} }
if (op.is_export()) { if (op.is_export()) {
@ -1344,22 +1338,22 @@ void ShaderTranslator::ParseAluVectorInstruction(
uint32_t const_1_mask = op.scalar_write_mask(); uint32_t const_1_mask = op.scalar_write_mask();
if (!write_mask) { if (!write_mask) {
for (int j = 0; j < 4; ++j) { for (int j = 0; j < 4; ++j) {
i.result.write_mask[j] = false; i.vector_result.write_mask[j] = false;
} }
} else { } else {
for (int j = 0; j < 4; ++j, write_mask >>= 1, const_1_mask >>= 1) { for (int j = 0; j < 4; ++j, write_mask >>= 1, const_1_mask >>= 1) {
i.result.write_mask[j] = true; i.vector_result.write_mask[j] = true;
if (write_mask & 0x1) { if (write_mask & 0x1) {
if (const_1_mask & 0x1) { if (const_1_mask & 0x1) {
i.result.components[j] = SwizzleSource::k1; i.vector_result.components[j] = SwizzleSource::k1;
} else { } else {
i.result.components[j] = GetSwizzleFromComponentIndex(j); i.vector_result.components[j] = GetSwizzleFromComponentIndex(j);
} }
} else { } else {
if (op.is_scalar_dest_relative()) { if (op.is_scalar_dest_relative()) {
i.result.components[j] = SwizzleSource::k0; i.vector_result.components[j] = SwizzleSource::k0;
} else { } else {
i.result.write_mask[j] = false; i.vector_result.write_mask[j] = false;
} }
} }
} }
@ -1367,45 +1361,44 @@ void ShaderTranslator::ParseAluVectorInstruction(
} else { } else {
uint32_t write_mask = op.vector_write_mask(); uint32_t write_mask = op.vector_write_mask();
for (int j = 0; j < 4; ++j, write_mask >>= 1) { for (int j = 0; j < 4; ++j, write_mask >>= 1) {
i.result.write_mask[j] = (write_mask & 0x1) == 0x1; i.vector_result.write_mask[j] = (write_mask & 0x1) == 0x1;
i.result.components[j] = GetSwizzleFromComponentIndex(j); i.vector_result.components[j] = GetSwizzleFromComponentIndex(j);
} }
} }
i.operand_count = opcode_info.argument_count; i.vector_operand_count = opcode_info.argument_count;
for (int j = 0; j < i.operand_count; ++j) { for (int j = 0; j < i.vector_operand_count; ++j) {
ParseAluInstructionOperand( ParseAluInstructionOperand(op, j + 1,
op, j + 1, opcode_info.src_swizzle_component_count, &i.operands[j]); opcode_info.src_swizzle_component_count,
&i.vector_operands[j]);
// Track constant float register loads. // Track constant float register loads.
if (i.operands[j].storage_source == if (i.vector_operands[j].storage_source ==
InstructionStorageSource::kConstantFloat) { InstructionStorageSource::kConstantFloat) {
if (i.operands[j].storage_addressing_mode != if (i.vector_operands[j].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kStatic) {
// Dynamic addressing makes all constants required. // Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF, std::memset(constant_register_map_.float_bitmap, 0xFF,
sizeof(constant_register_map_.float_bitmap)); sizeof(constant_register_map_.float_bitmap));
} else { } else {
auto register_index = i.operands[j].storage_index; auto register_index = i.vector_operands[j].storage_index;
constant_register_map_.float_bitmap[register_index / 64] |= constant_register_map_.float_bitmap[register_index / 64] |=
1ull << (register_index % 64); 1ull << (register_index % 64);
} }
} }
} }
i.Disassemble(&ucode_disasm_buffer_);
} }
void ShaderTranslator::ParseAluScalarInstruction( void ShaderTranslator::ParseAluScalarOperation(const AluInstruction& op,
const AluInstruction& op, const AluOpcodeInfo& opcode_info, ParsedAluInstruction& i) {
ParsedAluInstruction& i) { i.has_scalar_op = op.has_scalar_op();
i.dword_index = 0; if (!i.has_scalar_op) {
i.type = ParsedAluInstruction::Type::kScalar; return;
}
i.scalar_opcode = op.scalar_opcode(); i.scalar_opcode = op.scalar_opcode();
i.opcode_name = opcode_info.name; const auto& opcode_info =
i.is_paired = op.has_vector_op(); alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
i.is_predicated = op.is_predicated(); i.scalar_opcode_name = opcode_info.name;
i.predicate_condition = op.predicate_condition();
uint32_t dest_num; uint32_t dest_num;
uint32_t write_mask; uint32_t write_mask;
@ -1416,50 +1409,52 @@ void ShaderTranslator::ParseAluScalarInstruction(
dest_num = op.scalar_dest(); dest_num = op.scalar_dest();
write_mask = op.scalar_write_mask(); write_mask = op.scalar_write_mask();
} }
i.result.is_export = op.is_export(); i.scalar_result.is_export = op.is_export();
i.result.is_clamped = op.scalar_clamp(); i.scalar_result.is_clamped = op.scalar_clamp();
i.result.storage_target = InstructionStorageTarget::kRegister; i.scalar_result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = 0; i.scalar_result.storage_index = 0;
if (!op.is_export()) { if (!op.is_export()) {
assert_true(dest_num < 32); assert_true(dest_num < 32);
i.result.storage_target = InstructionStorageTarget::kRegister; i.scalar_result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = dest_num; i.scalar_result.storage_index = dest_num;
i.result.storage_addressing_mode = i.scalar_result.storage_addressing_mode =
op.is_scalar_dest_relative() op.is_scalar_dest_relative()
? InstructionStorageAddressingMode::kAddressRelative ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic; : InstructionStorageAddressingMode::kStatic;
} else if (is_vertex_shader()) { } else if (is_vertex_shader()) {
switch (dest_num) { switch (dest_num) {
case 32: case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress; i.scalar_result.storage_target =
InstructionStorageTarget::kExportAddress;
break; break;
case 33: case 33:
case 34: case 34:
case 35: case 35:
case 36: case 36:
case 37: case 37:
i.result.storage_index = dest_num - 33; i.scalar_result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData; i.scalar_result.storage_target = InstructionStorageTarget::kExportData;
break; break;
case 62: case 62:
i.result.storage_target = InstructionStorageTarget::kPosition; i.scalar_result.storage_target = InstructionStorageTarget::kPosition;
break; break;
case 63: case 63:
i.result.storage_target = InstructionStorageTarget::kPointSize; i.scalar_result.storage_target = InstructionStorageTarget::kPointSize;
break; break;
default: default:
if (dest_num < 16) { if (dest_num < 16) {
i.result.storage_target = InstructionStorageTarget::kInterpolant; i.scalar_result.storage_target =
i.result.storage_index = dest_num; InstructionStorageTarget::kInterpolant;
i.scalar_result.storage_index = dest_num;
} else { } else {
// Unimplemented. // Unimplemented.
// assert_always(); // assert_always();
XELOGE( XELOGE(
"ShaderTranslator::ParseAluScalarInstruction: Unsupported write " "ShaderTranslator::ParseAluScalarOperation: Unsupported write to "
"to export %d", "export %d",
dest_num); dest_num);
i.result.storage_target = InstructionStorageTarget::kNone; i.scalar_result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0; i.scalar_result.storage_index = 0;
} }
break; break;
} }
@ -1467,46 +1462,47 @@ void ShaderTranslator::ParseAluScalarInstruction(
switch (dest_num) { switch (dest_num) {
case 0: case 0:
case 63: // ? masked? case 63: // ? masked?
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 0; i.scalar_result.storage_index = 0;
break; break;
case 1: case 1:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 1; i.scalar_result.storage_index = 1;
break; break;
case 2: case 2:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 2; i.scalar_result.storage_index = 2;
break; break;
case 3: case 3:
i.result.storage_target = InstructionStorageTarget::kColorTarget; i.scalar_result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 3; i.scalar_result.storage_index = 3;
break; break;
case 32: case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress; i.scalar_result.storage_target =
InstructionStorageTarget::kExportAddress;
break; break;
case 33: case 33:
case 34: case 34:
case 35: case 35:
case 36: case 36:
case 37: case 37:
i.result.storage_index = dest_num - 33; i.scalar_result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData; i.scalar_result.storage_target = InstructionStorageTarget::kExportData;
break; break;
case 61: case 61:
i.result.storage_target = InstructionStorageTarget::kDepth; i.scalar_result.storage_target = InstructionStorageTarget::kDepth;
break; break;
} }
} }
for (int j = 0; j < 4; ++j, write_mask >>= 1) { for (int j = 0; j < 4; ++j, write_mask >>= 1) {
i.result.write_mask[j] = (write_mask & 0x1) == 0x1; i.scalar_result.write_mask[j] = (write_mask & 0x1) == 0x1;
i.result.components[j] = GetSwizzleFromComponentIndex(j); i.scalar_result.components[j] = GetSwizzleFromComponentIndex(j);
} }
i.operand_count = opcode_info.argument_count; i.scalar_operand_count = opcode_info.argument_count;
if (opcode_info.argument_count == 1) { if (opcode_info.argument_count == 1) {
ParseAluInstructionOperand(op, 3, opcode_info.src_swizzle_component_count, ParseAluInstructionOperand(op, 3, opcode_info.src_swizzle_component_count,
&i.operands[0]); &i.scalar_operands[0]);
} else { } else {
uint32_t src3_swizzle = op.src_swizzle(3); uint32_t src3_swizzle = op.src_swizzle(3);
uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3; uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3;
@ -1518,19 +1514,19 @@ void ShaderTranslator::ParseAluScalarInstruction(
ParseAluInstructionOperandSpecial( ParseAluInstructionOperandSpecial(
op, InstructionStorageSource::kConstantFloat, op.src_reg(3), op, InstructionStorageSource::kConstantFloat, op.src_reg(3),
op.src_negate(3), 0, swiz_a, &i.operands[0]); op.src_negate(3), 0, swiz_a, &i.scalar_operands[0]);
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister, ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
reg2, op.src_negate(3), const_slot, reg2, op.src_negate(3), const_slot,
swiz_b, &i.operands[1]); swiz_b, &i.scalar_operands[1]);
} }
// Track constant float register loads - in either case, a float constant may // Track constant float register loads - in either case, a float constant may
// be used in operand 0. // be used in operand 0.
if (i.operands[0].storage_source == if (i.scalar_operands[0].storage_source ==
InstructionStorageSource::kConstantFloat) { InstructionStorageSource::kConstantFloat) {
auto register_index = i.operands[0].storage_index; auto register_index = i.scalar_operands[0].storage_index;
if (i.operands[0].storage_addressing_mode != if (i.scalar_operands[0].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kStatic) {
// Dynamic addressing makes all constants required. // Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF, std::memset(constant_register_map_.float_bitmap, 0xFF,
@ -1540,8 +1536,6 @@ void ShaderTranslator::ParseAluScalarInstruction(
1ull << (register_index % 64); 1ull << (register_index % 64);
} }
} }
i.Disassemble(&ucode_disasm_buffer_);
} }
} // namespace gpu } // namespace gpu

View File

@ -208,12 +208,10 @@ class ShaderTranslator {
ParsedTextureFetchInstruction* out_instr); ParsedTextureFetchInstruction* out_instr);
void TranslateAluInstruction(const ucode::AluInstruction& op); void TranslateAluInstruction(const ucode::AluInstruction& op);
void ParseAluVectorInstruction(const ucode::AluInstruction& op, void ParseAluVectorOperation(const ucode::AluInstruction& op,
const AluOpcodeInfo& opcode_info, ParsedAluInstruction& instr);
ParsedAluInstruction& instr); void ParseAluScalarOperation(const ucode::AluInstruction& op,
void ParseAluScalarInstruction(const ucode::AluInstruction& op, ParsedAluInstruction& instr);
const AluOpcodeInfo& opcode_info,
ParsedAluInstruction& instr);
// Input shader metadata and microcode. // Input shader metadata and microcode.
ShaderType shader_type_; ShaderType shader_type_;

View File

@ -454,29 +454,44 @@ void ParsedAluInstruction::Disassemble(StringBuffer* out) const {
out->Append(" nop\n"); out->Append(" nop\n");
return; return;
} }
if (is_scalar_type() && is_paired) { if (has_vector_op) {
out->Append(" + ");
} else {
out->Append(" "); out->Append(" ");
if (is_predicated) {
out->Append(predicate_condition ? " (p0) " : "(!p0) ");
} else {
out->Append(" ");
}
out->Append(vector_opcode_name);
if (vector_result.is_clamped) {
out->Append("_sat");
}
out->Append(' ');
DisassembleResultOperand(vector_result, out);
for (int i = 0; i < vector_operand_count; ++i) {
out->Append(", ");
DisassembleSourceOperand(vector_operands[i], out);
}
out->Append('\n');
} }
if (is_predicated) { if (has_scalar_op) {
out->Append(predicate_condition ? " (p0) " : "(!p0) "); out->Append(has_vector_op ? " + " : " ");
} else { if (is_predicated) {
out->Append(" "); out->Append(predicate_condition ? " (p0) " : "(!p0) ");
} else {
out->Append(" ");
}
out->Append(scalar_opcode_name);
if (scalar_result.is_clamped) {
out->Append("_sat");
}
out->Append(' ');
DisassembleResultOperand(scalar_result, out);
for (int i = 0; i < scalar_operand_count; ++i) {
out->Append(", ");
DisassembleSourceOperand(scalar_operands[i], out);
}
out->Append('\n');
} }
out->Append(opcode_name);
if (result.is_clamped) {
out->Append("_sat");
}
out->Append(' ');
DisassembleResultOperand(result, out);
for (int i = 0; i < operand_count; ++i) {
out->Append(", ");
DisassembleSourceOperand(operands[i], out);
}
out->Append('\n');
} }
} // namespace gpu } // namespace gpu

View File

@ -2000,17 +2000,60 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
void SpirvShaderTranslator::ProcessAluInstruction( void SpirvShaderTranslator::ProcessAluInstruction(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
if (instr.is_nop()) {
return;
}
auto& b = *builder_; auto& b = *builder_;
switch (instr.type) {
case ParsedAluInstruction::Type::kNop: // Close the open predicated block if this instr isn't predicated or the
b.createNoResultOp(spv::Op::OpNop); // conditions do not match.
break; if (open_predicated_block_ &&
case ParsedAluInstruction::Type::kVector: (!instr.is_predicated ||
ProcessVectorAluInstruction(instr); instr.predicate_condition != predicated_block_cond_)) {
break; b.createBranch(predicated_block_end_);
case ParsedAluInstruction::Type::kScalar: b.setBuildPoint(predicated_block_end_);
ProcessScalarAluInstruction(instr); open_predicated_block_ = false;
break; predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
bool close_predicated_block_vector = false;
bool store_vector =
ProcessVectorAluOperation(instr, close_predicated_block_vector);
bool close_predicated_block_scalar = false;
bool store_scalar =
ProcessScalarAluOperation(instr, close_predicated_block_scalar);
if (store_vector) {
StoreToResult(b.createLoad(pv_), instr.vector_result);
}
if (store_scalar) {
StoreToResult(b.createLoad(ps_), instr.scalar_result);
}
if ((close_predicated_block_vector || close_predicated_block_scalar) &&
open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
} }
} }
@ -2202,45 +2245,23 @@ spv::Function* SpirvShaderTranslator::CreateCubeFunction() {
return function; return function;
} }
void SpirvShaderTranslator::ProcessVectorAluInstruction( bool SpirvShaderTranslator::ProcessVectorAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr, bool& close_predicated_block) {
close_predicated_block = false;
if (!instr.has_vector_op) {
return false;
}
auto& b = *builder_; auto& b = *builder_;
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
// TODO: If we have identical operands, reuse previous one. // TODO: If we have identical operands, reuse previous one.
Id sources[3] = {0}; Id sources[3] = {0};
Id dest = vec4_float_zero_; Id dest = vec4_float_zero_;
for (size_t i = 0; i < instr.operand_count; i++) { for (size_t i = 0; i < instr.vector_operand_count; i++) {
sources[i] = LoadFromOperand(instr.operands[i]); sources[i] = LoadFromOperand(instr.vector_operands[i]);
} }
bool close_predicated_block = false;
switch (instr.vector_opcode) { switch (instr.vector_opcode) {
case AluVectorOpcode::kAdd: { case AluVectorOpcode::kAdd: {
dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0],
@ -2603,58 +2624,30 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
assert_true(b.getTypeId(dest) == vec4_float_type_); assert_true(b.getTypeId(dest) == vec4_float_type_);
if (dest) { if (dest) {
b.createStore(dest, pv_); b.createStore(dest, pv_);
StoreToResult(dest, instr.result); return true;
}
if (close_predicated_block && open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
} }
return false;
} }
void SpirvShaderTranslator::ProcessScalarAluInstruction( bool SpirvShaderTranslator::ProcessScalarAluOperation(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr, bool& close_predicated_block) {
close_predicated_block = false;
if (!instr.has_scalar_op) {
return false;
}
auto& b = *builder_; auto& b = *builder_;
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
// TODO: If we have identical operands, reuse previous one. // TODO: If we have identical operands, reuse previous one.
Id sources[3] = {0}; Id sources[3] = {0};
Id dest = b.makeFloatConstant(0); Id dest = b.makeFloatConstant(0);
for (size_t i = 0, x = 0; i < instr.operand_count; i++) { for (size_t i = 0, x = 0; i < instr.scalar_operand_count; i++) {
auto src = LoadFromOperand(instr.operands[i]); auto src = LoadFromOperand(instr.scalar_operands[i]);
// Pull components out of the vector operands and use them as sources. // Pull components out of the vector operands and use them as sources.
if (instr.operands[i].component_count > 1) { if (instr.scalar_operands[i].component_count > 1) {
for (int j = 0; j < instr.operands[i].component_count; j++) { for (int j = 0; j < instr.scalar_operands[i].component_count; j++) {
sources[x++] = b.createCompositeExtract(src, float_type_, j); sources[x++] = b.createCompositeExtract(src, float_type_, j);
} }
} else { } else {
@ -2662,7 +2655,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
} }
} }
bool close_predicated_block = false;
switch (instr.scalar_opcode) { switch (instr.scalar_opcode) {
case AluScalarOpcode::kAdds: case AluScalarOpcode::kAdds:
case AluScalarOpcode::kAddsc0: case AluScalarOpcode::kAddsc0:
@ -3073,16 +3065,9 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
assert_true(b.getTypeId(dest) == float_type_); assert_true(b.getTypeId(dest) == float_type_);
if (dest) { if (dest) {
b.createStore(dest, ps_); b.createStore(dest, ps_);
StoreToResult(dest, instr.result); return true;
}
if (close_predicated_block && open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
} }
return false;
} }
Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( Id SpirvShaderTranslator::CreateGlslStd450InstructionCall(

View File

@ -88,8 +88,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
private: private:
spv::Function* CreateCubeFunction(); spv::Function* CreateCubeFunction();
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); bool& close_predicate_block);
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
bool& close_predicate_block);
spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed, spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
uint32_t offset, uint32_t count); uint32_t offset, uint32_t count);