[D3D12] DXBC: Use switch instead of if for flow control (experimentally)

This commit is contained in:
Triang3l 2018-09-19 21:25:58 +03:00
parent 4db3a927ad
commit cbd36218bf
1 changed file with 130 additions and 47 deletions

View File

@ -28,6 +28,10 @@ DEFINE_bool(dxbc_indexable_temps, true,
"relative addressing of general-purpose registers - shaders rarely " "relative addressing of general-purpose registers - shaders rarely "
"do that, but when they do, this may improve performance on AMD, " "do that, but when they do, this may improve performance on AMD, "
"but may cause unknown issues on Nvidia."); "but may cause unknown issues on Nvidia.");
// Selects how the translator emits dispatch on the shader program counter:
// when true, labels are emitted as `case` blocks of a DXBC `switch`; when
// false, each label is guarded by an `if` comparing the program counter.
DEFINE_bool(dxbc_switch, true,
"Use switch rather than if for flow control. Turning this off or "
"on may improve stability, though this heavily depends on the "
"driver.");
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -652,16 +656,33 @@ void DxbcShaderTranslator::StartTranslation() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
// First label (pc == 0). // Switch and the first label (pc == 0).
shader_code_.push_back( if (FLAGS_dxbc_switch) {
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO)); shader_code_.push_back(
shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(system_temp_ps_pc_p0_a0_); ++stat_.instruction_count;
++stat_.instruction_count; ++stat_.dynamic_flow_control_count;
++stat_.dynamic_flow_control_count; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CASE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.static_flow_control_count;
} else {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_ZERO));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
} }
void DxbcShaderTranslator::CompleteVertexShader() { void DxbcShaderTranslator::CompleteVertexShader() {
@ -1002,10 +1023,20 @@ void DxbcShaderTranslator::CompletePixelShader() {
} }
void DxbcShaderTranslator::CompleteShaderCode() { void DxbcShaderTranslator::CompleteShaderCode() {
// Close the last label. // Close the last label and the switch.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | if (FLAGS_dxbc_switch) {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
++stat_.instruction_count; ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDSWITCH) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
} else {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
// End the main loop. // End the main loop.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
@ -2424,37 +2455,69 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
ClosePredicate(); ClosePredicate();
SetExecBoolConstant(kCfExecBoolConstantNone, false); SetExecBoolConstant(kCfExecBoolConstantNone, false);
// Close the previous label. if (FLAGS_dxbc_switch) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | // Fallthrough to the label from the previous one on the next iteration if
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); // no `continue` was done. Can't simply fallthrough because in DXBC, a
++stat_.instruction_count; // non-empty switch case must end with a break.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(cf_index);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Close the previous label.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Go to the next label.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CASE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(cf_index);
++stat_.instruction_count;
++stat_.static_flow_control_count;
} else {
// Close the previous label.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// pc <= cf_index // pc <= cf_index
uint32_t test_register = PushSystemTemp(); uint32_t test_register = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(test_register); shader_code_.push_back(test_register);
shader_code_.push_back( shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(cf_index); shader_code_.push_back(cf_index);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_); shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// if (pc <= cf_index) // if (pc <= cf_index)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO)); D3D10_SB_INSTRUCTION_TEST_NONZERO));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(test_register); shader_code_.push_back(test_register);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
PopSystemTemp(); PopSystemTemp();
}
} }
void DxbcShaderTranslator::ProcessExecInstructionBegin( void DxbcShaderTranslator::ProcessExecInstructionBegin(
@ -2491,9 +2554,29 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
CheckPredicate(cf_exec_predicated_, cf_exec_predicate_condition_); CheckPredicate(cf_exec_predicated_, cf_exec_predicate_condition_);
} }
// Break out of the main loop. // Break out of the main loop.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) | if (FLAGS_dxbc_switch) {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); // Write an invalid value to pc.
++stat_.instruction_count; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0xFFFFFFFFu);
++stat_.instruction_count;
++stat_.mov_instruction_count;
// Go to the next iteration, where switch cases won't be reached.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
} }
ClosePredicate(); ClosePredicate();
cf_exec_predicated_ = false; cf_exec_predicated_ = false;