[D3D12] DXBC control flow

This commit is contained in:
Triang3l 2018-09-10 23:30:53 +03:00
parent d5d3f336f5
commit 06fabd16ad
2 changed files with 232 additions and 13 deletions

View File

@ -1809,6 +1809,22 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
}
}
void DxbcShaderTranslator::JumpToLabel(uint32_t address) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(address);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
uint32_t write_mask) {
// Allocate temporary registers for intermediate values.
@ -2244,24 +2260,223 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
shader_code_.push_back(system_temp_loop_count_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(instr.loop_skip_address);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
JumpToLabel(instr.loop_skip_address);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
void DxbcShaderTranslator::ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) {
// endloop il<idx>, L<idx> - end loop w/ data il<idx>, head @ L<idx>
// Subtract 1 from the loop counter.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_loop_count_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_loop_count_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(-1));
++stat_.instruction_count;
++stat_.int_instruction_count;
// Break case.
if (instr.is_predicated_break) {
// if (loop_count.x == 0 || [!]p0)
uint32_t break_case_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(break_case_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
if (instr.predicate_condition) {
// If p0 is non-zero, set the test value to 0 (since if_z is used,
// otherwise check if the loop counter is zero).
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
}
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_loop_count_);
if (!instr.predicate_condition) {
// If p0 is zero, set the test value to 0 (since if_z is used, otherwise
// check if the loop counter is zero).
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
}
++stat_.instruction_count;
++stat_.movc_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(break_case_temp);
PopSystemTemp();
} else {
// if (loop_count.x == 0)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_loop_count_);
}
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Pop the current loop off the stack, move YZW to XYZ and set W to 0.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(system_temp_loop_count_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b11111001, 1));
shader_code_.push_back(system_temp_loop_count_);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(system_temp_loop_count_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.mov_instruction_count;
// Now going to fall through to the next exec (no need to jump).
// Continue case.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
uint32_t aL_add_temp = PushSystemTemp();
// Extract the value to add to aL (in bits 16:23 of the loop constant).
rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kLoopConstants);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(aL_add_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(8);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(16);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kBoolLoopConstants));
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 because of bool constants.
shader_code_.push_back(8 + instr.loop_constant_index);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Add the needed value to aL.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_aL_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_loop_count_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(aL_add_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Release aL_add_temp.
PopSystemTemp();
// Jump back to the beginning of the loop body.
JumpToLabel(instr.loop_body_address);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
void DxbcShaderTranslator::ProcessJumpInstruction(
const ParsedJumpInstruction& instr) {
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
instr.condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
if (instr.type == ParsedJumpInstruction::Type::kConditional) {
uint32_t bool_constant_test_register = PushSystemTemp();
// Check the bool constant's value.
rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kBoolConstants);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(bool_constant_test_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(
uint32_t(RdefConstantBufferIndex::kBoolLoopConstants));
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(instr.bool_constant_index >> 5);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1u << (instr.bool_constant_index & 31));
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Open the `if`.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(bool_constant_test_register);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Release bool_constant_test_register.
PopSystemTemp();
} else if (instr.type == ParsedJumpInstruction::Type::kPredicated) {
// Called outside of exec - need to check the predicate explicitly.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
JumpToLabel(instr.target_address);
if (instr.type == ParsedJumpInstruction::Type::kConditional ||
instr.type == ParsedJumpInstruction::Type::kPredicated) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
void DxbcShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) {
if (instr.operand_count < 2 ||

View File

@ -99,6 +99,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) override;
void ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override;
@ -330,6 +333,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Opens or closes the `if` checking the value of a bool constant - call with
// kCfExecBoolConstantNone to force close.
void SetExecBoolConstant(uint32_t index, bool condition);
void JumpToLabel(uint32_t address);
// Emits copde for endian swapping of the data located in pv.
void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);