From 06fabd16ad05476c8a75792ae269dfcab71323f9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 10 Sep 2018 23:30:53 +0300 Subject: [PATCH] [D3D12] DXBC control flow --- src/xenia/gpu/dxbc_shader_translator.cc | 241 ++++++++++++++++++++++-- src/xenia/gpu/dxbc_shader_translator.h | 4 + 2 files changed, 232 insertions(+), 13 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index af4b88715..442862ba9 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1809,6 +1809,22 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) { } } +void DxbcShaderTranslator::JumpToLabel(uint32_t address) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(system_temp_ps_pc_p0_a0_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(address); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; +} + void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index, uint32_t write_mask) { // Allocate temporary registers for intermediate values. 
@@ -2244,24 +2260,223 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction( shader_code_.push_back(system_temp_loop_count_); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_ps_pc_p0_a0_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(instr.loop_skip_address); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CONTINUE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; + JumpToLabel(instr.loop_skip_address); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; } +void DxbcShaderTranslator::ProcessLoopEndInstruction( + const ParsedLoopEndInstruction& instr) { + // endloop il, L - end loop w/ data il, head @ L + + // Subtract 1 from the loop counter. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_loop_count_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_loop_count_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(uint32_t(-1)); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // Break case. 
+
+  if (instr.is_predicated_break) {
+    // if (loop_count.x == 0 || [!]p0), folded into one test value via movc.
+    uint32_t break_case_temp = PushSystemTemp();
+    shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
+    shader_code_.push_back(
+        EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
+    shader_code_.push_back(break_case_temp);
+    shader_code_.push_back(
+        EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
+    shader_code_.push_back(system_temp_ps_pc_p0_a0_);
+    if (instr.predicate_condition) {
+      // If p0 is non-zero, set the test value to 0 (since if_z is used);
+      // otherwise test the loop counter for zero.
+      shader_code_.push_back(
+          EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
+      shader_code_.push_back(0);
+    }
+    shader_code_.push_back(
+        EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
+    shader_code_.push_back(system_temp_loop_count_);
+    if (!instr.predicate_condition) {
+      // If p0 is zero, set the test value to 0 (since if_z is used);
+      // otherwise test the loop counter for zero.
+ shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0); + } + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_ZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(break_case_temp); + PopSystemTemp(); + } else { + // if (loop_count.x == 0) + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_ZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_loop_count_); + } + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + // Pop the current loop off the stack, move YZW to XYZ and set W to 0. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(system_temp_loop_count_); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b11111001, 1)); + shader_code_.push_back(system_temp_loop_count_); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); + shader_code_.push_back(system_temp_loop_count_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + + // Now going to fall through to the next exec (no need to jump). + + // Continue case. + + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + + uint32_t aL_add_temp = PushSystemTemp(); + + // Extract the value to add to aL (in bits 16:23 of the loop constant). 
+  rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kLoopConstants);
+  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
+                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
+  shader_code_.push_back(
+      EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
+  shader_code_.push_back(aL_add_temp);
+  shader_code_.push_back(
+      EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
+  shader_code_.push_back(8);
+  shader_code_.push_back(
+      EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
+  shader_code_.push_back(16);
+  shader_code_.push_back(
+      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
+  shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kBoolLoopConstants));
+  shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
+  // Skip 8 entries because of bool constants (presumably stored first).
+  shader_code_.push_back(8 + instr.loop_constant_index);
+  ++stat_.instruction_count;
+  ++stat_.uint_instruction_count;
+
+  // Add the needed value to aL: aL.x = aL.x + aL_add_temp.x.
+  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
+                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
+  shader_code_.push_back(
+      EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
+  shader_code_.push_back(system_temp_aL_);
+  shader_code_.push_back(
+      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
+  shader_code_.push_back(system_temp_aL_);
+  shader_code_.push_back(
+      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
+  shader_code_.push_back(aL_add_temp);
+  ++stat_.instruction_count;
+  ++stat_.int_instruction_count;
+
+  // Release aL_add_temp.
+  PopSystemTemp();
+
+  // Jump back to the beginning of the loop body.
+ JumpToLabel(instr.loop_body_address); + + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; +} + +void DxbcShaderTranslator::ProcessJumpInstruction( + const ParsedJumpInstruction& instr) { + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + instr.condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + + if (instr.type == ParsedJumpInstruction::Type::kConditional) { + uint32_t bool_constant_test_register = PushSystemTemp(); + // Check the bool constant's value. + rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kBoolConstants); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(bool_constant_test_register); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3)); + shader_code_.push_back( + uint32_t(RdefConstantBufferIndex::kBoolLoopConstants)); + shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants)); + shader_code_.push_back(instr.bool_constant_index >> 5); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(1u << (instr.bool_constant_index & 31)); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + // Open the `if`. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(bool_constant_test_register); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + // Release bool_constant_test_register. 
+ PopSystemTemp(); + } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) { + // Called outside of exec - need to check the predicate explicitly. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_ps_pc_p0_a0_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + } + + JumpToLabel(instr.target_address); + + if (instr.type == ParsedJumpInstruction::Type::kConditional || + instr.type == ParsedJumpInstruction::Type::kPredicated) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } +} + void DxbcShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { if (instr.operand_count < 2 || diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index e634ba8fa..eaae29dda 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -99,6 +99,9 @@ class DxbcShaderTranslator : public ShaderTranslator { void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; void ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) override; + void ProcessLoopEndInstruction( + const ParsedLoopEndInstruction& instr) override; + void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; void ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) override; @@ -330,6 +333,7 @@ class DxbcShaderTranslator : public ShaderTranslator { // Opens or closes the `if` checking the value of a bool constant - call with // kCfExecBoolConstantNone to force close. 
void SetExecBoolConstant(uint32_t index, bool condition); + void JumpToLabel(uint32_t address); // Emits copde for endian swapping of the data located in pv. void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);