From 20e607a35c9a2f401b36e0bc6428ed02988ef518 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 10 Sep 2018 21:35:12 +0300 Subject: [PATCH] [D3D12] DXBC exec and predicate --- src/xenia/gpu/dxbc_shader_translator.cc | 179 +++++++++++++++++++++--- src/xenia/gpu/dxbc_shader_translator.h | 43 ++++++ 2 files changed, 206 insertions(+), 16 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index d2ad5b58a..15cd4e5c2 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -69,6 +69,9 @@ void DxbcShaderTranslator::Reset() { rdef_constants_used_ = 0; system_temp_count_current_ = 0; system_temp_count_max_ = 0; + cf_currently_predicated_ = false; + cf_exec_predicated_ = false; + cf_exec_bool_constant_ = kCfExecBoolConstantNone; writes_depth_ = false; texture_srvs_.clear(); sampler_bindings_.clear(); @@ -1707,6 +1710,105 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, } } +void DxbcShaderTranslator::ClosePredicate() { + if (cf_currently_predicated_) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + cf_currently_predicated_ = false; + } +} + +void DxbcShaderTranslator::CheckPredicate( + bool instruction_predicated, bool instruction_predicate_condition) { + // If the instruction doesn't have its own predicate check, inherit it from + // the exec. + if (!instruction_predicated) { + instruction_predicated = cf_exec_predicated_; + instruction_predicate_condition = cf_exec_predicate_condition_; + } + // Close the current predicate if the conditions don't match or not predicated + // anymore. 
+ if (cf_currently_predicated_ && + (!instruction_predicated || + cf_current_predicate_condition_ != instruction_predicate_condition)) { + ClosePredicate(); + } + // Open a new predicate if predicated now, but the conditions don't match (or + // the previous instruction wasn't predicated). + if (instruction_predicated && + (!cf_currently_predicated_ || + cf_current_predicate_condition_ != instruction_predicate_condition)) { + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + instruction_predicate_condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_ps_pc_p0_a0_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + cf_currently_predicated_ = true; + cf_current_predicate_condition_ = instruction_predicate_condition; + } +} + +void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) { + if (cf_exec_bool_constant_ == index && + (index == kCfExecBoolConstantNone || + cf_exec_bool_constant_condition_ == condition)) { + return; + } + if (cf_exec_bool_constant_ != kCfExecBoolConstantNone) { + // Predicates are checked deeper than the bool constant. + ClosePredicate(); + // Close the current `if`. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + cf_exec_bool_constant_ = kCfExecBoolConstantNone; + } + if (index != kCfExecBoolConstantNone) { + uint32_t bool_constant_test_register = PushSystemTemp(); + // Check the bool constant's value. 
+ rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kBoolConstants); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(bool_constant_test_register); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3)); + shader_code_.push_back( + uint32_t(RdefConstantBufferIndex::kBoolLoopConstants)); + shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants)); + shader_code_.push_back(index >> 5); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(1u << (index & 31)); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + // Open the new `if`. + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(bool_constant_test_register); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + // Release bool_constant_test_register. + PopSystemTemp(); + cf_exec_bool_constant_ = index; + cf_exec_bool_constant_condition_ = condition; + } +} + void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index, uint32_t write_mask) { // Allocate temporary registers for intermediate values. @@ -1960,10 +2062,21 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { // 0 already added in the beginning. return; } + + // Force close all `if`s on the levels below for safety (they should be closed + // anyway, but what if). + // TODO(Triang3l): See if that's enough. 
At least in Halo 3, labels are only + // placed between different `exec`s - however, if in some game they can be + // located within `exec`s, this would require restoring all those `if`s after + // the label. + ClosePredicate(); + SetExecBoolConstant(kCfExecBoolConstantNone, false); + // Close the previous label. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; + // pc <= cf_index uint32_t test_register = PushSystemTemp(); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) | @@ -1992,6 +2105,38 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { PopSystemTemp(); } +void DxbcShaderTranslator::ProcessExecInstructionBegin( + const ParsedExecInstruction& instr) { + // Force close the last `exec` if ProcessExecInstructionEnd was somehow not + // called, just for safety. + ClosePredicate(); + cf_exec_predicated_ = false; + SetExecBoolConstant(kCfExecBoolConstantNone, false); + + // TODO(Triang3l): Handle PredicateClean=true somehow - still not known how it + // should be done (execs doing setp are marked as PredicateClean=false, + // however it's very unlikely that PredicateClean=true means clean the + // predicate after the exec - shaders in Halo 3 have sequences of (p0) exec + // without setp in them and without PredicateClean=false, if it was actually + // cleaned after exec, all but the first would never be executed. Let's just + // ignore them for now. + + if (instr.type == ParsedExecInstruction::Type::kConditional) { + SetExecBoolConstant(instr.bool_constant_index, instr.condition); + } else if (instr.type == ParsedExecInstruction::Type::kPredicated) { + // The predicate will actually be checked by the next ALU/fetch instruction. 
+ cf_exec_predicated_ = true; + cf_exec_predicate_condition_ = instr.condition; + } +} + +void DxbcShaderTranslator::ProcessExecInstructionEnd( + const ParsedExecInstruction& instr) { + ClosePredicate(); + cf_exec_predicated_ = false; + SetExecBoolConstant(kCfExecBoolConstantNone, false); +} + void DxbcShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { if (instr.operand_count < 2 || @@ -2040,7 +2185,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } uint32_t result_write_mask = (1 << result_component_count) - 1; - // TODO(Triang3l): Predicate. + CheckPredicate(instr.is_predicated, instr.predicate_condition); // Convert the index to an integer. DxbcSourceOperand index_operand; @@ -2408,7 +2553,7 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant, void DxbcShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { - // TODO(Triang3l): Predicate. + CheckPredicate(instr.is_predicated, instr.predicate_condition); bool store_result = false; @@ -2492,11 +2637,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( void DxbcShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { - // TODO(Triang3l): Predicate. - + CheckPredicate(instr.is_predicated, instr.predicate_condition); // Whether the instruction has changed the predicate and it needs to be // checked again. - bool close_predicate_block = false; + bool close_predicate = false; // Whether the result is only in X and all components should be remapped to X // while storing. 
@@ -3068,7 +3212,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSetpNePush: case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGePush: - close_predicate_block = true; + close_predicate = true; replicate_result = true; // pv.xy = (src0.x == 0.0, src0.w == 0.0) shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | @@ -3395,16 +3539,17 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( StoreResult(instr.result, system_temp_pv_, replicate_result); - // TODO(Triang3l): Close predicate check. + if (close_predicate) { + ClosePredicate(); + } } void DxbcShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { - // TODO(Triang3l): Predicate. - + CheckPredicate(instr.is_predicated, instr.predicate_condition); // Whether the instruction has changed the predicate and it needs to be // checked again. - bool close_predicate_block = false; + bool close_predicate = false; DxbcSourceOperand dxbc_operands[3]; uint32_t operand_lengths[3]; @@ -3952,7 +4097,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kSetpNe: case AluScalarOpcode::kSetpGt: case AluScalarOpcode::kSetpGe: - close_predicate_block = true; + close_predicate = true; // Set p0 to whether the comparison with zero passes. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE( @@ -3993,7 +4138,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpInv: - close_predicate_block = true; + close_predicate = true; // Compare src0 to 0.0 (taking denormals into account, for instance) to // know what to set ps to in case src0 is not 1.0. 
shader_code_.push_back( @@ -4058,7 +4203,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpPop: - close_predicate_block = true; + close_predicate = true; // ps = src0 - 1.0 shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | @@ -4104,7 +4249,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpClr: - close_predicate_block = true; + close_predicate = true; // ps = FLT_MAX shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); @@ -4130,7 +4275,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpRstr: - close_predicate_block = true; + close_predicate = true; // Copy src0 to ps. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | @@ -4270,7 +4415,9 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true); - // TODO(Triang3l): Close predicate check. 
+ if (close_predicate) { + ClosePredicate(); + } } void DxbcShaderTranslator::ProcessAluInstruction( diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 165d2a50e..c912cf591 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -95,6 +95,9 @@ class DxbcShaderTranslator : public ShaderTranslator { void ProcessLabel(uint32_t cf_index) override; + void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; + void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; + void ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) override; void ProcessTextureFetchInstruction( @@ -300,6 +303,32 @@ class DxbcShaderTranslator : public ShaderTranslator { void StoreResult(const InstructionResult& result, uint32_t reg, bool replicate_x); + // The nesting of `if` instructions is the following: + // - pc checks (labels). + // - Bool constant checks (can only be done by exec). + // - Predicate checks (can be done both by exec and by instructions). + // It's probably fine to place instruction predicate checks and exec predicate + // on the same level rather than creating another level for instruction-level + // predicates, because (at least in Halo 3), in a `(p0) exec`, all + // instructions are `(p0)`, and `setp` isn't invoked in `(p0) exec`. Another + // possible constraint making things easier is labels not appearing within + // execs - so a label doesn't have to recheck the exec's condition. + // TODO(Triang3l): Check if these control flow constraints are true for all + // games. + + // Closes the current predicate `if` (but doesn't reset the current exec's + // predicate). + void ClosePredicate(); + // Updates the current predicate, placing if/endif when needed. 
This MUST be + // called before emitting any translated instructions because the exec + // implementation here doesn't place if/endif, only defers updating the + // predicate. + void CheckPredicate(bool instruction_predicated, + bool instruction_predicate_condition); + // Opens or closes the `if` checking the value of a bool constant - call with + // kCfExecBoolConstantNone to force close. + void SetExecBoolConstant(uint32_t index, bool condition); + // Emits copde for endian swapping of the data located in pv. void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask); @@ -470,6 +499,20 @@ class DxbcShaderTranslator : public ShaderTranslator { // remapping). uint32_t system_temp_color_[4]; + // Whether a predicate `if` is open. + bool cf_currently_predicated_; + // Currently expected predicate value. + bool cf_current_predicate_condition_; + // Whether the current `exec` is predicated. + bool cf_exec_predicated_; + // Predicate condition in the current `exec`. + bool cf_exec_predicate_condition_; + // The bool constant number containing the condition for the current `exec`. + uint32_t cf_exec_bool_constant_; + static constexpr uint32_t kCfExecBoolConstantNone = UINT32_MAX; + // The expected value in the current conditional exec. + bool cf_exec_bool_constant_condition_; + bool writes_depth_; std::vector texture_srvs_;