diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index e3627d94f..7e308efe4 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -504,9 +504,10 @@ void DxbcShaderTranslator::Reset() { system_temp_count_current_ = 0; system_temp_count_max_ = 0; - cf_currently_predicated_ = false; - cf_exec_predicated_ = false; cf_exec_bool_constant_ = kCfExecBoolConstantNone; + cf_exec_predicated_ = false; + cf_instruction_predicate_if_open_ = false; + cf_exec_predicate_written_ = false; writes_depth_ = false; @@ -5454,6 +5455,9 @@ void DxbcShaderTranslator::CompletePixelShader() { void DxbcShaderTranslator::CompleteShaderCode() { if (!is_depth_only_pixel_shader_) { + // Close the last exec, there's nothing to merge it with anymore, and we're + // closing upper-level flow control blocks. + CloseExecConditionals(); // Close the last label and the switch. if (FLAGS_dxbc_switch) { shader_code_.push_back( @@ -6517,69 +6521,44 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, } } -void DxbcShaderTranslator::ClosePredicate() { - if (cf_currently_predicated_) { - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; - cf_currently_predicated_ = false; +void DxbcShaderTranslator::UpdateExecConditionals( + ParsedExecInstruction::Type type, uint32_t bool_constant_index, + bool condition) { + // Check if we can merge the new exec with the previous one, or the jump with + // the previous exec. The instruction-level predicate check is also merged in + // this case. + if (type == ParsedExecInstruction::Type::kConditional) { + // Can merge conditional with conditional, as long as the bool constant and + // the expected values are the same. 
+ if (cf_exec_bool_constant_ == bool_constant_index && + cf_exec_bool_constant_condition_ == condition) { + return; + } + } else if (type == ParsedExecInstruction::Type::kPredicated) { + // Can merge predicated with predicated if the conditions are the same and + // the previous exec hasn't modified the predicate register. + if (!cf_exec_predicate_written_ && cf_exec_predicated_ && + cf_exec_predicate_condition_ == condition) { + return; + } + } else { + // Can merge unconditional with unconditional. + if (cf_exec_bool_constant_ == kCfExecBoolConstantNone && + !cf_exec_predicated_) { + return; + } } -} -void DxbcShaderTranslator::CheckPredicate( - bool instruction_predicated, bool instruction_predicate_condition) { - // If the instruction doesn't have its own predicate check, inherit it from - // the exec. - if (!instruction_predicated) { - instruction_predicated = cf_exec_predicated_; - instruction_predicate_condition = cf_exec_predicate_condition_; - } - // Close the current predicate if the conditions don't match or not predicated - // anymore. - if (cf_currently_predicated_ && - (!instruction_predicated || - cf_current_predicate_condition_ != instruction_predicate_condition)) { - ClosePredicate(); - } - // Open a new predicate if predicated now, but the conditions don't match (or - // the previous instruction wasn't predicated). - if (instruction_predicated && - (!cf_currently_predicated_ || - cf_current_predicate_condition_ != instruction_predicate_condition)) { - D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = - instruction_predicate_condition ? 
D3D10_SB_INSTRUCTION_TEST_NONZERO - : D3D10_SB_INSTRUCTION_TEST_ZERO; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_ps_pc_p0_a0_); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; - cf_currently_predicated_ = true; - cf_current_predicate_condition_ = instruction_predicate_condition; - } -} + CloseExecConditionals(); -void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) { - if (cf_exec_bool_constant_ == index && - (index == kCfExecBoolConstantNone || - cf_exec_bool_constant_condition_ == condition)) { - return; - } - if (cf_exec_bool_constant_ != kCfExecBoolConstantNone) { - // Predicates are checked deeper than the bool constant. - ClosePredicate(); - // Close the current `if`. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; - cf_exec_bool_constant_ = kCfExecBoolConstantNone; - } - if (index != kCfExecBoolConstantNone) { + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + + if (type == ParsedExecInstruction::Type::kConditional) { uint32_t bool_constant_test_register = PushSystemTemp(); - // Check the bool constant's value. + + // Check the bool constant value. 
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } @@ -6592,16 +6571,14 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) { EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3)); shader_code_.push_back(cbuffer_index_bool_loop_constants_); shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants)); - shader_code_.push_back(index >> 5); + shader_code_.push_back(bool_constant_index >> 5); shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1u << (index & 31)); + shader_code_.push_back(1u << (bool_constant_index & 31)); ++stat_.instruction_count; ++stat_.uint_instruction_count; + // Open the new `if`. - D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = - condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO - : D3D10_SB_INSTRUCTION_TEST_ZERO; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); @@ -6610,10 +6587,87 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) { shader_code_.push_back(bool_constant_test_register); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; + // Release bool_constant_test_register. 
PopSystemTemp(); - cf_exec_bool_constant_ = index; + + cf_exec_bool_constant_ = bool_constant_index; cf_exec_bool_constant_condition_ = condition; + } else if (type == ParsedExecInstruction::Type::kPredicated) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_ps_pc_p0_a0_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + cf_exec_predicated_ = true; + cf_exec_predicate_condition_ = condition; + } +} + +void DxbcShaderTranslator::CloseExecConditionals() { + // Within the exec - instruction-level predicate check. + CloseInstructionPredication(); + // Exec level. + if (cf_exec_bool_constant_ != kCfExecBoolConstantNone || + cf_exec_predicated_) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + cf_exec_bool_constant_ = kCfExecBoolConstantNone; + cf_exec_predicated_ = false; + } + // Nothing relies on the predicate value being unchanged now. + cf_exec_predicate_written_ = false; +} + +void DxbcShaderTranslator::UpdateInstructionPredication(bool predicated, + bool condition) { + if (predicated) { + if (cf_instruction_predicate_if_open_) { + if (cf_instruction_predicate_condition_ == condition) { + // Already in the needed instruction-level `if`. + return; + } + CloseInstructionPredication(); + } + // If the instruction predicate condition is the same as the exec predicate + // condition, no need to open a check. However, if there was a `setp` prior + // to this instruction, the predicate value now may be different than it was + // in the beginning of the exec. 
+ if (!cf_exec_predicate_written_ && cf_exec_predicated_ && + cf_exec_predicate_condition_ == condition) { + return; + } + + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_ps_pc_p0_a0_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + cf_instruction_predicate_if_open_ = true; + cf_instruction_predicate_condition_ = condition; + } else { + CloseInstructionPredication(); + } +} + +void DxbcShaderTranslator::CloseInstructionPredication() { + if (cf_instruction_predicate_if_open_) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + cf_instruction_predicate_if_open_ = false; } } @@ -6891,14 +6945,9 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { return; } - // Force close all `if`s on the levels below for safety (they should be closed - // anyway, but what if). - // TODO(Triang3l): See if that's enough. At least in Halo 3, labels are only - // placed between different `exec`s - however, if in some game they can be - // located within `exec`s, this would require restoring all those `if`s after - // the label. - ClosePredicate(); - SetExecBoolConstant(kCfExecBoolConstantNone, false); + // Close flow control on the deeper levels below - prevent attempts to merge + // execs across labels. 
+ CloseExecConditionals(); if (FLAGS_dxbc_switch) { // Fallthrough to the label from the previous one on the next iteration if @@ -6967,38 +7016,19 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { void DxbcShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { - // Force close the last `exec` if ProcessExecInstructionEnd was somehow not - // called, just for safety. - ClosePredicate(); - cf_exec_predicated_ = false; - SetExecBoolConstant(kCfExecBoolConstantNone, false); - - // TODO(Triang3l): Handle PredicateClean=true somehow - still not known how it - // should be done (execs doing setp are marked as PredicateClean=false, - // however it's very unlikely that PredicateClean=true means clean the - // predicate after the exec - shaders in Halo 3 have sequences of (p0) exec - // without setp in them and without PredicateClean=false, if it was actually - // cleaned after exec, all but the first would never be executed. Let's just - // ignore them for now. - - if (instr.type == ParsedExecInstruction::Type::kConditional) { - SetExecBoolConstant(instr.bool_constant_index, instr.condition); - } else if (instr.type == ParsedExecInstruction::Type::kPredicated) { - // The predicate will actually be checked by the next ALU/fetch instruction. - cf_exec_predicated_ = true; - cf_exec_predicate_condition_ = instr.condition; - } + UpdateExecConditionals(instr.type, instr.bool_constant_index, + instr.condition); + // TODO(Triang3l): Find out what PredicateClean=false in exec actually means + // (execs containing setp have PredicateClean=false, it possibly means that + // the predicate is dirty after the exec). } void DxbcShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { // TODO(Triang3l): Check whether is_end is conditional or not. if (instr.is_end) { - // In case some instruction has flipped the predicate condition. 
- if (cf_exec_predicated_) { - CheckPredicate(cf_exec_predicated_, cf_exec_predicate_condition_); - } // Break out of the main loop. + CloseInstructionPredication(); if (FLAGS_dxbc_switch) { // Write an invalid value to pc. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | @@ -7023,15 +7053,15 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd( ++stat_.instruction_count; } } - ClosePredicate(); - cf_exec_predicated_ = false; - SetExecBoolConstant(kCfExecBoolConstantNone, false); } void DxbcShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { // loop il, L - loop with loop data il, end @ L + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); + uint32_t loop_count_and_aL = PushSystemTemp(); // Count (as uint) in bits 0:7 of the loop constant, aL in 8:15. @@ -7135,6 +7165,9 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { // endloop il, L - end loop w/ data il, head @ L + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); + // Subtract 1 from the loop counter. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); @@ -7287,62 +7320,24 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( void DxbcShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { - D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = - instr.condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO - : D3D10_SB_INSTRUCTION_TEST_ZERO; - + // Treat like exec, merge with execs if possible, since it's an if too. + ParsedExecInstruction::Type type; if (instr.type == ParsedJumpInstruction::Type::kConditional) { - uint32_t bool_constant_test_register = PushSystemTemp(); - // Check the bool constant's value. 
- if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) { - cbuffer_index_bool_loop_constants_ = cbuffer_count_++; - } - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(bool_constant_test_register); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3)); - shader_code_.push_back(cbuffer_index_bool_loop_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants)); - shader_code_.push_back(instr.bool_constant_index >> 5); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1u << (instr.bool_constant_index & 31)); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - // Open the `if`. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(bool_constant_test_register); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; - // Release bool_constant_test_register. - PopSystemTemp(); + type = ParsedExecInstruction::Type::kConditional; } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) { - // Called outside of exec - need to check the predicate explicitly. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_ps_pc_p0_a0_); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; + type = ParsedExecInstruction::Type::kPredicated; + } else { + type = ParsedExecInstruction::Type::kUnconditional; } + UpdateExecConditionals(type, instr.bool_constant_index, instr.condition); + + // UpdateExecConditionals may not necessarily close the instruction-level + // predicate check (it's not necessary if the execs are merged), but here the + // instruction itself is on the flow control level, so the predicate check is + // on the flow control level too. + CloseInstructionPredication(); JumpToLabel(instr.target_address); - - if (instr.type == ParsedJumpInstruction::Type::kConditional || - instr.type == ParsedJumpInstruction::Type::kPredicated) { - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; - } } void DxbcShaderTranslator::ProcessVertexFetchInstruction( @@ -7393,7 +7388,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } uint32_t result_write_mask = (1 << result_component_count) - 1; - CheckPredicate(instr.is_predicated, instr.predicate_condition); + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); // Convert the index to an integer. 
DxbcSourceOperand index_operand; @@ -8143,7 +8138,69 @@ void DxbcShaderTranslator::ArrayCoordToCubeDirection(uint32_t reg) { void DxbcShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { - CheckPredicate(instr.is_predicated, instr.predicate_condition); + // Predication should not affect derivative calculation: + // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color + // Do the part involving derivative calculation unconditionally, and re-enter + // the predicate check before writing the result. + bool suppress_predication = false; + if (IsDXBCPixelShader()) { + if (instr.opcode == FetchOpcode::kGetTextureComputedLod || + instr.opcode == FetchOpcode::kGetTextureGradients) { + suppress_predication = true; + } else if (instr.opcode == FetchOpcode::kTextureFetch) { + suppress_predication = instr.attributes.use_computed_lod && + !instr.attributes.use_register_lod; + } + } + uint32_t exec_p0_temp = UINT32_MAX; + if (suppress_predication) { + // Close instruction-level predication. + CloseInstructionPredication(); + // Temporarily close exec-level predication - will reopen at the end, so not + // changing cf_exec_predicated_. + if (cf_exec_predicated_) { + if (cf_exec_predicate_written_) { + // Restore the predicate value in the beginning of the exec and put it + // in exec_p0_temp. + exec_p0_temp = PushSystemTemp(); + // `if` case - the value was cf_exec_predicate_condition_. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(exec_p0_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(cf_exec_predicate_condition_ ? 
0xFFFFFFFFu : 0u); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + // `else` case - the value was !cf_exec_predicate_condition_. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(exec_p0_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(cf_exec_predicate_condition_ ? 0u : 0xFFFFFFFFu); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + } + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + } else { + UpdateInstructionPredication(instr.is_predicated, + instr.predicate_condition); + } bool store_result = false; // Whether the result is only in X and all components should be remapped to X @@ -9401,6 +9458,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( UnloadDxbcSourceOperand(operand); } + // Re-enter conditional execution if closed it. + if (suppress_predication) { + // Re-enter exec-level predication. + if (cf_exec_predicated_) { + D3D10_SB_INSTRUCTION_TEST_BOOLEAN test = + cf_exec_predicate_condition_ ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, exec_p0_temp != UINT32_MAX ? 0 : 2, 1)); + shader_code_.push_back( + exec_p0_temp != UINT32_MAX ? 
exec_p0_temp : system_temp_ps_pc_p0_a0_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + if (exec_p0_temp != UINT32_MAX) { + PopSystemTemp(); + } + } + // Update instruction-level predication to the one needed by this tfetch. + UpdateInstructionPredication(instr.is_predicated, + instr.predicate_condition); + } + if (store_result) { StoreResult(instr.result, system_temp_pv_, replicate_result); } @@ -9408,10 +9490,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( void DxbcShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { - CheckPredicate(instr.is_predicated, instr.predicate_condition); + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); // Whether the instruction has changed the predicate and it needs to be - // checked again. - bool close_predicate = false; + // checked again later. + bool predicate_written = false; // Whether the result is only in X and all components should be remapped to X // while storing. 
@@ -10237,7 +10319,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSetpNePush: case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGePush: - close_predicate = true; + predicate_written = true; replicate_result = true; // pv.xy = (src0.x == 0.0, src0.w == 0.0) shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | @@ -10631,17 +10713,18 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction( StoreResult(instr.result, system_temp_pv_, replicate_result); - if (close_predicate) { - ClosePredicate(); + if (predicate_written) { + cf_exec_predicate_written_ = true; + CloseInstructionPredication(); } } void DxbcShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { - CheckPredicate(instr.is_predicated, instr.predicate_condition); + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); // Whether the instruction has changed the predicate and it needs to be - // checked again. - bool close_predicate = false; + // checked again later. + bool predicate_written = false; DxbcSourceOperand dxbc_operands[3]; uint32_t operand_lengths[3]; @@ -11328,7 +11411,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kSetpNe: case AluScalarOpcode::kSetpGt: case AluScalarOpcode::kSetpGe: - close_predicate = true; + predicate_written = true; // Set p0 to whether the comparison with zero passes. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE( @@ -11369,7 +11452,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpInv: - close_predicate = true; + predicate_written = true; // Compare src0 to 0.0 (taking denormals into account, for instance) to // know what to set ps to in case src0 is not 1.0. 
shader_code_.push_back( @@ -11434,7 +11517,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpPop: - close_predicate = true; + predicate_written = true; // ps = src0 - 1.0 shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | @@ -11480,7 +11563,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpClr: - close_predicate = true; + predicate_written = true; // ps = FLT_MAX shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); @@ -11506,7 +11589,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( break; case AluScalarOpcode::kSetpRstr: - close_predicate = true; + predicate_written = true; // Copy src0 to ps. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | @@ -11720,8 +11803,9 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction( StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true); - if (close_predicate) { - ClosePredicate(); + if (predicate_written) { + cf_exec_predicate_written_ = true; + CloseInstructionPredication(); } } diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 3bda0a5e4..1abcc808c 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -849,29 +849,40 @@ class DxbcShaderTranslator : public ShaderTranslator { // The nesting of `if` instructions is the following: // - pc checks (labels). - // - Bool constant checks (can only be done by exec). - // - Predicate checks (can be done both by exec and by instructions). - // It's probably fine to place instruction predicate checks and exec predicate - // on the same level rather than creating another level for instruction-level - // predicates, because (at least in Halo 3), in a `(p0) exec`, all - // instructions are `(p0)`, and `setp` isn't invoked in `(p0) exec`. 
Another - // possible constraint making things easier is labels not appearing within - // execs - so a label doesn't have to recheck the exec's condition. - // TODO(Triang3l): Check if these control flow constrains are true for all - // games. + // - exec predicate/bool constant check. + // - Instruction-level predicate checks. + // As an optimization, where possible, the DXBC translator tries to merge + // multiple execs into one, not creating endif/if doing nothing, if the + // execution condition is the same. This can't be done across labels + // (obviously) and in case `setp` is done in a predicated exec - in this case, + // the predicate value in the current exec may not match the predicate value + // in the next exec. + // Instruction-level predicate checks are also merged, and until a `setp` is + // done, if the instruction has the same predicate condition as the exec it is + // in, no instruction-level predicate `if` is created as well. One exception + // to the usual way of instruction-level predicate handling is made for + // instructions involving derivative computation, such as texture fetches with + // computed LOD. The part involving derivatives is executed disregarding the + // predication, but the result storing is predicated (this is handled in + // texture fetch instruction implementation): + // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color - // Closes the current predicate `if` (but doesn't reset the current exec's - // predicate). - void ClosePredicate(); - // Updates the current predicate, placing if/endif when needed. This MUST be - // called before emitting any instructions within an exec because the exec - // implementation here doesn't place if/endif, only defers updating the - // predicate. 
- void CheckPredicate(bool instruction_predicated, - bool instruction_predicate_condition); - // Opens or closes the `if` checking the value of a bool constant - call with - // kCfExecBoolConstantNone to force close. - void SetExecBoolConstant(uint32_t index, bool condition); + // Updates the current flow control condition (to be called in the beginning + // of exec and in jumps), closing the previous conditionals if needed. + // However, if the condition is not different, the instruction-level predicate + // `if` also won't be closed - this must be checked separately if needed (for + // example, in jumps). + void UpdateExecConditionals(ParsedExecInstruction::Type type, + uint32_t bool_constant_index, bool condition); + // Closes `if`s opened by exec and instructions within them (but not by + // labels) and updates the state accordingly. + void CloseExecConditionals(); + // Opens or reopens the predicate check conditional for the instruction. + void UpdateInstructionPredication(bool predicated, bool condition); + // Closes the instruction-level predicate `if` if it's open, useful if a flow + // control instruction needs to do some code which needs to respect the exec's + // conditional, but can't itself be predicated. + void CloseInstructionPredication(); void JumpToLabel(uint32_t address); // Emits copde for endian swapping of the data located in pv. @@ -1030,19 +1041,29 @@ class DxbcShaderTranslator : public ShaderTranslator { // for accuracy. uint32_t system_temp_depth_; - // Whether a predicate `if` is open. - bool cf_currently_predicated_; - // Currently expected predicate value. - bool cf_current_predicate_condition_; - // Whether the current `exec` is predicated. - bool cf_exec_predicated_; - // Predicate condition in the current `exec`. - bool cf_exec_predicate_condition_; - // The bool constant number containing the condition for the current `exec`. 
+ // The bool constant number containing the condition for the currently + // processed exec (or the last - unless a label has reset this), or + // kCfExecBoolConstantNone if it's not checked. uint32_t cf_exec_bool_constant_; static constexpr uint32_t kCfExecBoolConstantNone = UINT32_MAX; - // The expected value in the current conditional exec. + // The expected bool constant value in the current exec if + // cf_exec_bool_constant_ is not kCfExecBoolConstantNone. bool cf_exec_bool_constant_condition_; + // Whether the currently processed exec is executed if a predicate is + // set/unset. + bool cf_exec_predicated_; + // The expected predicate condition if cf_exec_predicated_ is true. + bool cf_exec_predicate_condition_; + // Whether an `if` for instruction-level predicate check is currently open. + bool cf_instruction_predicate_if_open_; + // The expected predicate condition for the current or the last instruction if + // cf_instruction_predicate_if_open_ is true. + bool cf_instruction_predicate_condition_; + // Whether there was a `setp` in the current exec before the current + // instruction, thus instruction-level predicate value can be different than + // the exec-level predicate value, and can't merge two execs with the same + // predicate condition anymore. + bool cf_exec_predicate_written_; bool writes_depth_;