[D3D12] DXBC: Rewrite conditional execution

This commit is contained in:
Triang3l 2018-10-30 21:31:59 +03:00
parent 251f078baf
commit de141e0ee3
2 changed files with 309 additions and 204 deletions

View File

@ -504,9 +504,10 @@ void DxbcShaderTranslator::Reset() {
system_temp_count_current_ = 0;
system_temp_count_max_ = 0;
cf_currently_predicated_ = false;
cf_exec_predicated_ = false;
cf_exec_bool_constant_ = kCfExecBoolConstantNone;
cf_exec_predicated_ = false;
cf_instruction_predicate_if_open_ = false;
cf_exec_predicate_written_ = false;
writes_depth_ = false;
@ -5454,6 +5455,9 @@ void DxbcShaderTranslator::CompletePixelShader() {
void DxbcShaderTranslator::CompleteShaderCode() {
if (!is_depth_only_pixel_shader_) {
// Close the last exec, there's nothing to merge it with anymore, and we're
// closing upper-level flow control blocks.
CloseExecConditionals();
// Close the last label and the switch.
if (FLAGS_dxbc_switch) {
shader_code_.push_back(
@ -6517,69 +6521,44 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
}
}
void DxbcShaderTranslator::ClosePredicate() {
if (cf_currently_predicated_) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
cf_currently_predicated_ = false;
void DxbcShaderTranslator::UpdateExecConditionals(
ParsedExecInstruction::Type type, uint32_t bool_constant_index,
bool condition) {
// Check if we can merge the new exec with the previous one, or the jump with
// the previous exec. The instruction-level predicate check is also merged in
// this case.
if (type == ParsedExecInstruction::Type::kConditional) {
// Can merge conditional with conditional, as long as the bool constant and
// the expected values are the same.
if (cf_exec_bool_constant_ == bool_constant_index &&
cf_exec_bool_constant_condition_ == condition) {
return;
}
} else if (type == ParsedExecInstruction::Type::kPredicated) {
// Can merge predicated with predicated if the conditions are the same and
// the previous exec hasn't modified the predicate register.
if (!cf_exec_predicate_written_ && cf_exec_predicated_ &&
cf_exec_predicate_condition_ == condition) {
return;
}
} else {
// Can merge unconditional with unconditional.
if (cf_exec_bool_constant_ == kCfExecBoolConstantNone &&
!cf_exec_predicated_) {
return;
}
}
}
void DxbcShaderTranslator::CheckPredicate(
bool instruction_predicated, bool instruction_predicate_condition) {
// If the instruction doesn't have its own predicate check, inherit it from
// the exec.
if (!instruction_predicated) {
instruction_predicated = cf_exec_predicated_;
instruction_predicate_condition = cf_exec_predicate_condition_;
}
// Close the current predicate if the conditions don't match or not predicated
// anymore.
if (cf_currently_predicated_ &&
(!instruction_predicated ||
cf_current_predicate_condition_ != instruction_predicate_condition)) {
ClosePredicate();
}
// Open a new predicate if predicated now, but the conditions don't match (or
// the previous instruction wasn't predicated).
if (instruction_predicated &&
(!cf_currently_predicated_ ||
cf_current_predicate_condition_ != instruction_predicate_condition)) {
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
instruction_predicate_condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
cf_currently_predicated_ = true;
cf_current_predicate_condition_ = instruction_predicate_condition;
}
}
CloseExecConditionals();
void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
if (cf_exec_bool_constant_ == index &&
(index == kCfExecBoolConstantNone ||
cf_exec_bool_constant_condition_ == condition)) {
return;
}
if (cf_exec_bool_constant_ != kCfExecBoolConstantNone) {
// Predicates are checked deeper than the bool constant.
ClosePredicate();
// Close the current `if`.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
cf_exec_bool_constant_ = kCfExecBoolConstantNone;
}
if (index != kCfExecBoolConstantNone) {
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
if (type == ParsedExecInstruction::Type::kConditional) {
uint32_t bool_constant_test_register = PushSystemTemp();
// Check the bool constant's value.
// Check the bool constant value.
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
@ -6592,16 +6571,14 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(index >> 5);
shader_code_.push_back(bool_constant_index >> 5);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1u << (index & 31));
shader_code_.push_back(1u << (bool_constant_index & 31));
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Open the new `if`.
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
@ -6610,10 +6587,87 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
shader_code_.push_back(bool_constant_test_register);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Release bool_constant_test_register.
PopSystemTemp();
cf_exec_bool_constant_ = index;
cf_exec_bool_constant_ = bool_constant_index;
cf_exec_bool_constant_condition_ = condition;
} else if (type == ParsedExecInstruction::Type::kPredicated) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
cf_exec_predicated_ = true;
cf_exec_predicate_condition_ = condition;
}
}
// Leaves every conditional opened on behalf of the current exec: first the
// instruction-level predicate `if` (the inner level), then the exec-level `if`
// (bool constant or predicate check), if one is open. The exec state is reset
// so that the next exec or jump is not merged with this one.
void DxbcShaderTranslator::CloseExecConditionals() {
  // Inner level - the per-instruction predicate check.
  CloseInstructionPredication();

  // Outer level - the exec's own condition. At most one `endif` is emitted
  // here, whether the exec was conditional on a bool constant or predicated.
  const bool exec_if_open =
      cf_exec_predicated_ || cf_exec_bool_constant_ != kCfExecBoolConstantNone;
  if (exec_if_open) {
    const auto endif_token =
        ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
        ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1);
    shader_code_.push_back(endif_token);
    ++stat_.instruction_count;
    cf_exec_predicated_ = false;
    cf_exec_bool_constant_ = kCfExecBoolConstantNone;
  }

  // With the exec closed, no open `if` depends on the predicate still having
  // the value it had when the exec began.
  cf_exec_predicate_written_ = false;
}
void DxbcShaderTranslator::UpdateInstructionPredication(bool predicated,
bool condition) {
if (predicated) {
if (cf_instruction_predicate_if_open_) {
if (cf_instruction_predicate_condition_ == condition) {
// Already in the needed instruction-level `if`.
return;
}
CloseInstructionPredication();
}
// If the instruction predicate condition is the same as the exec predicate
// condition, no need to open a check. However, if there was a `setp` prior
// to this instruction, the predicate value now may be different than it was
// in the beginning of the exec.
if (!cf_exec_predicate_written_ && cf_exec_predicated_ &&
cf_exec_predicate_condition_ == condition) {
return;
}
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
cf_instruction_predicate_if_open_ = true;
cf_instruction_predicate_condition_ = condition;
} else {
CloseInstructionPredication();
}
}
// Emits the `endif` for the instruction-level predicate check, if one is
// currently open, and clears the flag tracking it. Exec-level state is left
// untouched.
void DxbcShaderTranslator::CloseInstructionPredication() {
  if (!cf_instruction_predicate_if_open_) {
    // No instruction-level `if` is pending - nothing to emit.
    return;
  }
  const auto endif_token = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1);
  shader_code_.push_back(endif_token);
  ++stat_.instruction_count;
  cf_instruction_predicate_if_open_ = false;
}
@ -6891,14 +6945,9 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
return;
}
// Force close all `if`s on the levels below for safety (they should be closed
// anyway, but what if).
// TODO(Triang3l): See if that's enough. At least in Halo 3, labels are only
// placed between different `exec`s - however, if in some game they can be
// located within `exec`s, this would require restoring all those `if`s after
// the label.
ClosePredicate();
SetExecBoolConstant(kCfExecBoolConstantNone, false);
// Close flow control on the deeper levels below - prevent attempts to merge
// execs across labels.
CloseExecConditionals();
if (FLAGS_dxbc_switch) {
// Fallthrough to the label from the previous one on the next iteration if
@ -6967,38 +7016,19 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
void DxbcShaderTranslator::ProcessExecInstructionBegin(
const ParsedExecInstruction& instr) {
// Force close the last `exec` if ProcessExecInstructionEnd was somehow not
// called, just for safety.
ClosePredicate();
cf_exec_predicated_ = false;
SetExecBoolConstant(kCfExecBoolConstantNone, false);
// TODO(Triang3l): Handle PredicateClean=true somehow - still not known how it
// should be done. Execs doing setp are marked as PredicateClean=false,
// however it's very unlikely that PredicateClean=true means clean the
// predicate after the exec - shaders in Halo 3 have sequences of (p0) exec
// without setp in them and without PredicateClean=false; if it was actually
// cleaned after exec, all but the first would never be executed. Let's just
// ignore them for now.
if (instr.type == ParsedExecInstruction::Type::kConditional) {
SetExecBoolConstant(instr.bool_constant_index, instr.condition);
} else if (instr.type == ParsedExecInstruction::Type::kPredicated) {
// The predicate will actually be checked by the next ALU/fetch instruction.
cf_exec_predicated_ = true;
cf_exec_predicate_condition_ = instr.condition;
}
UpdateExecConditionals(instr.type, instr.bool_constant_index,
instr.condition);
// TODO(Triang3l): Find out what PredicateClean=false in exec actually means
// (execs containing setp have PredicateClean=false, it possibly means that
// the predicate is dirty after the exec).
}
void DxbcShaderTranslator::ProcessExecInstructionEnd(
const ParsedExecInstruction& instr) {
// TODO(Triang3l): Check whether is_end is conditional or not.
if (instr.is_end) {
// In case some instruction has flipped the predicate condition.
if (cf_exec_predicated_) {
CheckPredicate(cf_exec_predicated_, cf_exec_predicate_condition_);
}
// Break out of the main loop.
CloseInstructionPredication();
if (FLAGS_dxbc_switch) {
// Write an invalid value to pc.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
@ -7023,15 +7053,15 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
++stat_.instruction_count;
}
}
ClosePredicate();
cf_exec_predicated_ = false;
SetExecBoolConstant(kCfExecBoolConstantNone, false);
}
void DxbcShaderTranslator::ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) {
// loop il<idx>, L<idx> - loop with loop data il<idx>, end @ L<idx>
// Loop control is outside execs - actually close the last exec.
CloseExecConditionals();
uint32_t loop_count_and_aL = PushSystemTemp();
// Count (as uint) in bits 0:7 of the loop constant, aL in 8:15.
@ -7135,6 +7165,9 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) {
// endloop il<idx>, L<idx> - end loop w/ data il<idx>, head @ L<idx>
// Loop control is outside execs - actually close the last exec.
CloseExecConditionals();
// Subtract 1 from the loop counter.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
@ -7287,62 +7320,24 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
void DxbcShaderTranslator::ProcessJumpInstruction(
const ParsedJumpInstruction& instr) {
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
instr.condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
// Treat like exec, merge with execs if possible, since it's an if too.
ParsedExecInstruction::Type type;
if (instr.type == ParsedJumpInstruction::Type::kConditional) {
uint32_t bool_constant_test_register = PushSystemTemp();
// Check the bool constant's value.
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(bool_constant_test_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(instr.bool_constant_index >> 5);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1u << (instr.bool_constant_index & 31));
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Open the `if`.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(bool_constant_test_register);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Release bool_constant_test_register.
PopSystemTemp();
type = ParsedExecInstruction::Type::kConditional;
} else if (instr.type == ParsedJumpInstruction::Type::kPredicated) {
// Called outside of exec - need to check the predicate explicitly.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
type = ParsedExecInstruction::Type::kPredicated;
} else {
type = ParsedExecInstruction::Type::kUnconditional;
}
UpdateExecConditionals(type, instr.bool_constant_index, instr.condition);
// UpdateExecConditionals may not necessarily close the instruction-level
// predicate check (it's not necessary if the execs are merged), but here the
// instruction itself is on the flow control level, so the predicate check is
// on the flow control level too.
CloseInstructionPredication();
JumpToLabel(instr.target_address);
if (instr.type == ParsedJumpInstruction::Type::kConditional ||
instr.type == ParsedJumpInstruction::Type::kPredicated) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
void DxbcShaderTranslator::ProcessVertexFetchInstruction(
@ -7393,7 +7388,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
}
uint32_t result_write_mask = (1 << result_component_count) - 1;
CheckPredicate(instr.is_predicated, instr.predicate_condition);
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
// Convert the index to an integer.
DxbcSourceOperand index_operand;
@ -8143,7 +8138,69 @@ void DxbcShaderTranslator::ArrayCoordToCubeDirection(uint32_t reg) {
void DxbcShaderTranslator::ProcessTextureFetchInstruction(
const ParsedTextureFetchInstruction& instr) {
CheckPredicate(instr.is_predicated, instr.predicate_condition);
// Predication should not affect derivative calculation:
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
// Do the part involving derivative calculation unconditionally, and re-enter
// the predicate check before writing the result.
bool suppress_predication = false;
if (IsDXBCPixelShader()) {
if (instr.opcode == FetchOpcode::kGetTextureComputedLod ||
instr.opcode == FetchOpcode::kGetTextureGradients) {
suppress_predication = true;
} else if (instr.opcode == FetchOpcode::kTextureFetch) {
suppress_predication = instr.attributes.use_computed_lod &&
!instr.attributes.use_register_lod;
}
}
uint32_t exec_p0_temp = UINT32_MAX;
if (suppress_predication) {
// Close instruction-level predication.
CloseInstructionPredication();
// Temporarily close exec-level predication - will reopen at the end, so not
// changing cf_exec_predicated_.
if (cf_exec_predicated_) {
if (cf_exec_predicate_written_) {
// Restore the predicate value in the beginning of the exec and put it
// in exec_p0_temp.
exec_p0_temp = PushSystemTemp();
// `if` case - the value was cf_exec_predicate_condition_.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(exec_p0_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(cf_exec_predicate_condition_ ? 0xFFFFFFFFu : 0u);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// `else` case - the value was !cf_exec_predicate_condition_.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(exec_p0_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(cf_exec_predicate_condition_ ? 0u : 0xFFFFFFFFu);
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
} else {
UpdateInstructionPredication(instr.is_predicated,
instr.predicate_condition);
}
bool store_result = false;
// Whether the result is only in X and all components should be remapped to X
@ -9401,6 +9458,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
UnloadDxbcSourceOperand(operand);
}
// Re-enter conditional execution if closed it.
if (suppress_predication) {
// Re-enter exec-level predication.
if (cf_exec_predicated_) {
D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
cf_exec_predicate_condition_ ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, exec_p0_temp != UINT32_MAX ? 0 : 2, 1));
shader_code_.push_back(
exec_p0_temp != UINT32_MAX ? exec_p0_temp : system_temp_ps_pc_p0_a0_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
if (exec_p0_temp != UINT32_MAX) {
PopSystemTemp();
}
}
// Update instruction-level predication to the one needed by this tfetch.
UpdateInstructionPredication(instr.is_predicated,
instr.predicate_condition);
}
if (store_result) {
StoreResult(instr.result, system_temp_pv_, replicate_result);
}
@ -9408,10 +9490,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
void DxbcShaderTranslator::ProcessVectorAluInstruction(
const ParsedAluInstruction& instr) {
CheckPredicate(instr.is_predicated, instr.predicate_condition);
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
// Whether the instruction has changed the predicate and it needs to be
// checked again.
bool close_predicate = false;
// checked again later.
bool predicate_written = false;
// Whether the result is only in X and all components should be remapped to X
// while storing.
@ -10237,7 +10319,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kSetpNePush:
case AluVectorOpcode::kSetpGtPush:
case AluVectorOpcode::kSetpGePush:
close_predicate = true;
predicate_written = true;
replicate_result = true;
// pv.xy = (src0.x == 0.0, src0.w == 0.0)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
@ -10631,17 +10713,18 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
StoreResult(instr.result, system_temp_pv_, replicate_result);
if (close_predicate) {
ClosePredicate();
if (predicate_written) {
cf_exec_predicate_written_ = true;
CloseInstructionPredication();
}
}
void DxbcShaderTranslator::ProcessScalarAluInstruction(
const ParsedAluInstruction& instr) {
CheckPredicate(instr.is_predicated, instr.predicate_condition);
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
// Whether the instruction has changed the predicate and it needs to be
// checked again.
bool close_predicate = false;
// checked again later.
bool predicate_written = false;
DxbcSourceOperand dxbc_operands[3];
uint32_t operand_lengths[3];
@ -11328,7 +11411,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
case AluScalarOpcode::kSetpNe:
case AluScalarOpcode::kSetpGt:
case AluScalarOpcode::kSetpGe:
close_predicate = true;
predicate_written = true;
// Set p0 to whether the comparison with zero passes.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
@ -11369,7 +11452,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break;
case AluScalarOpcode::kSetpInv:
close_predicate = true;
predicate_written = true;
// Compare src0 to 0.0 (taking denormals into account, for instance) to
// know what to set ps to in case src0 is not 1.0.
shader_code_.push_back(
@ -11434,7 +11517,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break;
case AluScalarOpcode::kSetpPop:
close_predicate = true;
predicate_written = true;
// ps = src0 - 1.0
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
@ -11480,7 +11563,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break;
case AluScalarOpcode::kSetpClr:
close_predicate = true;
predicate_written = true;
// ps = FLT_MAX
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
@ -11506,7 +11589,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break;
case AluScalarOpcode::kSetpRstr:
close_predicate = true;
predicate_written = true;
// Copy src0 to ps.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
@ -11720,8 +11803,9 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true);
if (close_predicate) {
ClosePredicate();
if (predicate_written) {
cf_exec_predicate_written_ = true;
CloseInstructionPredication();
}
}

View File

@ -849,29 +849,40 @@ class DxbcShaderTranslator : public ShaderTranslator {
// The nesting of `if` instructions is the following:
// - pc checks (labels).
// - Bool constant checks (can only be done by exec).
// - Predicate checks (can be done both by exec and by instructions).
// It's probably fine to place instruction predicate checks and exec predicate
// on the same level rather than creating another level for instruction-level
// predicates, because (at least in Halo 3), in a `(p0) exec`, all
// instructions are `(p0)`, and `setp` isn't invoked in `(p0) exec`. Another
// possible constraint making things easier is labels not appearing within
// execs - so a label doesn't have to recheck the exec's condition.
// TODO(Triang3l): Check if these control flow constraints are true for all
// games.
// - exec predicate/bool constant check.
// - Instruction-level predicate checks.
// As an optimization, where possible, the DXBC translator tries to merge
// multiple execs into one, not creating endif/if doing nothing, if the
// execution condition is the same. This can't be done across labels
// (obviously) and in case `setp` is done in a predicated exec - in this case,
// the predicate value in the current exec may not match the predicate value
// in the next exec.
// Instruction-level predicate checks are also merged, and until a `setp` is
// done, if the instruction has the same predicate condition as the exec it is
// in, no instruction-level predicate `if` is created as well. One exception
// to the usual way of instruction-level predicate handling is made for
// instructions involving derivative computation, such as texture fetches with
// computed LOD. The part involving derivatives is executed disregarding the
// predication, but the result storing is predicated (this is handled in
// texture fetch instruction implementation):
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
// Closes the current predicate `if` (but doesn't reset the current exec's
// predicate).
void ClosePredicate();
// Updates the current predicate, placing if/endif when needed. This MUST be
// called before emitting any instructions within an exec because the exec
// implementation here doesn't place if/endif, only defers updating the
// predicate.
void CheckPredicate(bool instruction_predicated,
bool instruction_predicate_condition);
// Opens or closes the `if` checking the value of a bool constant - call with
// kCfExecBoolConstantNone to force close.
void SetExecBoolConstant(uint32_t index, bool condition);
// Updates the current flow control condition (to be called in the beginning
// of exec and in jumps), closing the previous conditionals if needed.
// However, if the condition is not different, the instruction-level predicate
// `if` also won't be closed - this must be checked separately if needed (for
// example, in jumps).
void UpdateExecConditionals(ParsedExecInstruction::Type type,
uint32_t bool_constant_index, bool condition);
// Closes `if`s opened by exec and instructions within them (but not by
// labels) and updates the state accordingly.
void CloseExecConditionals();
// Opens or reopens the predicate check conditional for the instruction.
void UpdateInstructionPredication(bool predicated, bool condition);
// Closes the instruction-level predicate `if` if it's open, useful if a flow
// control instruction needs to do some code which needs to respect the exec's
// conditional, but can't itself be predicated.
void CloseInstructionPredication();
void JumpToLabel(uint32_t address);
// Emits code for endian swapping of the data located in pv.
@ -1030,19 +1041,29 @@ class DxbcShaderTranslator : public ShaderTranslator {
// for accuracy.
uint32_t system_temp_depth_;
// Whether a predicate `if` is open.
bool cf_currently_predicated_;
// Currently expected predicate value.
bool cf_current_predicate_condition_;
// Whether the current `exec` is predicated.
bool cf_exec_predicated_;
// Predicate condition in the current `exec`.
bool cf_exec_predicate_condition_;
// The bool constant number containing the condition for the current `exec`.
// The bool constant number containing the condition for the currently
// processed exec (or the last - unless a label has reset this), or
// kCfExecBoolConstantNone if it's not checked.
uint32_t cf_exec_bool_constant_;
static constexpr uint32_t kCfExecBoolConstantNone = UINT32_MAX;
// The expected value in the current conditional exec.
// The expected bool constant value in the current exec if
// cf_exec_bool_constant_ is not kCfExecBoolConstantNone.
bool cf_exec_bool_constant_condition_;
// Whether the currently processed exec is executed if a predicate is
// set/unset.
bool cf_exec_predicated_;
// The expected predicate condition if cf_exec_predicated_ is true.
bool cf_exec_predicate_condition_;
// Whether an `if` for instruction-level predicate check is currently open.
bool cf_instruction_predicate_if_open_;
// The expected predicate condition for the current or the last instruction if
// cf_instruction_predicate_if_open_ is true.
bool cf_instruction_predicate_condition_;
// Whether there was a `setp` in the current exec before the current
// instruction, thus instruction-level predicate value can be different than
// the exec-level predicate value, and can't merge two execs with the same
// predicate condition anymore.
bool cf_exec_predicate_written_;
bool writes_depth_;