[D3D12] DXBC exec and predicate

This commit is contained in:
Triang3l 2018-09-10 21:35:12 +03:00
parent b0fdab1754
commit 20e607a35c
2 changed files with 206 additions and 16 deletions

View File

@ -69,6 +69,9 @@ void DxbcShaderTranslator::Reset() {
rdef_constants_used_ = 0; rdef_constants_used_ = 0;
system_temp_count_current_ = 0; system_temp_count_current_ = 0;
system_temp_count_max_ = 0; system_temp_count_max_ = 0;
cf_currently_predicated_ = false;
cf_exec_predicated_ = false;
cf_exec_bool_constant_ = kCfExecBoolConstantNone;
writes_depth_ = false; writes_depth_ = false;
texture_srvs_.clear(); texture_srvs_.clear();
sampler_bindings_.clear(); sampler_bindings_.clear();
@ -1707,6 +1710,105 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
} }
} }
void DxbcShaderTranslator::ClosePredicate() {
  // Nothing to emit when no predicate `if` is open.
  if (!cf_currently_predicated_) {
    return;
  }
  // Mark the predicate level as closed and emit the matching single-token
  // `endif`. The exec-level predicate state is intentionally left untouched.
  cf_currently_predicated_ = false;
  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
  ++stat_.instruction_count;
}
// Makes the open predicate `if` (if any) match what the next translated
// instruction requires, emitting `endif` / `if` tokens as needed.
//
// instruction_predicated: whether the instruction carries its own predicate
//   check. If it doesn't, the predicate of the current `exec` (if any) is
//   inherited.
// instruction_predicate_condition: the predicate value the instruction expects
//   (only meaningful when instruction_predicated is true).
void DxbcShaderTranslator::CheckPredicate(
    bool instruction_predicated, bool instruction_predicate_condition) {
  // If the instruction doesn't have its own predicate check, inherit it from
  // the exec. The exec's condition is only read when the exec is actually
  // predicated: it is assigned only for predicated execs, so reading it
  // otherwise could load a value that was never written.
  if (!instruction_predicated && cf_exec_predicated_) {
    instruction_predicated = true;
    instruction_predicate_condition = cf_exec_predicate_condition_;
  }

  if (cf_currently_predicated_) {
    // The open `if` already tests exactly what is needed - keep it.
    if (instruction_predicated &&
        cf_current_predicate_condition_ == instruction_predicate_condition) {
      return;
    }
    // Close the current predicate if the conditions don't match or not
    // predicated anymore.
    ClosePredicate();
  }

  if (!instruction_predicated) {
    return;
  }

  // Open a new predicate `if` testing the predicate value stored in component
  // 2 of system_temp_ps_pc_p0_a0_ against the expected condition.
  D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
      instruction_predicate_condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
                                      : D3D10_SB_INSTRUCTION_TEST_ZERO;
  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
                         ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
  shader_code_.push_back(
      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
  shader_code_.push_back(system_temp_ps_pc_p0_a0_);
  ++stat_.instruction_count;
  ++stat_.dynamic_flow_control_count;
  cf_currently_predicated_ = true;
  cf_current_predicate_condition_ = instruction_predicate_condition;
}
// Opens, switches or closes the `if` that checks a bool constant for the
// current conditional `exec`.
//
// index: number of the bool constant to test, or kCfExecBoolConstantNone to
//   only close the currently open check.
// condition: the value the bool constant is expected to have (ignored when
//   index is kCfExecBoolConstantNone).
void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
  // Nothing to do if the requested check is already the open one (or if no
  // check is open and none is requested).
  if (cf_exec_bool_constant_ == index &&
      (index == kCfExecBoolConstantNone ||
       cf_exec_bool_constant_condition_ == condition)) {
    return;
  }

  // Predicates are checked deeper than the bool constant, so a predicate `if`
  // must never stay open across a change of the bool constant level - neither
  // when closing the old check nor when opening a new one. Otherwise the new
  // `if` would end up nested inside the predicate `if` and the tracked state
  // would no longer match the emitted code.
  ClosePredicate();

  if (cf_exec_bool_constant_ != kCfExecBoolConstantNone) {
    // Close the current `if`.
    shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
    ++stat_.instruction_count;
    cf_exec_bool_constant_ = kCfExecBoolConstantNone;
  }

  if (index == kCfExecBoolConstantNone) {
    return;
  }

  uint32_t bool_constant_test_register = PushSystemTemp();
  // Check the bool constant's value: AND the constant buffer element at
  // (index >> 5) with the single-bit mask 1 << (index & 31) into the temporary
  // register.
  rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kBoolConstants);
  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
  shader_code_.push_back(
      EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
  shader_code_.push_back(bool_constant_test_register);
  shader_code_.push_back(
      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
  shader_code_.push_back(
      uint32_t(RdefConstantBufferIndex::kBoolLoopConstants));
  shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
  shader_code_.push_back(index >> 5);
  shader_code_.push_back(
      EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
  shader_code_.push_back(1u << (index & 31));
  ++stat_.instruction_count;
  ++stat_.uint_instruction_count;

  // Open the new `if`, testing the masked bit against the expected value.
  D3D10_SB_INSTRUCTION_TEST_BOOLEAN test =
      condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
                : D3D10_SB_INSTRUCTION_TEST_ZERO;
  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
                         ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
  shader_code_.push_back(
      EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
  shader_code_.push_back(bool_constant_test_register);
  ++stat_.instruction_count;
  ++stat_.dynamic_flow_control_count;

  // Release bool_constant_test_register - it is only needed by the `if`
  // emitted above.
  PopSystemTemp();

  cf_exec_bool_constant_ = index;
  cf_exec_bool_constant_condition_ = condition;
}
void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index, void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
uint32_t write_mask) { uint32_t write_mask) {
// Allocate temporary registers for intermediate values. // Allocate temporary registers for intermediate values.
@ -1960,10 +2062,21 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
// 0 already added in the beginning. // 0 already added in the beginning.
return; return;
} }
// Force-close all `if`s on the deeper nesting levels for safety (they should
// already be closed here, but the label's `endif` below must not close them).
// TODO(Triang3l): See if that's enough. At least in Halo 3, labels are only
// placed between different `exec`s - however, if in some game they can be
// located within `exec`s, this would require restoring all those `if`s after
// the label.
ClosePredicate();
SetExecBoolConstant(kCfExecBoolConstantNone, false);
// Close the previous label. // Close the previous label.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count; ++stat_.instruction_count;
// pc <= cf_index // pc <= cf_index
uint32_t test_register = PushSystemTemp(); uint32_t test_register = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) |
@ -1992,6 +2105,38 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
PopSystemTemp(); PopSystemTemp();
} }
void DxbcShaderTranslator::ProcessExecInstructionBegin(
    const ParsedExecInstruction& instr) {
  // Start from a clean state: if ProcessExecInstructionEnd was somehow not
  // called for the previous `exec`, drop whatever it left open.
  ClosePredicate();
  cf_exec_predicated_ = false;
  SetExecBoolConstant(kCfExecBoolConstantNone, false);

  // TODO(Triang3l): Handle PredicateClean=true somehow - it's still not known
  // how it should be done. Execs doing setp are marked as
  // PredicateClean=false, but it's very unlikely that PredicateClean=true
  // means "clean the predicate after the exec": shaders in Halo 3 have
  // sequences of (p0) exec without setp in them and without
  // PredicateClean=false, and if the predicate was actually cleaned after each
  // exec, all but the first would never be executed. Ignored for now.
  switch (instr.type) {
    case ParsedExecInstruction::Type::kConditional:
      // Bool constant checks are opened right away.
      SetExecBoolConstant(instr.bool_constant_index, instr.condition);
      break;
    case ParsedExecInstruction::Type::kPredicated:
      // Only remember the predicate here - the actual check is emitted by the
      // next ALU/fetch instruction.
      cf_exec_predicated_ = true;
      cf_exec_predicate_condition_ = instr.condition;
      break;
    default:
      // Other exec types need no condition.
      break;
  }
}
void DxbcShaderTranslator::ProcessExecInstructionEnd(
    const ParsedExecInstruction& instr) {
  // Leave the `exec`: forget its predicate, then close the open `if`s from the
  // deepest level (predicate) to the bool constant check.
  cf_exec_predicated_ = false;
  ClosePredicate();
  SetExecBoolConstant(kCfExecBoolConstantNone, false);
}
void DxbcShaderTranslator::ProcessVertexFetchInstruction( void DxbcShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) { const ParsedVertexFetchInstruction& instr) {
if (instr.operand_count < 2 || if (instr.operand_count < 2 ||
@ -2040,7 +2185,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
} }
uint32_t result_write_mask = (1 << result_component_count) - 1; uint32_t result_write_mask = (1 << result_component_count) - 1;
// TODO(Triang3l): Predicate. CheckPredicate(instr.is_predicated, instr.predicate_condition);
// Convert the index to an integer. // Convert the index to an integer.
DxbcSourceOperand index_operand; DxbcSourceOperand index_operand;
@ -2408,7 +2553,7 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
void DxbcShaderTranslator::ProcessTextureFetchInstruction( void DxbcShaderTranslator::ProcessTextureFetchInstruction(
const ParsedTextureFetchInstruction& instr) { const ParsedTextureFetchInstruction& instr) {
// TODO(Triang3l): Predicate. CheckPredicate(instr.is_predicated, instr.predicate_condition);
bool store_result = false; bool store_result = false;
@ -2492,11 +2637,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
void DxbcShaderTranslator::ProcessVectorAluInstruction( void DxbcShaderTranslator::ProcessVectorAluInstruction(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
// TODO(Triang3l): Predicate. CheckPredicate(instr.is_predicated, instr.predicate_condition);
// Whether the instruction has changed the predicate and it needs to be // Whether the instruction has changed the predicate and it needs to be
// checked again. // checked again.
bool close_predicate_block = false; bool close_predicate = false;
// Whether the result is only in X and all components should be remapped to X // Whether the result is only in X and all components should be remapped to X
// while storing. // while storing.
@ -3068,7 +3212,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
case AluVectorOpcode::kSetpNePush: case AluVectorOpcode::kSetpNePush:
case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGtPush:
case AluVectorOpcode::kSetpGePush: case AluVectorOpcode::kSetpGePush:
close_predicate_block = true; close_predicate = true;
replicate_result = true; replicate_result = true;
// pv.xy = (src0.x == 0.0, src0.w == 0.0) // pv.xy = (src0.x == 0.0, src0.w == 0.0)
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
@ -3395,16 +3539,17 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
StoreResult(instr.result, system_temp_pv_, replicate_result); StoreResult(instr.result, system_temp_pv_, replicate_result);
// TODO(Triang3l): Close predicate check. if (close_predicate) {
ClosePredicate();
}
} }
void DxbcShaderTranslator::ProcessScalarAluInstruction( void DxbcShaderTranslator::ProcessScalarAluInstruction(
const ParsedAluInstruction& instr) { const ParsedAluInstruction& instr) {
// TODO(Triang3l): Predicate. CheckPredicate(instr.is_predicated, instr.predicate_condition);
// Whether the instruction has changed the predicate and it needs to be // Whether the instruction has changed the predicate and it needs to be
// checked again. // checked again.
bool close_predicate_block = false; bool close_predicate = false;
DxbcSourceOperand dxbc_operands[3]; DxbcSourceOperand dxbc_operands[3];
uint32_t operand_lengths[3]; uint32_t operand_lengths[3];
@ -3952,7 +4097,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
case AluScalarOpcode::kSetpNe: case AluScalarOpcode::kSetpNe:
case AluScalarOpcode::kSetpGt: case AluScalarOpcode::kSetpGt:
case AluScalarOpcode::kSetpGe: case AluScalarOpcode::kSetpGe:
close_predicate_block = true; close_predicate = true;
// Set p0 to whether the comparison with zero passes. // Set p0 to whether the comparison with zero passes.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE( ENCODE_D3D10_SB_OPCODE_TYPE(
@ -3993,7 +4138,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break; break;
case AluScalarOpcode::kSetpInv: case AluScalarOpcode::kSetpInv:
close_predicate_block = true; close_predicate = true;
// Compare src0 to 0.0 (taking denormals into account, for instance) to // Compare src0 to 0.0 (taking denormals into account, for instance) to
// know what to set ps to in case src0 is not 1.0. // know what to set ps to in case src0 is not 1.0.
shader_code_.push_back( shader_code_.push_back(
@ -4058,7 +4203,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break; break;
case AluScalarOpcode::kSetpPop: case AluScalarOpcode::kSetpPop:
close_predicate_block = true; close_predicate = true;
// ps = src0 - 1.0 // ps = src0 - 1.0
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
@ -4104,7 +4249,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break; break;
case AluScalarOpcode::kSetpClr: case AluScalarOpcode::kSetpClr:
close_predicate_block = true; close_predicate = true;
// ps = FLT_MAX // ps = FLT_MAX
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
@ -4130,7 +4275,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break; break;
case AluScalarOpcode::kSetpRstr: case AluScalarOpcode::kSetpRstr:
close_predicate_block = true; close_predicate = true;
// Copy src0 to ps. // Copy src0 to ps.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
@ -4270,7 +4415,9 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true); StoreResult(instr.result, system_temp_ps_pc_p0_a0_, true);
// TODO(Triang3l): Close predicate check. if (close_predicate) {
ClosePredicate();
}
} }
void DxbcShaderTranslator::ProcessAluInstruction( void DxbcShaderTranslator::ProcessAluInstruction(

View File

@ -95,6 +95,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ProcessLabel(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessVertexFetchInstruction( void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override; const ParsedVertexFetchInstruction& instr) override;
void ProcessTextureFetchInstruction( void ProcessTextureFetchInstruction(
@ -300,6 +303,32 @@ class DxbcShaderTranslator : public ShaderTranslator {
void StoreResult(const InstructionResult& result, uint32_t reg, void StoreResult(const InstructionResult& result, uint32_t reg,
bool replicate_x); bool replicate_x);
// The nesting of `if` instructions is the following:
// - pc checks (labels).
// - Bool constant checks (can only be done by exec).
// - Predicate checks (can be done both by exec and by instructions).
// It's probably fine to place instruction predicate checks and exec predicate
// on the same level rather than creating another level for instruction-level
// predicates, because (at least in Halo 3), in a `(p0) exec`, all
// instructions are `(p0)`, and `setp` isn't invoked in `(p0) exec`. Another
// possible constraint making things easier is labels not appearing within
// execs - so a label doesn't have to recheck the exec's condition.
// TODO(Triang3l): Check if these control flow constraints are true for all
// games.
// Closes the current predicate `if` by emitting an `endif` (but doesn't reset
// the current exec's predicate). Does nothing if no predicate `if` is open.
void ClosePredicate();
// Updates the current predicate, placing if/endif when needed. This MUST be
// called before emitting any translated instructions because the exec
// implementation here doesn't place if/endif, only defers updating the
// predicate.
// instruction_predicated is whether the instruction has its own predicate
// check - if it doesn't, the predicate of the current exec is inherited.
// instruction_predicate_condition is the predicate value expected by the
// instruction.
void CheckPredicate(bool instruction_predicated,
bool instruction_predicate_condition);
// Opens or closes the `if` checking the value of a bool constant - call with
// kCfExecBoolConstantNone to force close. index is the number of the bool
// constant to check, condition is the value it's expected to have. Does
// nothing if the same check is already open.
void SetExecBoolConstant(uint32_t index, bool condition);
// Emits code for endian swapping of the data located in pv. // Emits code for endian swapping of the data located in pv.
void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask); void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);
@ -470,6 +499,20 @@ class DxbcShaderTranslator : public ShaderTranslator {
// remapping). // remapping).
uint32_t system_temp_color_[4]; uint32_t system_temp_color_[4];
// Whether a predicate `if` is open.
bool cf_currently_predicated_;
// Currently expected predicate value (the condition of the open predicate
// `if`). Only meaningful while cf_currently_predicated_ is true.
bool cf_current_predicate_condition_;
// Whether the current `exec` is predicated.
bool cf_exec_predicated_;
// Predicate condition in the current `exec`. Assigned when a predicated `exec`
// begins, so only meaningful while cf_exec_predicated_ is true.
bool cf_exec_predicate_condition_;
// The bool constant number containing the condition for the current `exec`, or
// kCfExecBoolConstantNone if no bool constant `if` is open.
uint32_t cf_exec_bool_constant_;
// Value of cf_exec_bool_constant_ meaning that no bool constant is checked.
static constexpr uint32_t kCfExecBoolConstantNone = UINT32_MAX;
// The expected value in the current conditional exec. Only meaningful while
// cf_exec_bool_constant_ is not kCfExecBoolConstantNone.
bool cf_exec_bool_constant_condition_;
bool writes_depth_; bool writes_depth_;
std::vector<TextureSRV> texture_srvs_; std::vector<TextureSRV> texture_srvs_;