[D3D12] DXBC: Handle zeros in dot product and add missing instruction count increments

This commit is contained in:
Triang3l 2018-09-18 15:27:34 +03:00
parent 41d461d3a5
commit 836b4eef27
1 changed files with 114 additions and 19 deletions

View File

@ -4828,9 +4828,6 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
// TODO(Triang3l): Implement Shader Model 3 behavior of min/max - return the // TODO(Triang3l): Implement Shader Model 3 behavior of min/max - return the
// non-NaN operand if any is NaN. // non-NaN operand if any is NaN.
case AluVectorOpcode::kMin: case AluVectorOpcode::kMin:
case AluVectorOpcode::kDp4:
case AluVectorOpcode::kDp3:
// dp4 and dp3 replicate the result implicitly.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE( shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) | kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -4931,6 +4928,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Release is_subnormal_temp. // Release is_subnormal_temp.
PopSystemTemp(); PopSystemTemp();
} break; } break;
@ -5120,33 +5119,127 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
break; break;
case AluVectorOpcode::kDp2Add: case AluVectorOpcode::kDp4:
// (dot(src0.xy, src1.xy) + src2.x).xxxx case AluVectorOpcode::kDp3:
replicate_result = true; case AluVectorOpcode::kDp2Add: {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP2) | uint32_t operand_mask;
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
3 + operand_length_sums[1])); operand_mask = 0b0011;
} else if (instr.vector_opcode == AluVectorOpcode::kDp3) {
operand_mask = 0b0111;
} else {
operand_mask = 0b1111;
}
// Load the operands into pv and a temp register, zeroing if the other
// operand is zero or denormalized, reproducing the Shader Model 3
// multiplication behavior (0 * anything = 0).
uint32_t src1_temp = PushSystemTemp();
// Load the first operand into pv.
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
8 + DxbcSourceOperandLength(dxbc_operands[1], false, true)));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[0]); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 4, false, true);
UseDxbcSourceOperand(dxbc_operands[1]); shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
10 + operand_length_sums[0]));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
UseDxbcSourceOperand(dxbc_operands[0]);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Load the second operand into src1_temp.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
8 + DxbcSourceOperandLength(dxbc_operands[0], false, true)));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
shader_code_.push_back(src1_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 4, false, true);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
shader_code_.push_back(0x00800000);
++stat_.instruction_count;
++stat_.float_instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
10 + DxbcSourceOperandLength(dxbc_operands[1])));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
shader_code_.push_back(src1_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(src1_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
UseDxbcSourceOperand(dxbc_operands[1]);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Calculate the dot product.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(src1_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Release src1_temp.
PopSystemTemp();
// Add src2.x for dp2add.
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
5 + DxbcSourceOperandLength(dxbc_operands[2]))); 5 + DxbcSourceOperandLength(dxbc_operands[2])));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
UseDxbcSourceOperand(dxbc_operands[2], kSwizzleXYZW, 0); UseDxbcSourceOperand(dxbc_operands[2], kSwizzleXXXX);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
}
break; break;
}
case AluVectorOpcode::kCube: { case AluVectorOpcode::kCube: {
// 3D cubemap direction -> (T, S, 2.0 * major axis, face ID). // 3D cubemap direction -> (T, S, 2.0 * major axis, face ID).
@ -5467,6 +5560,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABSNEG)); D3D10_SB_OPERAND_MODIFIER_ABSNEG));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Release cube_mask_temp. // Release cube_mask_temp.
PopSystemTemp(); PopSystemTemp();