[D3D12] DXBC: Handle zeros in dot product and add missing instruction count increments
This commit is contained in:
parent
41d461d3a5
commit
836b4eef27
|
@ -4828,9 +4828,6 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
// TODO(Triang3l): Implement Shader Model 3 behavior of min/max - return the
|
// TODO(Triang3l): Implement Shader Model 3 behavior of min/max - return the
|
||||||
// non-NaN operand if any is NaN.
|
// non-NaN operand if any is NaN.
|
||||||
case AluVectorOpcode::kMin:
|
case AluVectorOpcode::kMin:
|
||||||
case AluVectorOpcode::kDp4:
|
|
||||||
case AluVectorOpcode::kDp3:
|
|
||||||
// dp4 and dp3 replicate the result implicitly.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
@ -4931,6 +4928,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(system_temp_pv_);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
// Release is_subnormal_temp.
|
// Release is_subnormal_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
} break;
|
} break;
|
||||||
|
@ -5120,33 +5119,127 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AluVectorOpcode::kDp2Add:
|
case AluVectorOpcode::kDp4:
|
||||||
// (dot(src0.xy, src1.xy) + src2.x).xxxx
|
case AluVectorOpcode::kDp3:
|
||||||
replicate_result = true;
|
case AluVectorOpcode::kDp2Add: {
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP2) |
|
uint32_t operand_mask;
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
||||||
3 + operand_length_sums[1]));
|
operand_mask = 0b0011;
|
||||||
|
} else if (instr.vector_opcode == AluVectorOpcode::kDp3) {
|
||||||
|
operand_mask = 0b0111;
|
||||||
|
} else {
|
||||||
|
operand_mask = 0b1111;
|
||||||
|
}
|
||||||
|
// Load the operands into pv and a temp register, zeroing if the other
|
||||||
|
// operand is zero or denormalized, reproducing the Shader Model 3
|
||||||
|
// multiplication behavior (0 * anything = 0).
|
||||||
|
uint32_t src1_temp = PushSystemTemp();
|
||||||
|
// Load the first operand into pv.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
8 + DxbcSourceOperandLength(dxbc_operands[1], false, true)));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
|
||||||
shader_code_.push_back(system_temp_pv_);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 4, false, true);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
10 + operand_length_sums[0]));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
UseDxbcSourceOperand(dxbc_operands[0]);
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
// Load the second operand into src1_temp.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
8 + DxbcSourceOperandLength(dxbc_operands[0], false, true)));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
|
||||||
|
shader_code_.push_back(src1_temp);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 4, false, true);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
shader_code_.push_back(0x00800000);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
10 + DxbcSourceOperandLength(dxbc_operands[1])));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, operand_mask, 1));
|
||||||
|
shader_code_.push_back(src1_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(src1_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
UseDxbcSourceOperand(dxbc_operands[1]);
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
|
// Calculate the dot product.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
|
||||||
5 + DxbcSourceOperandLength(dxbc_operands[2])));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
|
||||||
shader_code_.push_back(system_temp_pv_);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(system_temp_pv_);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
UseDxbcSourceOperand(dxbc_operands[2], kSwizzleXYZW, 0);
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(src1_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
// Release src1_temp.
|
||||||
|
PopSystemTemp();
|
||||||
|
// Add src2.x for dp2add.
|
||||||
|
if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
5 + DxbcSourceOperandLength(dxbc_operands[2])));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[2], kSwizzleXXXX);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case AluVectorOpcode::kCube: {
|
case AluVectorOpcode::kCube: {
|
||||||
// 3D cubemap direction -> (T, S, 2.0 * major axis, face ID).
|
// 3D cubemap direction -> (T, S, 2.0 * major axis, face ID).
|
||||||
|
@ -5467,6 +5560,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
D3D10_SB_OPERAND_MODIFIER_ABSNEG));
|
D3D10_SB_OPERAND_MODIFIER_ABSNEG));
|
||||||
shader_code_.push_back(system_temp_pv_);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
// Release cube_mask_temp.
|
// Release cube_mask_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
|
Loading…
Reference in New Issue