[D3D12] DXBC: Slightly shorten SM3 mul behavior emulation

This commit is contained in:
Triang3l 2018-12-08 16:42:30 +03:00
parent 9427667a27
commit e1f0e9c84a
1 changed files with 163 additions and 252 deletions

View File

@ -4489,27 +4489,30 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
} }
// Multiply the colors by the factors. // Multiply the colors by the factors, with 0 * Infinity = 0 behavior.
for (uint32_t i = 0; i < 2; ++i) { for (uint32_t i = 0; i < 2; ++i) {
uint32_t factor_temp = uint32_t factor_temp =
i ? dest_factor_and_minmax_temp : src_factor_and_result_temp; i ? dest_factor_and_minmax_temp : src_factor_and_result_temp;
uint32_t color_temp = i ? dest_color_temp : src_color_and_output_temp; uint32_t color_temp = i ? dest_color_temp : src_color_and_output_temp;
// Check if the factor is zero to zero the result later. // Get the multiplicand closer to zero to check if any of them is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(factor_calculation_temp); shader_code_.push_back(factor_calculation_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) |
shader_code_.push_back(factor_temp); ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(color_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) |
shader_code_.push_back(0); ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(0); shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
shader_code_.push_back(0); D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(0); shader_code_.push_back(factor_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
@ -4528,28 +4531,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if the factor was zero (and the color could be Infinity). // Check if the color or the factor is zero to zero the result (min isn't
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | // required to flush denormals in the result).
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(factor_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(factor_calculation_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(factor_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Check if the color is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back( shader_code_.push_back(
@ -4557,7 +4540,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
shader_code_.push_back(factor_calculation_temp); shader_code_.push_back(factor_calculation_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(color_temp); shader_code_.push_back(factor_calculation_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0); shader_code_.push_back(0);
@ -4567,7 +4550,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if the color was zero (and the factor could be Infinity). // Zero the result if the color or the factor is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back( shader_code_.push_back(
@ -10948,27 +10931,27 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
// image missing because rcp(0) is multiplied by 0, which results in NaN // image missing because rcp(0) is multiplied by 0, which results in NaN
// rather than 0. // rather than 0.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Check the first operand. // Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + DxbcSourceOperandLength(dxbc_operands[0], false, true) +
DxbcSourceOperandLength(dxbc_operands[1], false, true)));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 4, false, true);
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 4, false, true);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
8 + operand_length_sums[0]));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
@ -10978,28 +10961,9 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(0); shader_code_.push_back(0);
shader_code_.push_back(0); shader_code_.push_back(0);
shader_code_.push_back(0); shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Check the second operand.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
8 + DxbcSourceOperandLength(dxbc_operands[1])));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[1]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back( shader_code_.push_back(
@ -11093,47 +11057,30 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// If any operand is zero or denormalized, just leave the addition part. // If any operand is zero or denormalized, just leave the addition part.
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Check the first operand. // Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + DxbcSourceOperandLength(dxbc_operands[0], false, true) +
DxbcSourceOperandLength(dxbc_operands[1], false, true)));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 4, false, true);
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 4, false, true);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
8 + operand_length_sums[0]));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0]); shader_code_.push_back(EncodeVectorSwizzledOperand(
shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
7 + DxbcSourceOperandLength(dxbc_operands[2])));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[2]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Check the second operand.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
8 + DxbcSourceOperandLength(dxbc_operands[1])));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[1]);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0); shader_code_.push_back(0);
@ -11142,6 +11089,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(0); shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Zero the multiplication part if any multiplicand is zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11881,52 +11829,6 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.mov_instruction_count; ++stat_.mov_instruction_count;
// pv.y = src0.y * src1.y // pv.y = src0.y * src1.y
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// This is an attenuation calculation function, so infinity is probably
// not very unlikely.
uint32_t is_subnormal_temp = PushSystemTemp();
// Check if src0.y is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
5 + operand_length_sums[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0]);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if src1.y is zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
5 + DxbcSourceOperandLength(dxbc_operands[1])));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[1]);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any operand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(is_subnormal_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Do the multiplication.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_length_sums[1])); 3 + operand_length_sums[1]));
@ -11937,7 +11839,40 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Set pv.y to zero if any operand is zero. // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
// This is an attenuation calculation function, so infinity is fairly
// likely to occur here.
uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + DxbcSourceOperandLength(dxbc_operands[0], false, true) +
DxbcSourceOperandLength(dxbc_operands[1], false, true)));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1, false, true);
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1, false, true);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Set pv.y to zero if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
@ -12199,7 +12134,6 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
break; break;
case AluScalarOpcode::kMuls: { case AluScalarOpcode::kMuls: {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + 2 * operand_lengths[0])); 3 + 2 * operand_lengths[0]));
@ -12210,24 +12144,24 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Check if the operands are zero or denormalized. // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8 + operand_lengths[0])); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + 2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0]); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0, false, true);
shader_code_.push_back(EncodeVectorSwizzledOperand( UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1, false, true);
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | // Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
@ -12236,11 +12170,11 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.float_instruction_count;
// Set the result to zero if any operand is zero. // Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
@ -12264,47 +12198,21 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
case AluScalarOpcode::kMulsPrev: { case AluScalarOpcode::kMulsPrev: {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0). // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp(); uint32_t is_subnormal_temp = PushSystemTemp();
// Check if the first operand (src0.x) is zero. // Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5 + operand_lengths[0])); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
5 + DxbcSourceOperandLength(dxbc_operands[0], false, true)));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp); shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0); UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0, false, true);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if the second operand (ps) is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_); shader_code_.push_back(system_temp_ps_pc_p0_a0_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Check if any operand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(is_subnormal_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Do the multiplication. // Do the multiplication.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
@ -12318,7 +12226,22 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
shader_code_.push_back(system_temp_ps_pc_p0_a0_); shader_code_.push_back(system_temp_ps_pc_p0_a0_);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Set the result to zero if any operand is zero. // Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
@ -13044,49 +12967,6 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
case AluScalarOpcode::kMulsc0: case AluScalarOpcode::kMulsc0:
case AluScalarOpcode::kMulsc1: { case AluScalarOpcode::kMulsc1: {
// Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp();
// Check if the first operand (src0.x) is zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5 + operand_lengths[0]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if the second operand (src1.x) is zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5 + operand_lengths[1]));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any operand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(is_subnormal_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Do the multiplication.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + operand_lengths[0] + operand_lengths[1])); 3 + operand_lengths[0] + operand_lengths[1]));
@ -13097,7 +12977,38 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0); UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
// Set the result to zero if any operand is zero. // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
uint32_t is_subnormal_temp = PushSystemTemp();
// Get the non-NaN multiplicand closer to zero to check if any of them is
// zero.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + DxbcSourceOperandLength(dxbc_operands[0], false, true) +
DxbcSourceOperandLength(dxbc_operands[1], false, true)));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0, false, true);
UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0, false, true);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Check if any multiplicand is zero (min isn't required to flush
// denormals in the result).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(is_subnormal_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Zero the result if any multiplicand is zero.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(