[D3D12] DXBC: Skip loading and some ALU ops for identical operands

2018-12-09 00:20:13 +03:00 · 2018-12-09 00:20:13 +03:00 · 1ee3ed03fd
parent 352a443c67
commit 1ee3ed03fd
2 changed files with 571 additions and 396 deletions
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@ -10850,10 +10850,30 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
  // while storing.
  bool replicate_result = false;
  // A small shortcut, operands of cube are the same, but swizzled.
  uint32_t operand_count;
  if (instr.vector_opcode == AluVectorOpcode::kCube) {
    operand_count = 1;
  } else {
    operand_count = uint32_t(instr.operand_count);
  }
  DxbcSourceOperand dxbc_operands[3];
  // Whether the operand is the same as any previous operand, and thus is loaded
  // only once.
  bool operands_duplicate[3] = {};
  uint32_t operand_length_sums[3];
-  for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
+  for (uint32_t i = 0; i < operand_count; ++i) {
-    LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
+    const InstructionOperand& operand = instr.operands[i];
    for (uint32_t j = 0; j < i; ++j) {
      if (operand == instr.operands[j]) {
        operands_duplicate[i] = true;
        dxbc_operands[i] = dxbc_operands[j];
        break;
      }
    }
    if (!operands_duplicate[i]) {
      LoadDxbcSourceOperand(operand, dxbc_operands[i]);
    }
    operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]);
    if (i != 0) {
      operand_length_sums[i] += operand_length_sums[i - 1];
@ -10896,12 +10916,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
  switch (instr.vector_opcode) {
    case AluVectorOpcode::kAdd:
-    case AluVectorOpcode::kMax:
+      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
    // max is commonly used as mov, but probably better not to convert it to
    // make sure things like flushing denormals aren't affected.
    case AluVectorOpcode::kMin:
      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
                                 kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
                             ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                                 3 + operand_length_sums[1]));
      shader_code_.push_back(
@ -10924,6 +10939,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
      UseDxbcSourceOperand(dxbc_operands[1]);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
      if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
        // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
        // flushing denormals (must be done using eq - doing bitwise comparison
        // doesn't flush denormals).
@ -10931,8 +10947,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        // image missing because rcp(0) is multiplied by 0, which results in NaN
        // rather than 0.
        uint32_t is_subnormal_temp = PushSystemTemp();
-      // Get the non-NaN multiplicand closer to zero to check if any of them is
+        // Get the non-NaN multiplicand closer to zero to check if any of them
-      // zero.
+        // is zero.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -10947,7 +10963,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.float_instruction_count;
        // Check if any multiplicand is zero (min isn't required to flush
        // denormals in the result).
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -10964,7 +10981,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
        // Zero the result if any multiplicand is zero.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -10985,8 +11003,39 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.movc_instruction_count;
        // Release is_subnormal_temp.
        PopSystemTemp();
      }
    } break;
    case AluVectorOpcode::kMax:
    case AluVectorOpcode::kMin:
      // max is commonly used as mov.
      if (operands_duplicate[1]) {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + operand_length_sums[0]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
        shader_code_.push_back(system_temp_pv_);
        UseDxbcSourceOperand(dxbc_operands[0]);
        ++stat_.instruction_count;
        ++stat_.mov_instruction_count;
      } else {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(
                kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + operand_length_sums[1]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
        shader_code_.push_back(system_temp_pv_);
        UseDxbcSourceOperand(dxbc_operands[0]);
        UseDxbcSourceOperand(dxbc_operands[1]);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
      }
      break;
    case AluVectorOpcode::kSeq:
    case AluVectorOpcode::kSgt:
    case AluVectorOpcode::kSge:
@ -11054,11 +11103,12 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
      UseDxbcSourceOperand(dxbc_operands[2]);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
      if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
        // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
        // If any operand is zero or denormalized, just leave the addition part.
        uint32_t is_subnormal_temp = PushSystemTemp();
-      // Get the non-NaN multiplicand closer to zero to check if any of them is
+        // Get the non-NaN multiplicand closer to zero to check if any of them
-      // zero.
+        // is zero.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11073,7 +11123,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.float_instruction_count;
        // Check if any multiplicand is zero (min isn't required to flush
        // denormals in the result).
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_EQ) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
@ -11108,6 +11159,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.movc_instruction_count;
        // Release is_subnormal_temp.
        PopSystemTemp();
      }
    } break;
    // Using true eq to compare with zero because it handles denormals and -0.
@ -11158,6 +11210,22 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
    case AluVectorOpcode::kDp4:
    case AluVectorOpcode::kDp3:
    case AluVectorOpcode::kDp2Add: {
      if (instr.operands[0].EqualsAbsolute(instr.operands[1])) {
        // The operands are the same when calculating vector length, no need to
        // emulate 0 * anything = 0 in this case.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(
                kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + operand_length_sums[1]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
        shader_code_.push_back(system_temp_pv_);
        UseDxbcSourceOperand(dxbc_operands[0]);
        UseDxbcSourceOperand(dxbc_operands[1]);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
      } else {
        uint32_t operand_mask;
        if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
          operand_mask = 0b0011;
@ -11187,7 +11255,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        shader_code_.push_back(0);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                10 + operand_length_sums[0]));
        shader_code_.push_back(EncodeVectorMaskedOperand(
@ -11241,7 +11310,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.instruction_count;
        ++stat_.movc_instruction_count;
        // Calculate the dot product.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(
                kCoreOpcodes[uint32_t(instr.vector_opcode)]) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
        shader_code_.push_back(
@ -11257,6 +11327,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.float_instruction_count;
        // Release src1_temp.
        PopSystemTemp();
      }
      // Add src2.x for dp2add.
      if (instr.vector_opcode == AluVectorOpcode::kDp2Add) {
        shader_code_.push_back(
@ -11839,12 +11910,13 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
      UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
      if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
        // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
        // This is an attenuation calculation function, so infinity is probably
        // not very unlikely.
        uint32_t is_subnormal_temp = PushSystemTemp();
-      // Get the non-NaN multiplicand closer to zero to check if any of them is
+        // Get the non-NaN multiplicand closer to zero to check if any of them
-      // zero.
+        // is zero.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11873,7 +11945,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
        // Set pv.y to zero if any multiplicand is zero.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
@ -11891,6 +11964,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        ++stat_.movc_instruction_count;
        // Release is_subnormal_temp.
        PopSystemTemp();
      }
      // pv.z = src0.z
      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
                             ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -11992,7 +12066,20 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
      ++stat_.instruction_count;
      ++stat_.conversion_instruction_count;
      // The `pv = max(src0, src1)` part.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
+      if (operands_duplicate[1]) {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + operand_length_sums[0]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
        shader_code_.push_back(system_temp_pv_);
        UseDxbcSourceOperand(dxbc_operands[0]);
        ++stat_.instruction_count;
        ++stat_.mov_instruction_count;
      } else {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + operand_length_sums[1]));
        shader_code_.push_back(
@ -12002,6 +12089,7 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
        UseDxbcSourceOperand(dxbc_operands[1]);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
      }
      break;
    default:
@ -12010,8 +12098,11 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
      break;
  }
-  for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
+  for (uint32_t i = 0; i < operand_count; ++i) {
-    UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]);
+    uint32_t operand_index = operand_count - 1 - i;
    if (!operands_duplicate[operand_index]) {
      UnloadDxbcSourceOperand(dxbc_operands[operand_index]);
    }
  }
  StoreResult(instr.result, system_temp_pv_, replicate_result);
@ -12036,9 +12127,22 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
  bool predicate_written = false;
  DxbcSourceOperand dxbc_operands[3];
  // Whether the operand is the same as any previous operand, and thus is loaded
  // only once.
  bool operands_duplicate[3] = {};
  uint32_t operand_lengths[3];
  for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
-    LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
+    const InstructionOperand& operand = instr.operands[i];
    for (uint32_t j = 0; j < i; ++j) {
      if (operand == instr.operands[j]) {
        operands_duplicate[i] = true;
        dxbc_operands[i] = dxbc_operands[j];
        break;
      }
    }
    if (!operands_duplicate[i]) {
      LoadDxbcSourceOperand(operand, dxbc_operands[i]);
    }
    operand_lengths[i] = DxbcSourceOperandLength(dxbc_operands[i]);
  }
@ -12098,8 +12202,6 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
  switch (instr.scalar_opcode) {
    case AluScalarOpcode::kAdds:
    case AluScalarOpcode::kMaxs:
    case AluScalarOpcode::kMins:
    case AluScalarOpcode::kSubs: {
      bool subtract = instr.scalar_opcode == AluScalarOpcode::kSubs;
      shader_code_.push_back(
@ -12144,14 +12246,16 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
      UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
      if (instr.operands[0].components[0] != instr.operands[0].components[1]) {
        // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
        uint32_t is_subnormal_temp = PushSystemTemp();
-      // Get the non-NaN multiplicand closer to zero to check if any of them is
+        // Get the non-NaN multiplicand closer to zero to check if any of them
-      // zero.
+        // is zero.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
-              3 + 2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
+                3 +
                2 * DxbcSourceOperandLength(dxbc_operands[0], false, true)));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
        shader_code_.push_back(is_subnormal_temp);
@ -12175,7 +12279,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
        // Zero the result if any multiplicand is zero.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
@ -12193,6 +12298,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
        ++stat_.movc_instruction_count;
        // Release is_subnormal_temp.
        PopSystemTemp();
      }
    } break;
    case AluScalarOpcode::kMulsPrev: {
@ -12382,6 +12488,36 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
      PopSystemTemp();
    } break;
    case AluScalarOpcode::kMaxs:
    case AluScalarOpcode::kMins: {
      // max is commonly used as mov.
      if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
                                                         operand_lengths[0]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
        shader_code_.push_back(system_temp_ps_pc_p0_a0_);
        UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
        ++stat_.instruction_count;
        ++stat_.mov_instruction_count;
      } else {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(
                kCoreOpcodes[uint32_t(instr.scalar_opcode)]) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + 2 * operand_lengths[0]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
        shader_code_.push_back(system_temp_ps_pc_p0_a0_);
        UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
        UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
      }
    } break;
    case AluScalarOpcode::kSeqs:
    case AluScalarOpcode::kSgts:
    case AluScalarOpcode::kSges:
@ -12666,7 +12802,20 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
      ++stat_.instruction_count;
      ++stat_.conversion_instruction_count;
      // The `ps = max(src0.x, src0.y)` part.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
+      if (instr.operands[0].components[0] == instr.operands[0].components[1]) {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 +
                                                         operand_lengths[0]));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
        shader_code_.push_back(system_temp_ps_pc_p0_a0_);
        UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 0);
        ++stat_.instruction_count;
        ++stat_.mov_instruction_count;
      } else {
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
                3 + 2 * operand_lengths[0]));
        shader_code_.push_back(
@ -12676,6 +12825,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
        UseDxbcSourceOperand(dxbc_operands[0], kSwizzleXYZW, 1);
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
      }
      break;
    case AluScalarOpcode::kSubsPrev:
@ -12970,10 +13120,11 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
      UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
      if (!instr.operands[0].EqualsAbsolute(instr.operands[1])) {
        // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
        uint32_t is_subnormal_temp = PushSystemTemp();
-      // Get the non-NaN multiplicand closer to zero to check if any of them is
+        // Get the non-NaN multiplicand closer to zero to check if any of them
-      // zero.
+        // is zero.
        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
@ -13002,7 +13153,8 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
        ++stat_.instruction_count;
        ++stat_.float_instruction_count;
        // Zero the result if any multiplicand is zero.
-      shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
+        shader_code_.push_back(
            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
        shader_code_.push_back(
            EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
@ -13020,6 +13172,7 @@ void DxbcShaderTranslator::ProcessScalarAluInstruction(
        ++stat_.movc_instruction_count;
        // Release is_subnormal_temp.
        PopSystemTemp();
      }
    } break;
    case AluScalarOpcode::kAddsc0:
--- a/src/xenia/gpu/shader.h
+++ b/src/xenia/gpu/shader.h
@ -180,6 +180,28 @@ struct InstructionOperand {
    }
    return false;
  }
  // Whether absolute values of two operands are identical (useful for emulating
  // Shader Model 3 0*anything=0 multiplication behavior).
  bool EqualsAbsolute(const InstructionOperand& other) const {
    if (storage_source != other.storage_source ||
        storage_index != other.storage_index ||
        storage_addressing_mode != other.storage_addressing_mode ||
        component_count != other.component_count) {
      return false;
    }
    for (int i = 0; i < component_count; ++i) {
      if (components[i] != other.components[i]) {
        return false;
      }
    }
    return true;
  }
  // Full equality: the modifiers (negation and absolute value) must match in
  // addition to everything EqualsAbsolute compares.
  bool operator==(const InstructionOperand& other) const {
    if (is_negated != other.is_negated) {
      return false;
    }
    if (is_absolute_value != other.is_absolute_value) {
      return false;
    }
    return EqualsAbsolute(other);
  }
 };
 struct ParsedExecInstruction {