From 6fd75cea910b2feec05ccbbe69522338df81d882 Mon Sep 17 00:00:00 2001
From: DrChat <arkolbed@gmail.com>
Date: Thu, 1 Mar 2018 20:04:56 -0600
Subject: [PATCH] [CPU] Handle constant multiply in fmadd/fmsub in constant
 propagation pass

---
 src/xenia/cpu/backend/x64/x64_sequences.cc    | 66 -------------------
 .../passes/constant_propagation_pass.cc       | 21 +++++-
 src/xenia/cpu/hir/hir_builder.h               |  7 +-
 3 files changed, 23 insertions(+), 71 deletions(-)

diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index cc868d758..d510fb46e 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -4614,16 +4614,6 @@ EMITTER_OPCODE_TABLE(OPCODE_DIV, DIV_I8, DIV_I16, DIV_I32, DIV_I64, DIV_F32,
 struct MUL_ADD_F32
     : Sequence<MUL_ADD_F32, I<OPCODE_MUL_ADD, F32Op, F32Op, F32Op, F32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      float mul = i.src1.constant() * i.src2.constant();
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vaddss(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // FMA extension
     if (e.IsFeatureEnabled(kX64EmitFMA)) {
       EmitCommutativeBinaryXmmOp(e, i,
@@ -4673,16 +4663,6 @@ struct MUL_ADD_F32
 struct MUL_ADD_F64
     : Sequence<MUL_ADD_F64, I<OPCODE_MUL_ADD, F64Op, F64Op, F64Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      double mul = i.src1.constant() * i.src2.constant();
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vaddsd(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // FMA extension
     if (e.IsFeatureEnabled(kX64EmitFMA)) {
       EmitCommutativeBinaryXmmOp(e, i,
@@ -4733,19 +4713,6 @@ struct MUL_ADD_V128
     : Sequence<MUL_ADD_V128,
                I<OPCODE_MUL_ADD, V128Op, V128Op, V128Op, V128Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      vec128_t mul;
-      for (int n = 0; n < 4; n++) {
-        mul.f32[n] = i.src1.constant().f32[n] * i.src2.constant().f32[n];
-      }
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vaddps(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // TODO(benvanik): the vfmadd sequence produces slightly different results
     // than vmul+vadd and it'd be nice to know why. Until we know, it's
     // disabled so tests pass.
@@ -4811,16 +4778,6 @@ EMITTER_OPCODE_TABLE(OPCODE_MUL_ADD, MUL_ADD_F32, MUL_ADD_F64, MUL_ADD_V128);
 struct MUL_SUB_F32
     : Sequence<MUL_SUB_F32, I<OPCODE_MUL_SUB, F32Op, F32Op, F32Op, F32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      float mul = i.src1.constant() * i.src2.constant();
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vsubss(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // FMA extension
     if (e.IsFeatureEnabled(kX64EmitFMA)) {
       EmitCommutativeBinaryXmmOp(e, i,
@@ -4870,16 +4827,6 @@ struct MUL_SUB_F32
 struct MUL_SUB_F64
     : Sequence<MUL_SUB_F64, I<OPCODE_MUL_SUB, F64Op, F64Op, F64Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      double mul = i.src1.constant() * i.src2.constant();
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vsubsd(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // FMA extension
     if (e.IsFeatureEnabled(kX64EmitFMA)) {
       EmitCommutativeBinaryXmmOp(e, i,
@@ -4930,19 +4877,6 @@ struct MUL_SUB_V128
     : Sequence<MUL_SUB_V128,
                I<OPCODE_MUL_SUB, V128Op, V128Op, V128Op, V128Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Calculate the multiply part if it's constant.
-    // TODO: Do this in the constant propagation pass.
-    if (i.src1.is_constant && i.src2.is_constant) {
-      vec128_t mul;
-      for (int n = 0; n < 4; n++) {
-        mul.f32[n] = i.src1.constant().f32[n] * i.src2.constant().f32[n];
-      }
-
-      e.LoadConstantXmm(e.xmm0, mul);
-      e.vsubps(i.dest, e.xmm0, i.src3);
-      return;
-    }
-
     // FMA extension
     if (e.IsFeatureEnabled(kX64EmitFMA)) {
       EmitCommutativeBinaryXmmOp(e, i,
diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
index bc59c7eab..5a208f589 100644
--- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
+++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
@@ -499,11 +499,20 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
           break;
         case OPCODE_MUL_ADD:
           if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
-            // Multiply part is constant.
             if (i->src3.value->IsConstant()) {
               v->set_from(i->src1.value);
               Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value);
               i->Remove();
+            } else {
+              // Multiply part is constant.
+              Value* mul = builder->AllocValue();
+              mul->set_from(i->src1.value);
+              mul->Mul(i->src2.value);
+
+              Value* add = i->src3.value;
+              i->Replace(&OPCODE_ADD_info, 0);
+              i->set_src1(mul);
+              i->set_src2(add);
             }
           }
           break;
@@ -514,6 +523,16 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
               v->set_from(i->src1.value);
               Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value);
               i->Remove();
+            } else {
+              // Multiply part is constant.
+              Value* mul = builder->AllocValue();
+              mul->set_from(i->src1.value);
+              mul->Mul(i->src2.value);
+
+              Value* add = i->src3.value;
+              i->Replace(&OPCODE_SUB_info, 0);
+              i->set_src1(mul);
+              i->set_src2(add);
             }
           }
           break;
diff --git a/src/xenia/cpu/hir/hir_builder.h b/src/xenia/cpu/hir/hir_builder.h
index 6f860249b..8cfe67d02 100644
--- a/src/xenia/cpu/hir/hir_builder.h
+++ b/src/xenia/cpu/hir/hir_builder.h
@@ -97,8 +97,10 @@ class HIRBuilder {
   void BranchTrue(Value* cond, Label* label, uint16_t branch_flags = 0);
   void BranchFalse(Value* cond, Label* label, uint16_t branch_flags = 0);
 
-  // phi type_name, Block* b1, Value* v1, Block* b2, Value* v2, etc
+  Value* AllocValue(TypeName type = INT64_TYPE);
+  Value* CloneValue(Value* source);
 
+  // phi type_name, Block* b1, Value* v1, Block* b2, Value* v2, etc
   Value* Assign(Value* value);
   Value* Cast(Value* value, TypeName target_type);
   Value* ZeroExtend(Value* value, TypeName target_type);
@@ -253,9 +255,6 @@ class HIRBuilder {
   void DumpValue(StringBuffer* str, Value* value);
   void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op);
 
-  Value* AllocValue(TypeName type = INT64_TYPE);
-  Value* CloneValue(Value* source);
-
  private:
   Block* AppendBlock();
   void EndBlock();