From 6fd75cea910b2feec05ccbbe69522338df81d882 Mon Sep 17 00:00:00 2001 From: DrChat Date: Thu, 1 Mar 2018 20:04:56 -0600 Subject: [PATCH] [CPU] Handle constant multiply in fmadd/fmsub in constant propagation pass --- src/xenia/cpu/backend/x64/x64_sequences.cc | 66 ------------------- .../passes/constant_propagation_pass.cc | 21 +++++- src/xenia/cpu/hir/hir_builder.h | 7 +- 3 files changed, 23 insertions(+), 71 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index cc868d758..d510fb46e 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -4614,16 +4614,6 @@ EMITTER_OPCODE_TABLE(OPCODE_DIV, DIV_I8, DIV_I16, DIV_I32, DIV_I64, DIV_F32, struct MUL_ADD_F32 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - float mul = i.src1.constant() * i.src2.constant(); - - e.LoadConstantXmm(e.xmm0, mul); - e.vaddss(i.dest, e.xmm0, i.src3); - return; - } - // FMA extension if (e.IsFeatureEnabled(kX64EmitFMA)) { EmitCommutativeBinaryXmmOp(e, i, @@ -4673,16 +4663,6 @@ struct MUL_ADD_F32 struct MUL_ADD_F64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - double mul = i.src1.constant() * i.src2.constant(); - - e.LoadConstantXmm(e.xmm0, mul); - e.vaddsd(i.dest, e.xmm0, i.src3); - return; - } - // FMA extension if (e.IsFeatureEnabled(kX64EmitFMA)) { EmitCommutativeBinaryXmmOp(e, i, @@ -4733,19 +4713,6 @@ struct MUL_ADD_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - vec128_t mul; - for (int n = 0; n < 4; n++) { - mul.f32[n] = i.src1.constant().f32[n] * i.src2.constant().f32[n]; - } - - e.LoadConstantXmm(e.xmm0, mul); - e.vaddps(i.dest, e.xmm0, i.src3); - return; - } - // TODO(benvanik): the vfmadd sequence produces slightly different results // than vmul+vadd and it'd be nice to know why. Until we know, it's // disabled so tests pass. @@ -4811,16 +4778,6 @@ EMITTER_OPCODE_TABLE(OPCODE_MUL_ADD, MUL_ADD_F32, MUL_ADD_F64, MUL_ADD_V128); struct MUL_SUB_F32 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - float mul = i.src1.constant() * i.src2.constant(); - - e.LoadConstantXmm(e.xmm0, mul); - e.vsubss(i.dest, e.xmm0, i.src3); - return; - } - // FMA extension if (e.IsFeatureEnabled(kX64EmitFMA)) { EmitCommutativeBinaryXmmOp(e, i, @@ -4870,16 +4827,6 @@ struct MUL_SUB_F32 struct MUL_SUB_F64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - double mul = i.src1.constant() * i.src2.constant(); - - e.LoadConstantXmm(e.xmm0, mul); - e.vsubsd(i.dest, e.xmm0, i.src3); - return; - } - // FMA extension if (e.IsFeatureEnabled(kX64EmitFMA)) { EmitCommutativeBinaryXmmOp(e, i, @@ -4930,19 +4877,6 @@ struct MUL_SUB_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // Calculate the multiply part if it's constant. - // TODO: Do this in the constant propagation pass. - if (i.src1.is_constant && i.src2.is_constant) { - vec128_t mul; - for (int n = 0; n < 4; n++) { - mul.f32[n] = i.src1.constant().f32[n] * i.src2.constant().f32[n]; - } - - e.LoadConstantXmm(e.xmm0, mul); - e.vsubps(i.dest, e.xmm0, i.src3); - return; - } - // FMA extension if (e.IsFeatureEnabled(kX64EmitFMA)) { EmitCommutativeBinaryXmmOp(e, i, diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc index bc59c7eab..5a208f589 100644 --- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc +++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc @@ -499,11 +499,20 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { break; case OPCODE_MUL_ADD: if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { - // Multiply part is constant. if (i->src3.value->IsConstant()) { v->set_from(i->src1.value); Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value); i->Remove(); + } else { + // Multiply part is constant. + Value* mul = builder->AllocValue(); + mul->set_from(i->src1.value); + mul->Mul(i->src2.value); + + Value* add = i->src3.value; + i->Replace(&OPCODE_ADD_info, 0); + i->set_src1(mul); + i->set_src2(add); } } break; @@ -514,6 +523,16 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { v->set_from(i->src1.value); Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value); i->Remove(); + } else { + // Multiply part is constant. + Value* mul = builder->AllocValue(); + mul->set_from(i->src1.value); + mul->Mul(i->src2.value); + + Value* add = i->src3.value; + i->Replace(&OPCODE_SUB_info, 0); + i->set_src1(mul); + i->set_src2(add); } } break; diff --git a/src/xenia/cpu/hir/hir_builder.h b/src/xenia/cpu/hir/hir_builder.h index 6f860249b..8cfe67d02 100644 --- a/src/xenia/cpu/hir/hir_builder.h +++ b/src/xenia/cpu/hir/hir_builder.h @@ -97,8 +97,10 @@ class HIRBuilder { void BranchTrue(Value* cond, Label* label, uint16_t branch_flags = 0); void BranchFalse(Value* cond, Label* label, uint16_t branch_flags = 0); - // phi type_name, Block* b1, Value* v1, Block* b2, Value* v2, etc + Value* AllocValue(TypeName type = INT64_TYPE); + Value* CloneValue(Value* source); + // phi type_name, Block* b1, Value* v1, Block* b2, Value* v2, etc Value* Assign(Value* value); Value* Cast(Value* value, TypeName target_type); Value* ZeroExtend(Value* value, TypeName target_type); @@ -253,9 +255,6 @@ class HIRBuilder { void DumpValue(StringBuffer* str, Value* value); void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op); - Value* AllocValue(TypeName type = INT64_TYPE); - Value* CloneValue(Value* source); - private: Block* AppendBlock(); void EndBlock();