diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
index b74a826c5..309cb24d9 100644
--- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
+++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
@@ -642,6 +642,16 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
             i->Remove();
           }
           break;
+
+        case OPCODE_DOT_PRODUCT_3:
+          // Both operands constant: fold the dot product into the dest value.
+          if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+            v->set_from(i->src1.value);
+            v->DotProduct3(i->src2.value);
+            i->Remove();
+          }
+          break;
+
         default:
           // Ignored.
           break;
diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc
index 1198585df..7265d8bfe 100644
--- a/src/xenia/cpu/hir/value.cc
+++ b/src/xenia/cpu/hir/value.cc
@@ -1163,6 +1163,32 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
   }
 }
 
+// Constant-folds a three-component dot product of this value and |other|.
+// Both operands must be VEC128 constants; on return this value holds the
+// scalar result and its type is changed to FLOAT32.
+void Value::DotProduct3(Value* other) {
+  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
+  switch (type) {
+    case VEC128_TYPE: {
+      // The braces give the locals below their own scope; without them the
+      // jump to `default` would bypass their initialization (ill-formed C++).
+      alignas(16) float result[4];
+      __m128 src1 = _mm_load_ps(constant.v128.f32);
+      __m128 src2 = _mm_load_ps(other->constant.v128.f32);
+      // imm8 high nibble 0111: multiply lanes 0-2 only; low nibble 0001:
+      // write the sum to lane 0.
+      __m128 dest = _mm_dp_ps(src1, src2, 0b01110001);
+      _mm_store_ps(result, dest);
+      // TODO(rick): is this sane?
+      type = FLOAT32_TYPE;
+      constant.f32 = result[0];
+    } break;
+    default:
+      assert_unhandled_case(type);
+      break;
+  }
+}
+
 void Value::ByteSwap() {
   switch (type) {
     case INT8_TYPE:
diff --git a/src/xenia/cpu/hir/value.h b/src/xenia/cpu/hir/value.h
index b5aec8459..6b152896f 100644
--- a/src/xenia/cpu/hir/value.h
+++ b/src/xenia/cpu/hir/value.h
@@ -506,6 +506,7 @@ class Value {
   void VectorRol(Value* other, TypeName type);
   void VectorAdd(Value* other, TypeName type, bool is_unsigned, bool saturate);
   void VectorSub(Value* other, TypeName type, bool is_unsigned, bool saturate);
+  void DotProduct3(Value* other);
   void ByteSwap();
   void CountLeadingZeros(const Value* other);
   bool Compare(Opcode opcode, Value* other);