diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index ddf61c693..001501b44 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -5640,6 +5640,15 @@ struct VECTOR_SHL_V128
     // Shift 8 words in src1 by amount specified in src2.
     Xbyak::Label emu, end;
 
+    // src1 may be a constant; materialize it in xmm2 so it can be used below.
+    Xmm src1;
+    if (i.src1.is_constant) {
+      src1 = e.xmm2;
+      e.LoadConstantXmm(src1, i.src1.constant());
+    } else {
+      src1 = i.src1;
+    }
+
     // Only bother with this check if shift amt isn't constant.
     if (!i.src2.is_constant) {
       // See if the shift is equal first for a shortcut.
@@ -5653,7 +5662,7 @@ struct VECTOR_SHL_V128
       e.mov(e.rax, 0xF);
       e.vmovq(e.xmm1, e.rax);
       e.vpand(e.xmm0, e.xmm0, e.xmm1);
-      e.vpsllw(i.dest, i.src1, e.xmm0);
+      e.vpsllw(i.dest, src1, e.xmm0);
       e.jmp(end);
     }
 
@@ -5665,7 +5674,7 @@ struct VECTOR_SHL_V128
     } else {
       e.lea(e.r9, e.StashXmm(1, i.src2));
     }
-    e.lea(e.r8, e.StashXmm(0, i.src1));
+    e.lea(e.r8, e.StashXmm(0, src1));
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShlI16));
     e.vmovaps(i.dest, e.xmm0);
 
diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
index 8c3e7f196..44e1e37c1 100644
--- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
+++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc
@@ -772,6 +772,19 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
           }
           break;
 
+        case OPCODE_VECTOR_AVERAGE:
+          // Both operands constant: fold into dest. flags carries the element
+          // type in its low byte and the arithmetic flags in the bits above.
+          if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
+            v->set_from(i->src1.value);
+            uint32_t arith_flags = i->flags >> 8;
+            v->VectorAverage(i->src2.value, hir::TypeName(i->flags & 0xFF),
+                             !!(arith_flags & ARITHMETIC_UNSIGNED),
+                             !!(arith_flags & ARITHMETIC_SATURATE));
+            i->Remove();
+          }
+          break;
+
         default:
           // Ignored.
           break;
diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc
index 38cd29294..a2613e3e9 100644
--- a/src/xenia/cpu/hir/value.cc
+++ b/src/xenia/cpu/hir/value.cc
@@ -1376,6 +1376,37 @@ void Value::DotProduct4(Value* other) {
   }
 }
 
+// Replaces this constant vector with the per-lane rounded average of itself
+// and |other|, (a + b + 1) >> 1. Only 16-bit lanes (INT16_TYPE) are handled;
+// |is_unsigned| selects the lane signedness. |saturate| is unused.
+void Value::VectorAverage(Value* other, TypeName type, bool is_unsigned,
+                          bool saturate) {
+  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
+  switch (type) {
+    case INT16_TYPE: {
+      // TODO(gibbed): is this correct?
+      // Braces keep `default:` from jumping past these initializations.
+      alignas(16) int8_t result[16];
+      __m128i src1 =
+          _mm_load_si128(reinterpret_cast<const __m128i*>(constant.v128.i8));
+      __m128i src2 = _mm_load_si128(
+          reinterpret_cast<const __m128i*>(other->constant.v128.i8));
+      // _mm_avg_epu16 averages unsigned lanes. Flipping the sign bit maps
+      // signed lanes to unsigned order and back, so bias only when signed.
+      const __m128i bias =
+          is_unsigned ? _mm_setzero_si128() : _mm_set1_epi16(-0x8000);
+      __m128i dest = _mm_avg_epu16(_mm_xor_si128(src1, bias),
+                                   _mm_xor_si128(src2, bias));
+      dest = _mm_xor_si128(dest, bias);
+      _mm_store_si128(reinterpret_cast<__m128i*>(result), dest);
+      std::memcpy(constant.v128.i8, result, sizeof(result));
+    } break;
+    default:
+      assert_unhandled_case(type);
+      break;
+  }
+}
+
 void Value::ByteSwap() {
   switch (type) {
     case INT8_TYPE:
diff --git a/src/xenia/cpu/hir/value.h b/src/xenia/cpu/hir/value.h
index 44e4807c4..ff41edf3b 100644
--- a/src/xenia/cpu/hir/value.h
+++ b/src/xenia/cpu/hir/value.h
@@ -535,6 +535,8 @@ class Value {
   void VectorSub(Value* other, TypeName type, bool is_unsigned, bool saturate);
   void DotProduct3(Value* other);
   void DotProduct4(Value* other);
+  void VectorAverage(Value* other, TypeName type, bool is_unsigned,
+                     bool saturate);
   void ByteSwap();
   void CountLeadingZeros(const Value* other);
   bool Compare(Opcode opcode, Value* other);