diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index 59646c067..0542a7277 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -3927,6 +3927,7 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
 uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
+  dest.ix = dest.iy = 0;
   dest.iz =
       ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
       DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index dccad4e37..8af3c5669 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -2976,7 +2976,14 @@ EMITTER(MUL_HI_I8, MATCH(I, I8<>, I8<>>)) {
       e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
     } else {
       e.mov(e.al, i.src1);
-      e.imul(i.src2);
+      if (i.src2.is_constant) {
+        // One-operand imul takes no immediate. Stage the constant in dl so
+        // the multiplicand already in al survives (edx is reloaded below).
+        e.mov(e.dl, i.src2.constant());
+        e.imul(e.dl);
+      } else {
+        e.imul(i.src2);
+      }
       e.mov(i.dest, e.ah);
     }
     e.ReloadEDX();
@@ -2990,7 +2997,12 @@ EMITTER(MUL_HI_I16, MATCH(I, I16<>, I16<>>)) {
       e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
     } else {
       e.mov(e.ax, i.src1);
-      e.imul(i.src2);
+      if (i.src2.is_constant) {
+        e.mov(e.dx, i.src2.constant());
+        e.imul(e.dx);
+      } else {
+        e.imul(i.src2);
+      }
       e.mov(i.dest, e.dx);
     }
     e.ReloadEDX();
@@ -3009,7 +3021,12 @@ EMITTER(MUL_HI_I32, MATCH(I, I32<>, I32<>>)) {
       }
     } else {
       e.mov(e.eax, i.src1);
-      e.imul(i.src2);
+      if (i.src2.is_constant) {
+        e.mov(e.edx, i.src2.constant());
+        e.imul(e.edx);
+      } else {
+        e.imul(i.src2);
+      }
       e.mov(i.dest, e.edx);
     }
     e.ReloadEDX();
@@ -3028,7 +3045,12 @@ EMITTER(MUL_HI_I64, MATCH(I, I64<>, I64<>>)) {
       }
     } else {
       e.mov(e.rax, i.src1);
-      e.imul(i.src2);
+      if (i.src2.is_constant) {
+        e.mov(e.rdx, i.src2.constant());
+        e.imul(e.rdx);
+      } else {
+        e.imul(i.src2);
+      }
       e.mov(i.dest, e.rdx);
     }
     e.ReloadEDX();
@@ -4781,10 +4803,27 @@ EMITTER(PACK, MATCH(I, V128<>>)) {
     e.vpshufb(i.dest, e.xmm0, e.GetXmmConstPtr(XMMPackD3DCOLOR));
   }
   static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) {
-    XEASSERTALWAYS();
+    // http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
+    // dest = [(src1.x | src1.y), 0, 0, 0]
+    // NOTE(review): 0xCC is the int3 opcode, so the generated code traps
+    // here; presumably a marker for an unverified sequence - confirm.
+    e.db(0xCC);
+    // imm8 = 0b011: round toward zero, ignoring MXCSR.RC.
+    e.vcvtps2ph(e.xmm0, i.src1, B00000011);
+    e.vxorps(i.dest, i.dest);
+    // Blend mask bit n takes 16-bit word n from xmm0: low two words only.
+    e.vpblendw(i.dest, e.xmm0, B00000011);
   }
   static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
-    XEASSERTALWAYS();
+    // dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0]
+    // NOTE(review): 0xCC is the int3 opcode, so the generated code traps
+    // here; presumably a marker for an unverified sequence - confirm.
+    e.db(0xCC);
+    // imm8 = 0b011: round toward zero, ignoring MXCSR.RC.
+    e.vcvtps2ph(e.xmm0, i.src1, B00000011);
+    e.vxorps(i.dest, i.dest);
+    // Blend mask bit n takes 16-bit word n from xmm0: low four words only.
+    e.vpblendw(i.dest, e.xmm0, B00001111);
   }
   static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
     XEASSERTALWAYS();