diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 4aed60d15..61f6e0e0a 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -3792,6 +3792,8 @@ int Translate_PACK(TranslationContext& ctx, Instr* i) { IntCode_PACK_SHORT_2, IntCode_INVALID_TYPE, IntCode_INVALID_TYPE, + IntCode_INVALID_TYPE, + IntCode_INVALID_TYPE, }; return DispatchToC(ctx, i, fns[i->flags]); } @@ -3862,6 +3864,22 @@ uint32_t IntCode_UNPACK_S8_IN_16_HI(IntCodeState& ics, const IntCode* i) { } return IA_NEXT; } +uint32_t IntCode_UNPACK_S16_IN_32_LO(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = (int32_t)(int16_t)src1.s8[4 + n]; + } + return IA_NEXT; +} +uint32_t IntCode_UNPACK_S16_IN_32_HI(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = (int32_t)(int16_t)src1.s8[n]; + } + return IA_NEXT; +} int Translate_UNPACK(TranslationContext& ctx, Instr* i) { static IntCodeFn fns[] = { IntCode_UNPACK_D3DCOLOR, @@ -3870,6 +3888,8 @@ int Translate_UNPACK(TranslationContext& ctx, Instr* i) { IntCode_UNPACK_SHORT_2, IntCode_UNPACK_S8_IN_16_LO, IntCode_UNPACK_S8_IN_16_HI, + IntCode_UNPACK_S16_IN_32_LO, + IntCode_UNPACK_S16_IN_32_HI, }; return DispatchToC(ctx, i, fns[i->flags]); } diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc index 9c6c9efa8..535f206a6 100644 --- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc +++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc @@ -1702,7 +1702,7 @@ XEEMITTER(vupklpx, 0x100003CE, VX )(PPCHIRBuilder& f, InstrData& i) { } int InstrEmit_vupkhsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { - // bytes 0-7 expanded to halfwords 0-8 and sign extended + // bytes 0-7 expanded to halfwords 0-7 and sign extended Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_HI); f.StoreVR(vd, v); return 0; @@ -1714,15 +1714,8 @@ XEEMITTER(vupkhsb128, VX128(6, 896), VX128 )(PPCHIRBuilder& f, InstrData return InstrEmit_vupkhsb_(f, VX128_VD128, VX128_VB128); } -int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { - return 1; -} -XEEMITTER(vupkhsh, 0x1000024E, VX )(PPCHIRBuilder& f, InstrData& i) { - return InstrEmit_vupkhsh_(f, i.VX.VD, i.VX.VA, i.VX.VB); -} - int InstrEmit_vupklsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { - // bytes 8-15 expanded to halfwords 0-8 and sign extended + // bytes 8-15 expanded to halfwords 0-7 and sign extended Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_LO); f.StoreVR(vd, v); return 0; @@ -1734,8 +1727,21 @@ XEEMITTER(vupklsb128, VX128(6, 960), VX128 )(PPCHIRBuilder& f, InstrData return InstrEmit_vupklsb_(f, VX128_VD128, VX128_VB128); } +int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // halfwords 0-3 expanded to words 0-3 and sign extended + Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_HI); + f.StoreVR(vd, v); + return 0; +} +XEEMITTER(vupkhsh, 0x1000024E, VX )(PPCHIRBuilder& f, InstrData& i) { + return InstrEmit_vupkhsh_(f, i.VX.VD, i.VX.VA, i.VX.VB); +} + int InstrEmit_vupklsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { - return 1; + // halfwords 4-7 expanded to words 0-3 and sign extended + Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_LO); + f.StoreVR(vd, v); + return 0; } XEEMITTER(vupklsh, 0x100002CE, VX )(PPCHIRBuilder& f, InstrData& i) { return InstrEmit_vupklsh_(f, i.VX.VD, i.VX.VA, i.VX.VB); diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index af3c36b72..5fbbf198c 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -73,6 +73,8 @@ enum PackType { PACK_TYPE_SHORT_2 = 3, PACK_TYPE_S8_IN_16_LO = 4, PACK_TYPE_S8_IN_16_HI = 5, + PACK_TYPE_S16_IN_32_LO = 6, + PACK_TYPE_S16_IN_32_HI = 7, };