From 96c203699dcdb068c6c477022c632768011b26f8 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 2 Nov 2014 21:49:39 -0800 Subject: [PATCH] Fixing totally broken vpkd3d128 and adding new pack instructions. --- src/alloy/backend/x64/x64_emitter.cc | 6 + src/alloy/backend/x64/x64_emitter.h | 5 + src/alloy/backend/x64/x64_sequences.cc | 265 ++++++++++++++---- src/alloy/frontend/ppc/ppc_emit_altivec.cc | 240 ++++++++++++---- .../frontend/ppc/test/bin/instr_vpkd3d128.bin | Bin 112 -> 184 bytes .../frontend/ppc/test/bin/instr_vpkd3d128.dis | 36 +++ .../frontend/ppc/test/bin/instr_vpkd3d128.map | 9 + .../frontend/ppc/test/bin/instr_vpkshss.bin | Bin 0 -> 16 bytes .../frontend/ppc/test/bin/instr_vpkshss.dis | 13 + .../frontend/ppc/test/bin/instr_vpkshss.map | 2 + .../frontend/ppc/test/bin/instr_vpkswss.bin | Bin 0 -> 16 bytes .../frontend/ppc/test/bin/instr_vpkswss.dis | 13 + .../frontend/ppc/test/bin/instr_vpkswss.map | 2 + src/alloy/frontend/ppc/test/instr_vpkd3d128.s | 79 ++++++ src/alloy/frontend/ppc/test/instr_vpkshss.s | 17 ++ src/alloy/frontend/ppc/test/instr_vpkswss.s | 17 ++ src/alloy/hir/hir_builder.cc | 20 +- src/alloy/hir/hir_builder.h | 1 + src/alloy/hir/opcodes.h | 40 ++- src/alloy/hir/opcodes.inl | 2 +- src/alloy/test/test_pack.cc | 2 +- 21 files changed, 643 insertions(+), 126 deletions(-) create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkshss.bin create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkshss.dis create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkshss.map create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkswss.bin create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkswss.dis create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vpkswss.map create mode 100644 src/alloy/frontend/ppc/test/instr_vpkshss.s create mode 100644 src/alloy/frontend/ppc/test/instr_vpkswss.s diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 2a7a933d3..5f0597f32 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -801,6 +801,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), + /* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f), /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, @@ -811,7 +812,10 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { 0xFFFFFFFFu, 0x7FFFFFFFu), /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), + /* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u, + 0x09080B0Au, 0x0D0C0F0Eu), /* XMMPermuteControl15 */ vec128b(15), + /* XMMPackD3DCOLORSat */ vec128i(0x404000FFu), /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u), /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, @@ -824,6 +828,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { 0x05040706u, 0x01000302u), /* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu), + /* XMMPackSHORT_2Min */ vec128i(0x403F8001u), + /* XMMPackSHORT_2Max */ vec128i(0x40407FFFu), /* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u), /* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index 83b2bd4ff..c178cff0b 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -49,18 +49,23 @@ enum XmmConst { XMMNormalizeX16Y16, XMM0001, XMM3301, + XMM3333, XMMSignMaskPS, XMMSignMaskPD, XMMAbsMaskPS, XMMAbsMaskPD, XMMByteSwapMask, + XMMByteOrderMask, XMMPermuteControl15, + XMMPackD3DCOLORSat, XMMPackD3DCOLOR, XMMUnpackD3DCOLOR, XMMPackFLOAT16_2, XMMUnpackFLOAT16_2, XMMPackFLOAT16_4, XMMUnpackFLOAT16_4, + XMMPackSHORT_2Min, + XMMPackSHORT_2Max, XMMPackSHORT_2, XMMUnpackSHORT_2, XMMOneOver255, diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index acc2242a8..b927a5258 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -5080,9 +5080,9 @@ EMITTER_OPCODE_TABLE( // ============================================================================ // OPCODE_PACK // ============================================================================ -EMITTER(PACK, MATCH(I, V128<>>)) { +EMITTER(PACK, MATCH(I, V128<>, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - switch (i.instr->flags) { + switch (i.instr->flags & PACK_TYPE_MODE) { case PACK_TYPE_D3DCOLOR: EmitD3DCOLOR(e, i); break; @@ -5095,33 +5095,34 @@ EMITTER(PACK, MATCH(I, V128<>>)) { case PACK_TYPE_SHORT_2: EmitSHORT_2(e, i); break; - case PACK_TYPE_S8_IN_16_LO: - EmitS8_IN_16_LO(e, i); + case PACK_TYPE_8_IN_16: + Emit8_IN_16(e, i, i.instr->flags); break; - case PACK_TYPE_S8_IN_16_HI: - EmitS8_IN_16_HI(e, i); - break; - case PACK_TYPE_S16_IN_32_LO: - EmitS16_IN_32_LO(e, i); - break; - case PACK_TYPE_S16_IN_32_HI: - EmitS16_IN_32_HI(e, i); + case PACK_TYPE_16_IN_32: + Emit16_IN_32(e, i, i.instr->flags); break; default: assert_unhandled_case(i.instr->flags); break; } } static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) { + assert_true(i.src2.value->IsConstantZero()); + // Saturate to [3,3....] so that only values between 3...[00] and 3...[FF] + // are valid. + if (i.src1.is_constant) { + e.LoadConstantXmm(i.dest, i.src1.constant()); + e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat)); + } else { + e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat)); + } + e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333)); + // Extract bytes. // RGBA (XYZW) -> ARGB (WXYZ) // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) | // ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF) - if (i.src1.is_constant) { - e.LoadConstantXmm(i.dest, i.src1.constant()); - e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR)); - } else { - e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLOR)); - } + e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR)); } static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) { + assert_true(i.src2.value->IsConstantZero()); // http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx // dest = [(src1.x | src1.y), 0, 0, 0] // 0|0|0|0|W|Z|Y|X @@ -5130,34 +5131,112 @@ EMITTER(PACK, MATCH(I, V128<>>)) { e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_2)); } static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { + assert_true(i.src2.value->IsConstantZero()); // dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0] // 0|0|0|0|W|Z|Y|X - e.vcvtps2ph(e.xmm0, i.src1, B00000011); + e.vcvtps2ph(i.dest, i.src1, B00000011); // Shuffle to X|Y|Z|W|0|0|0|0 e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4)); } static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) { + assert_true(i.src2.value->IsConstantZero()); // Saturate. - e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMNegativeOne)); - e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMOne)); - // Multiply by SHRT_MAX. - e.vmulps(i.dest, i.dest, e.GetXmmConstPtr(XMMShortMaxPS)); - // Convert to int32. - e.vcvtps2dq(i.dest, i.dest); + e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_2Min)); + e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2Max)); // Pack. e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2)); } - static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { - assert_always(); + static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) { + // TODO(benvanik): handle src2 (or src1) being constant zero + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + if (IsPackOutSaturate(flags)) { + // unsigned -> unsigned + saturate + assert_always(); + } else { + // unsigned -> unsigned + assert_always(); + } + } else { + if (IsPackOutSaturate(flags)) { + // unsigned -> signed + saturate + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } + } else { + if (IsPackOutUnsigned(flags)) { + if (IsPackOutSaturate(flags)) { + // signed -> unsigned + saturate + // PACKUSWB / SaturateSignedWordToUnsignedByte + e.vpackuswb(i.dest, i.src1, i.src2); + e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask)); + } else { + // signed -> unsigned + assert_always(); + } + } else { + if (IsPackOutSaturate(flags)) { + // signed -> signed + saturate + // PACKSSWB / SaturateSignedWordToSignedByte + e.vpacksswb(i.dest, i.src1, i.src2); + e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask)); + } else { + // signed -> signed + assert_always(); + } + } + } } - static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { - assert_always(); - } - static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { - assert_always(); - } - static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { - assert_always(); + static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) { + // TODO(benvanik): handle src2 (or src1) being constant zero + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + if (IsPackOutSaturate(flags)) { + // unsigned -> unsigned + saturate + assert_always(); + } else { + // unsigned -> unsigned + assert_always(); + } + } else { + if (IsPackOutSaturate(flags)) { + // unsigned -> signed + saturate + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } + } else { + if (IsPackOutUnsigned(flags)) { + if (IsPackOutSaturate(flags)) { + // signed -> unsigned + saturate + // PACKUSDW + // TMP[15:0] <- (DEST[31:0] < 0) ? 0 : DEST[15:0]; + // DEST[15:0] <- (DEST[31:0] > FFFFH) ? FFFFH : TMP[15:0]; + e.vpackusdw(i.dest, i.src1, i.src2); + e.vpshuflw(i.dest, i.dest, B10110001); + e.vpshufhw(i.dest, i.dest, B10110001); + } else { + // signed -> unsigned + assert_always(); + } + } else { + if (IsPackOutSaturate(flags)) { + // signed -> signed + saturate + // PACKSSDW / SaturateSignedDwordToSignedWord + e.vpackssdw(i.dest, i.src1, i.src2); + e.vpshuflw(i.dest, i.dest, B10110001); + e.vpshufhw(i.dest, i.dest, B10110001); + } else { + // signed -> signed + assert_always(); + } + } + } } }; EMITTER_OPCODE_TABLE( @@ -5170,7 +5249,7 @@ EMITTER_OPCODE_TABLE( // ============================================================================ EMITTER(UNPACK, MATCH(I, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - switch (i.instr->flags) { + switch (i.instr->flags & PACK_TYPE_MODE) { case PACK_TYPE_D3DCOLOR: EmitD3DCOLOR(e, i); break; @@ -5183,17 +5262,11 @@ EMITTER(UNPACK, MATCH(I, V128<>>)) { case PACK_TYPE_SHORT_2: EmitSHORT_2(e, i); break; - case PACK_TYPE_S8_IN_16_LO: - EmitS8_IN_16_LO(e, i); + case PACK_TYPE_8_IN_16: + Emit8_IN_16(e, i, i.instr->flags); break; - case PACK_TYPE_S8_IN_16_HI: - EmitS8_IN_16_HI(e, i); - break; - case PACK_TYPE_S16_IN_32_LO: - EmitS16_IN_32_LO(e, i); - break; - case PACK_TYPE_S16_IN_32_HI: - EmitS16_IN_32_HI(e, i); + case PACK_TYPE_16_IN_32: + Emit16_IN_32(e, i, i.instr->flags); break; default: assert_unhandled_case(i.instr->flags); break; } @@ -5271,21 +5344,93 @@ EMITTER(UNPACK, MATCH(I, V128<>>)) { // Add 3,3,0,1. e.vpor(i.dest, e.GetXmmConstPtr(XMM3301)); } - static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { - e.vpunpckhbw(i.dest, i.src1, i.src1); - e.vpsrad(i.dest, 8); + static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) { + assert_false(IsPackOutSaturate(flags)); + if (IsPackToLo(flags)) { + // Unpack to LO. + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + // unsigned -> unsigned + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } else { + if (IsPackOutUnsigned(flags)) { + // signed -> unsigned + assert_always(); + } else { + // signed -> signed + e.vpunpckhbw(i.dest, i.src1, i.src1); + e.vpsrad(i.dest, 8); + } + } + } else { + // Unpack to HI. + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + // unsigned -> unsigned + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } else { + if (IsPackOutUnsigned(flags)) { + // signed -> unsigned + assert_always(); + } else { + // signed -> signed + e.vpunpcklbw(i.dest, i.src1, i.src1); + e.vpsrad(i.dest, 8); + } + } + } } - static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { - e.vpunpcklbw(i.dest, i.src1, i.src1); - e.vpsrad(i.dest, 8); - } - static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { - e.vpunpckhwd(i.dest, i.src1, i.src1); - e.vpsrad(i.dest, 16); - } - static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { - e.vpunpcklwd(i.dest, i.src1, i.src1); - e.vpsrad(i.dest, 16); + static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) { + assert_false(IsPackOutSaturate(flags)); + if (IsPackToLo(flags)) { + // Unpack to LO. + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + // unsigned -> unsigned + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } else { + if (IsPackOutUnsigned(flags)) { + // signed -> unsigned + assert_always(); + } else { + // signed -> signed + e.vpunpckhwd(i.dest, i.src1, i.src1); + e.vpsrad(i.dest, 16); + } + } + } else { + // Unpack to HI. + if (IsPackInUnsigned(flags)) { + if (IsPackOutUnsigned(flags)) { + // unsigned -> unsigned + assert_always(); + } else { + // unsigned -> signed + assert_always(); + } + } else { + if (IsPackOutUnsigned(flags)) { + // signed -> unsigned + assert_always(); + } else { + // signed -> signed + e.vpunpcklwd(i.dest, i.src1, i.src1); + e.vpsrad(i.dest, 16); + } + } + } } }; EMITTER_OPCODE_TABLE( diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc index f5ad96053..ac6e1ea45 100644 --- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc +++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc @@ -1733,76 +1733,162 @@ XEEMITTER(vpkpx, 0x1000030E, VX)(PPCHIRBuilder& f, InstrData& i) { return 1; } +int InstrEmit_vpkshss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Signed Halfword Signed Saturate + // Convert VA and VB from signed words to signed saturated bytes then + // concat: + // for each i in VA + VB: + // i = int8_t(Clamp(EXTS(int16_t(t)), -128, 127)) + // dest = VA | VB (lower 8bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED | + PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkshss, 0x1000018E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkshss_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkshss128, VX128(5, 512), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkshss_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkswss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Signed Word Signed Saturate + // Convert VA and VB from signed int words to signed saturated shorts then + // concat: + // for each i in VA + VB: + // i = int16_t(Clamp(EXTS(int32_t(t)), -2^15, 2^15-1)) + // dest = VA | VB (lower 16bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED | + PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkswss, 0x100001CE, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkswss_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkswss128, VX128(5, 640), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkswss_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkswus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Signed Word Unsigned Saturate + // Convert VA and VB from signed int words to unsigned saturated shorts then + // concat: + // for each i in VA + VB: + // i = uint16_t(Clamp(EXTS(int32_t(t)), 0, 2^16-1)) + // dest = VA | VB (lower 16bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkswus, 0x1000014E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkswus_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkswus128, VX128(5, 704), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkswus_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkuhum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Unsigned Halfword Unsigned Modulo + // Convert VA and VB from unsigned shorts to unsigned bytes then concat: + // for each i in VA + VB: + // i = uint8_t(uint16_t(i)) + // dest = VA | VB (lower 8bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_UNSATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkuhum, 0x1000000E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuhum_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkuhum128, VX128(5, 768), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuhum_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkuhus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Unsigned Halfword Unsigned Saturate + // Convert VA and VB from unsigned shorts to unsigned saturated bytes then + // concat: + // for each i in VA + VB: + // i = uint8_t(Clamp(EXTZ(uint16_t(i)), 0, 255)) + // dest = VA | VB (lower 8bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkuhus, 0x1000008E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuhus_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkuhus128, VX128(5, 832), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuhus_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkshus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Signed Halfword Unsigned Saturate + // Convert VA and VB from signed shorts to unsigned saturated bytes then + // concat: + // for each i in VA + VB: + // i = uint8_t(Clamp(EXTS(int16_t(i)), 0, 255)) + // dest = VA | VB (lower 8bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkshus, 0x1000010E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkshus_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkshus128, VX128(5, 576), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkshus_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkuwum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Unsigned Word Unsigned Modulo + // Concat low shorts from VA + VB: + // for each i in VA + VB: + // i = uint16_t(uint32_t(i)) + // dest = VA | VB (lower 16bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_UNSATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkuwum, 0x1000004E, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuwum_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkuwum128, VX128(5, 896), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuwum_(f, VX128_VD128, VX128_VA128, VX128_VB128); } +int InstrEmit_vpkuwus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) { + // Vector Pack Unsigned Word Unsigned Saturate + // Convert VA and VB from unsigned int words to unsigned saturated shorts then + // concat: + // for each i in VA + VB: + // i = uint16_t(Clamp(EXTZ(uint32_t(t)), 0, 2^16-1)) + // dest = VA | VB (lower 16bit values) + Value* v = f.Pack(f.LoadVR(va), f.LoadVR(vb), + PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED | + PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE); + f.StoreVR(vd, v); + return 0; +} XEEMITTER(vpkuwus, 0x100000CE, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuwus_(f, i.VX.VD, i.VX.VA, i.VX.VB); } XEEMITTER(vpkuwus128, VX128(5, 960), VX128)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + return InstrEmit_vpkuwus_(f, VX128_VD128, VX128_VA128, VX128_VB128); } XEEMITTER(vupkhpx, 0x1000034E, VX)(PPCHIRBuilder& f, InstrData& i) { @@ -1816,8 +1902,11 @@ XEEMITTER(vupklpx, 0x100003CE, VX)(PPCHIRBuilder& f, InstrData& i) { } int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { + // Vector Unpack High Signed Halfword // halfwords 0-3 expanded to words 0-3 and sign extended - Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_HI); + Value* v = + f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_16_IN_32 | + PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED); f.StoreVR(vd, v); return 0; } @@ -1831,8 +1920,11 @@ XEEMITTER(vupkhsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) { } int InstrEmit_vupklsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { + // Vector Unpack Low Signed Halfword // halfwords 4-7 expanded to words 0-3 and sign extended - Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_LO); + Value* v = + f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_16_IN_32 | + PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED); f.StoreVR(vd, v); return 0; } @@ -1846,8 +1938,11 @@ XEEMITTER(vupklsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) { } int InstrEmit_vupkhsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { + // Vector Unpack High Signed Byte // bytes 0-7 expanded to halfwords 0-7 and sign extended - Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_HI); + Value* v = + f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_8_IN_16 | + PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED); f.StoreVR(vd, v); return 0; } @@ -1864,8 +1959,10 @@ XEEMITTER(vupkhsb128, VX128(6, 896), VX128)(PPCHIRBuilder& f, InstrData& i) { } int InstrEmit_vupklsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { + // Vector Unpack Low Signed Byte // bytes 8-15 expanded to halfwords 0-7 and sign extended - Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_LO); + Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_8_IN_16 | + PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED); f.StoreVR(vd, v); return 0; } @@ -1886,8 +1983,8 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5); const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5); uint32_t type = i.VX128_4.IMM >> 2; - uint32_t shift = i.VX128_4.IMM & 0x3; - uint32_t pack = i.VX128_4.z; + uint32_t pack = i.VX128_4.IMM & 0x3; + uint32_t shift = i.VX128_4.z; Value* v = f.LoadVR(vb); switch (type) { case 0: // VPACK_D3DCOLOR @@ -1909,33 +2006,64 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, // http://hlssmod.net/he_code/public/pixelwriter.h // control = prev:0123 | new:4567 uint32_t control = PERMUTE_IDENTITY; // original - uint32_t src = xerotl(0x07060504, shift * 8); - uint32_t mask = 0; switch (pack) { case 1: // VPACK_32 // VPACK_32 & shift = 3 puts lower 32 bits in x (leftmost slot). - mask = 0x000000FF << (shift * 8); - control = (control & ~mask) | (src & mask); + switch (shift) { + case 0: + control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 3); + break; + case 1: + control = PERMUTE_MASK(0, 0, 0, 1, 1, 3, 0, 3); + break; + case 2: + control = PERMUTE_MASK(0, 0, 1, 3, 0, 2, 0, 3); + break; + case 3: + control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3); + break; + default: + assert_unhandled_case(shift); + return 1; + } break; case 2: // 64bit - if (shift < 3) { - mask = 0x0000FFFF << (shift * 8); - } else { - // w - src = 0x07000000; - mask = 0xFF000000; + switch (shift) { + case 0: + control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3); + break; + case 1: + control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3); + break; + case 2: + control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3); + break; + case 3: + control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3); + break; + default: + assert_unhandled_case(shift); + return 1; } - control = (control & ~mask) | (src & mask); break; case 3: // 64bit - if (shift < 3) { - mask = 0x0000FFFF << (shift * 8); - } else { - // z - src = 0x00000004; - mask = 0x000000FF; + switch (shift) { + case 0: + control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3); + break; + case 1: + control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3); + break; + case 2: + control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3); + break; + case 3: + control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 2); + break; + default: + assert_unhandled_case(shift); + return 1; } - control = (control & ~mask) | (src & mask); break; default: assert_unhandled_case(pack); diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.bin b/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.bin index 44bf995edb9139fd18748b3fb9599d1107ce2cd7..acd9e1c819525874388cf179c2996ff6c4984a01 100644 GIT binary patch literal 184 zcmb1WloRl4U{H|2qywPh6QJ}3Fx>=I-y{c-Z<2$^H_1Wdo1yZ}Q2A!4d^1$O6)I1N O?uDv@(bJ%OC=CEhyDTOE delta 5 McmdnNSTLag00w~qDgXcg diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.dis b/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.dis index 82604fd17..9f616af8d 100644 --- a/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.dis +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.dis @@ -59,3 +59,39 @@ Disassembly of section .text: 0000000000100068 : 100068: 18 83 1e d0 vpkd3d128 v4,v3,0,2,2 10006c: 4e 80 00 20 blr + +0000000000100070 : + 100070: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0 + 100074: 4e 80 00 20 blr + +0000000000100078 : + 100078: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0 + 10007c: 4e 80 00 20 blr + +0000000000100080 : + 100080: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0 + 100084: 4e 80 00 20 blr + +0000000000100088 : + 100088: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0 + 10008c: 4e 80 00 20 blr + +0000000000100090 : + 100090: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0 + 100094: 4e 80 00 20 blr + +0000000000100098 : + 100098: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0 + 10009c: 4e 80 00 20 blr + +00000000001000a0 : + 1000a0: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0 + 1000a4: 4e 80 00 20 blr + +00000000001000a8 : + 1000a8: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0 + 1000ac: 4e 80 00 20 blr + +00000000001000b0 : + 1000b0: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0 + 1000b4: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.map b/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.map index f52351d9b..d8fde4ee6 100644 --- a/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.map +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkd3d128.map @@ -12,3 +12,12 @@ 0000000000000058 t test_vpkd3d128_d3dcolor_3_1 0000000000000060 t test_vpkd3d128_d3dcolor_3_2 0000000000000068 t test_vpkd3d128_d3dcolor_3_3 +0000000000000070 t test_vpkd3d128_short2_invalid_0 +0000000000000078 t test_vpkd3d128_short2_invalid_1 +0000000000000080 t test_vpkd3d128_short2_0 +0000000000000088 t test_vpkd3d128_short2_1 +0000000000000090 t test_vpkd3d128_short2_2 +0000000000000098 t test_vpkd3d128_float16_2_invalid_0 +00000000000000a0 t test_vpkd3d128_float16_2_0 +00000000000000a8 t test_vpkd3d128_float16_4_invalid_0 +00000000000000b0 t test_vpkd3d128_float16_4_0 diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkshss.bin b/src/alloy/frontend/ppc/test/bin/instr_vpkshss.bin new file mode 100644 index 0000000000000000000000000000000000000000..45005cc56cc8d7bf953b0ec925f1c8436b160971 GIT binary patch literal 16 RcmWewtk~z*z@Q)ir2#2I1fc)` literal 0 HcmV?d00001 diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkshss.dis b/src/alloy/frontend/ppc/test/bin/instr_vpkshss.dis new file mode 100644 index 000000000..580c9f973 --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkshss.dis @@ -0,0 +1,13 @@ + +/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkshss.o: file format elf64-powerpc + + +Disassembly of section .text: + +0000000000100000 : + 100000: 10 a3 21 8e vpkshss v5,v3,v4 + 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 10 a3 21 8e vpkshss v5,v3,v4 + 10000c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkshss.map b/src/alloy/frontend/ppc/test/bin/instr_vpkshss.map new file mode 100644 index 000000000..5e87f54da --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkshss.map @@ -0,0 +1,2 @@ +0000000000000000 t test_vpkshss_0 +0000000000000008 t test_vpkshss_1 diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkswss.bin b/src/alloy/frontend/ppc/test/bin/instr_vpkswss.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61dc7ceb6c4fcc2736c58c1e73a8c025be5cc99 GIT binary patch literal 16 ScmWewta#3^fk8n4N&^5b!UZ7! literal 0 HcmV?d00001 diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkswss.dis b/src/alloy/frontend/ppc/test/bin/instr_vpkswss.dis new file mode 100644 index 000000000..fb1339cc4 --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkswss.dis @@ -0,0 +1,13 @@ + +/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkswss.o: file format elf64-powerpc + + +Disassembly of section .text: + +0000000000100000 : + 100000: 10 a3 21 ce vpkswss v5,v3,v4 + 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 10 a3 21 ce vpkswss v5,v3,v4 + 10000c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_vpkswss.map b/src/alloy/frontend/ppc/test/bin/instr_vpkswss.map new file mode 100644 index 000000000..a9055626a --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vpkswss.map @@ -0,0 +1,2 @@ +0000000000000000 t test_vpkswss_0 +0000000000000008 t test_vpkswss_1 diff --git a/src/alloy/frontend/ppc/test/instr_vpkd3d128.s b/src/alloy/frontend/ppc/test/instr_vpkd3d128.s index ccb5d17d0..babff5853 100644 --- a/src/alloy/frontend/ppc/test/instr_vpkd3d128.s +++ b/src/alloy/frontend/ppc/test/instr_vpkd3d128.s @@ -133,3 +133,82 @@ test_vpkd3d128_d3dcolor_3_3: blr #_ REGISTER_OUT v3 [40400001, 40400002, 40400003, 40400004] #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 00000000] + + +test_vpkd3d128_short2_invalid_0: + #_ REGISTER_IN v3 [43817E00, C37CFC00, 42A23EC8, 403DB757] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 1, 1, 0 + .long 0x18851E10 + blr + #_ REGISTER_OUT v3 [43817E00, C37CFC00, 42A23EC8, 403DB757] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001] +test_vpkd3d128_short2_invalid_1: + #_ REGISTER_IN v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 1, 1, 0 + .long 0x18851E10 + blr + #_ REGISTER_OUT v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001] + +test_vpkd3d128_short2_0: + #_ REGISTER_IN v3 [40407FFF, 403F8001, 00000000, 00000000] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 1, 1, 0 + .long 0x18851E10 + blr + #_ REGISTER_OUT v3 [40407FFF, 403F8001, 00000000, 00000000] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001] +test_vpkd3d128_short2_1: + #_ REGISTER_IN v3 [40404000, 403FC000, 40400003, 403F8001] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 1, 1, 0 + .long 0x18851E10 + blr + #_ REGISTER_OUT v3 [40404000, 403FC000, 40400003, 403F8001] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 4000C000] +test_vpkd3d128_short2_2: + #_ REGISTER_IN v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 1, 1, 0 + .long 0x18851E10 + blr + #_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333] + +test_vpkd3d128_float16_2_invalid_0: + #_ REGISTER_IN v3 [3FC00000, BFC00000, 42A23EC8, 403DB757] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 3, 1, 0 + .long 0x188D1E10 + blr + #_ REGISTER_OUT v3 [3FC00000, BFC00000, 42A23EC8, 403DB757] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3E00BE00] + +test_vpkd3d128_float16_2_0: + #_ REGISTER_IN v3 [3F000000, BF000000, 00000000, 00000000] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 3, 1, 0 + .long 0x188D1E10 + blr + #_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800] + +test_vpkd3d128_float16_4_invalid_0: + #_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 5, 2, 0 + .long 0x18961E10 + blr + #_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00] + +test_vpkd3d128_float16_4_0: + #_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000] + #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] + # vpkd3d128 v4, v3, 5, 2, 0 + .long 0x18961E10 + blr + #_ REGISTER_OUT v3 [3F000000, BF000000, 3F000000, BF000000] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3800B800] diff --git a/src/alloy/frontend/ppc/test/instr_vpkshss.s b/src/alloy/frontend/ppc/test/instr_vpkshss.s new file mode 100644 index 000000000..ac179dfe0 --- /dev/null +++ b/src/alloy/frontend/ppc/test/instr_vpkshss.s @@ -0,0 +1,17 @@ +test_vpkshss_0: + #_ REGISTER_IN v3 [00000001, 00020003, 00040005, 00060007] + #_ REGISTER_IN v4 [00080009, 000A000B, 000C000D, 000E000F] + vpkshss v5, v3, v4 + blr + #_ REGISTER_OUT v3 [00000001, 00020003, 00040005, 00060007] + #_ REGISTER_OUT v4 [00080009, 000A000B, 000C000D, 000E000F] + #_ REGISTER_OUT v5 [00010203, 04050607, 08090A0B, 0C0D0E0F] + +test_vpkshss_1: + #_ REGISTER_IN v3 [7FFF8000, 00020003, 00040005, 00060007] + #_ REGISTER_IN v4 [7FFF8000, 000A000B, 000C000D, 000E000F] + vpkshss v5, v3, v4 + blr + #_ REGISTER_OUT v3 [7FFF8000, 00020003, 00040005, 00060007] + #_ REGISTER_OUT v4 [7FFF8000, 000A000B, 000C000D, 000E000F] + #_ REGISTER_OUT v5 [7F800203, 04050607, 7F800A0B, 0C0D0E0F] diff --git a/src/alloy/frontend/ppc/test/instr_vpkswss.s b/src/alloy/frontend/ppc/test/instr_vpkswss.s new file mode 100644 index 000000000..f4fbd1f1c --- /dev/null +++ b/src/alloy/frontend/ppc/test/instr_vpkswss.s @@ -0,0 +1,17 @@ +test_vpkswss_0: + #_ REGISTER_IN v3 [00000001, 00000002, 00000003, 00000004] + #_ REGISTER_IN v4 [00000005, 00000006, 00000007, 00000008] + vpkswss v5, v3, v4 + blr + #_ REGISTER_OUT v3 [00000001, 00000002, 00000003, 00000004] + #_ REGISTER_OUT v4 [00000005, 00000006, 00000007, 00000008] + #_ REGISTER_OUT v5 [00010002, 00030004, 00050006, 00070008] + +test_vpkswss_1: + #_ REGISTER_IN v3 [7FFFFFFF, 80000000, 00000000, 00000004] + #_ REGISTER_IN v4 [7FFFFFFF, 80000000, 00000000, 00000008] + vpkswss v5, v3, v4 + blr + #_ REGISTER_OUT v3 [7FFFFFFF, 80000000, 00000000, 00000004] + #_ REGISTER_OUT v4 [7FFFFFFF, 80000000, 00000000, 00000008] + #_ REGISTER_OUT v5 [7FFF8000, 00000004, 7FFF8000, 00000008] diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index 58929fd2d..f1d3f7883 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -1880,10 +1880,24 @@ Value* HIRBuilder::Swizzle(Value* value, TypeName part_type, } Value* HIRBuilder::Pack(Value* value, uint32_t pack_flags) { - ASSERT_VECTOR_TYPE(value); + return Pack(value, LoadZero(VEC128_TYPE), pack_flags); +} + +Value* HIRBuilder::Pack(Value* value1, Value* value2, uint32_t pack_flags) { + ASSERT_VECTOR_TYPE(value1); + ASSERT_VECTOR_TYPE(value2); + switch (pack_flags & PACK_TYPE_MODE) { + case PACK_TYPE_D3DCOLOR: + case PACK_TYPE_FLOAT16_2: + case PACK_TYPE_FLOAT16_4: + case PACK_TYPE_SHORT_2: + assert_true(value2->IsConstantZero()); + break; + } Instr* i = AppendInstr(OPCODE_PACK_info, pack_flags, AllocValue(VEC128_TYPE)); - i->set_src1(value); - i->src2.value = i->src3.value = NULL; + i->set_src1(value1); + i->set_src2(value2); + i->src3.value = NULL; return i->dest; } diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h index ccbd3b339..71cc4ea6d 100644 --- a/src/alloy/hir/hir_builder.h +++ b/src/alloy/hir/hir_builder.h @@ -218,6 +218,7 @@ class HIRBuilder { Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask); // SelectBits(cond, value1, value2) Value* Pack(Value* value, uint32_t pack_flags = 0); + Value* Pack(Value* value1, Value* value2, uint32_t pack_flags = 0); Value* Unpack(Value* value, uint32_t pack_flags = 0); Value* CompareExchange(Value* address, Value* compare_value, diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index ae3b9ca33..60aaff61f 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -65,16 +65,46 @@ enum Swizzles { SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1), SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2), }; -enum PackType { +enum PackType : uint16_t { + // Special types: PACK_TYPE_D3DCOLOR = 0, PACK_TYPE_FLOAT16_2 = 1, PACK_TYPE_FLOAT16_4 = 2, PACK_TYPE_SHORT_2 = 3, - PACK_TYPE_S8_IN_16_LO = 4, - PACK_TYPE_S8_IN_16_HI = 5, - PACK_TYPE_S16_IN_32_LO = 6, - PACK_TYPE_S16_IN_32_HI = 7, + + // Types which use the bitmasks below for configuration: + PACK_TYPE_8_IN_16 = 4, + PACK_TYPE_16_IN_32 = 5, + + PACK_TYPE_MODE = 0x000F, // just to get the mode + + // Unpack to low or high parts. + PACK_TYPE_TO_LO = 0 << 12, + PACK_TYPE_TO_HI = 1 << 12, + + // Input/output arithmetic flags: + PACK_TYPE_IN_SIGNED = 0 << 13, + PACK_TYPE_IN_UNSIGNED = 1 << 13, + PACK_TYPE_OUT_SIGNED = 0 << 14, + PACK_TYPE_OUT_UNSIGNED = 1 << 14, + PACK_TYPE_OUT_UNSATURATE = 0 << 15, + PACK_TYPE_OUT_SATURATE = 1 << 15, }; +inline bool IsPackToHi(uint32_t flags) { + return (flags & PACK_TYPE_TO_HI) == PACK_TYPE_TO_HI; +} +inline bool IsPackToLo(uint32_t flags) { + return !IsPackToHi(flags); +} +inline bool IsPackInUnsigned(uint32_t flags) { + return (flags & PACK_TYPE_IN_UNSIGNED) == PACK_TYPE_IN_UNSIGNED; +} +inline bool IsPackOutUnsigned(uint32_t flags) { + return (flags & PACK_TYPE_OUT_UNSIGNED) == PACK_TYPE_OUT_UNSIGNED; +} +inline bool IsPackOutSaturate(uint32_t flags) { + return (flags & PACK_TYPE_OUT_SATURATE) == PACK_TYPE_OUT_SATURATE; +} enum Opcode { OPCODE_COMMENT, diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index 3baf405c8..d7e27cab5 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -596,7 +596,7 @@ DEFINE_OPCODE( DEFINE_OPCODE( OPCODE_PACK, "pack", - OPCODE_SIG_V_V, + OPCODE_SIG_V_V_V, 0) DEFINE_OPCODE( diff --git a/src/alloy/test/test_pack.cc b/src/alloy/test/test_pack.cc index 4e246e72a..4dc82f509 100644 --- a/src/alloy/test/test_pack.cc +++ b/src/alloy/test/test_pack.cc @@ -27,7 +27,7 @@ TEST_CASE("PACK_D3DCOLOR", "[instr]") { }); test.Run([](PPCContext* ctx) { ctx->v[4] = - vec128i(0x3F800050, 0x3F800060, 0x3F800070, 0x3F800080); + vec128i(0x40400050, 0x40400060, 0x40400070, 0x40400080); }, [](PPCContext* ctx) { auto result = ctx->v[3];