Fixing totally broken vpkd3d128 and adding new pack instructions.

This commit is contained in:
Ben Vanik 2014-11-02 21:49:39 -08:00
parent 32f42cd5ae
commit 96c203699d
21 changed files with 643 additions and 126 deletions

View File

@ -801,6 +801,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u,
0x80000000u, 0x80000000u), 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u,
@ -811,7 +812,10 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
0xFFFFFFFFu, 0x7FFFFFFFu), 0xFFFFFFFFu, 0x7FFFFFFFu),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u,
0x08090A0Bu, 0x0C0D0E0Fu), 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u,
0x09080B0Au, 0x0D0C0F0Eu),
/* XMMPermuteControl15 */ vec128b(15), /* XMMPermuteControl15 */ vec128b(15),
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
0xFFFFFFFFu, 0x0C000408u), 0xFFFFFFFFu, 0x0C000408u),
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du,
@ -824,6 +828,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
0x05040706u, 0x01000302u), 0x05040706u, 0x01000302u),
/* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, /* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu,
0xFFFFFFFFu, 0xFFFFFFFFu), 0xFFFFFFFFu, 0xFFFFFFFFu),
/* XMMPackSHORT_2Min */ vec128i(0x403F8001u),
/* XMMPackSHORT_2Max */ vec128i(0x40407FFFu),
/* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, /* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
0xFFFFFFFFu, 0x01000504u), 0xFFFFFFFFu, 0x01000504u),
/* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, /* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu,

View File

@ -49,18 +49,23 @@ enum XmmConst {
XMMNormalizeX16Y16, XMMNormalizeX16Y16,
XMM0001, XMM0001,
XMM3301, XMM3301,
XMM3333,
XMMSignMaskPS, XMMSignMaskPS,
XMMSignMaskPD, XMMSignMaskPD,
XMMAbsMaskPS, XMMAbsMaskPS,
XMMAbsMaskPD, XMMAbsMaskPD,
XMMByteSwapMask, XMMByteSwapMask,
XMMByteOrderMask,
XMMPermuteControl15, XMMPermuteControl15,
XMMPackD3DCOLORSat,
XMMPackD3DCOLOR, XMMPackD3DCOLOR,
XMMUnpackD3DCOLOR, XMMUnpackD3DCOLOR,
XMMPackFLOAT16_2, XMMPackFLOAT16_2,
XMMUnpackFLOAT16_2, XMMUnpackFLOAT16_2,
XMMPackFLOAT16_4, XMMPackFLOAT16_4,
XMMUnpackFLOAT16_4, XMMUnpackFLOAT16_4,
XMMPackSHORT_2Min,
XMMPackSHORT_2Max,
XMMPackSHORT_2, XMMPackSHORT_2,
XMMUnpackSHORT_2, XMMUnpackSHORT_2,
XMMOneOver255, XMMOneOver255,

View File

@ -5080,9 +5080,9 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
// OPCODE_PACK // OPCODE_PACK
// ============================================================================ // ============================================================================
EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) { EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
switch (i.instr->flags) { switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR: case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i); EmitD3DCOLOR(e, i);
break; break;
@ -5095,33 +5095,34 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
case PACK_TYPE_SHORT_2: case PACK_TYPE_SHORT_2:
EmitSHORT_2(e, i); EmitSHORT_2(e, i);
break; break;
case PACK_TYPE_S8_IN_16_LO: case PACK_TYPE_8_IN_16:
EmitS8_IN_16_LO(e, i); Emit8_IN_16(e, i, i.instr->flags);
break; break;
case PACK_TYPE_S8_IN_16_HI: case PACK_TYPE_16_IN_32:
EmitS8_IN_16_HI(e, i); Emit16_IN_32(e, i, i.instr->flags);
break;
case PACK_TYPE_S16_IN_32_LO:
EmitS16_IN_32_LO(e, i);
break;
case PACK_TYPE_S16_IN_32_HI:
EmitS16_IN_32_HI(e, i);
break; break;
default: assert_unhandled_case(i.instr->flags); break; default: assert_unhandled_case(i.instr->flags); break;
} }
} }
static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) { static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->IsConstantZero());
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
// are valid.
if (i.src1.is_constant) {
e.LoadConstantXmm(i.dest, i.src1.constant());
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
} else {
e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
}
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333));
// Extract bytes.
// RGBA (XYZW) -> ARGB (WXYZ) // RGBA (XYZW) -> ARGB (WXYZ)
// w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) | // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) |
// ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF) // ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
if (i.src1.is_constant) { e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
e.LoadConstantXmm(i.dest, i.src1.constant());
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
} else {
e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLOR));
}
} }
static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) { static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->IsConstantZero());
// http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx // http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
// dest = [(src1.x | src1.y), 0, 0, 0] // dest = [(src1.x | src1.y), 0, 0, 0]
// 0|0|0|0|W|Z|Y|X // 0|0|0|0|W|Z|Y|X
@ -5130,34 +5131,112 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_2)); e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_2));
} }
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->IsConstantZero());
// dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0] // dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0]
// 0|0|0|0|W|Z|Y|X // 0|0|0|0|W|Z|Y|X
e.vcvtps2ph(e.xmm0, i.src1, B00000011); e.vcvtps2ph(i.dest, i.src1, B00000011);
// Shuffle to X|Y|Z|W|0|0|0|0 // Shuffle to X|Y|Z|W|0|0|0|0
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4)); e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4));
} }
static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) { static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->IsConstantZero());
// Saturate. // Saturate.
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMNegativeOne)); e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_2Min));
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMOne)); e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2Max));
// Multiply by SHRT_MAX.
e.vmulps(i.dest, i.dest, e.GetXmmConstPtr(XMMShortMaxPS));
// Convert to int32.
e.vcvtps2dq(i.dest, i.dest);
// Pack. // Pack.
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2)); e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2));
} }
static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
assert_always(); // TODO(benvanik): handle src2 (or src1) being constant zero
if (IsPackInUnsigned(flags)) {
if (IsPackOutUnsigned(flags)) {
if (IsPackOutSaturate(flags)) {
// unsigned -> unsigned + saturate
assert_always();
} else {
// unsigned -> unsigned
assert_always();
}
} else {
if (IsPackOutSaturate(flags)) {
// unsigned -> signed + saturate
assert_always();
} else {
// unsigned -> signed
assert_always();
}
}
} else {
if (IsPackOutUnsigned(flags)) {
if (IsPackOutSaturate(flags)) {
// signed -> unsigned + saturate
// PACKUSWB / SaturateSignedWordToUnsignedByte
e.vpackuswb(i.dest, i.src1, i.src2);
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask));
} else {
// signed -> unsigned
assert_always();
}
} else {
if (IsPackOutSaturate(flags)) {
// signed -> signed + saturate
// PACKSSWB / SaturateSignedWordToSignedByte
e.vpacksswb(i.dest, i.src1, i.src2);
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask));
} else {
// signed -> signed
assert_always();
}
}
}
} }
static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
assert_always(); // TODO(benvanik): handle src2 (or src1) being constant zero
} if (IsPackInUnsigned(flags)) {
static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { if (IsPackOutUnsigned(flags)) {
assert_always(); if (IsPackOutSaturate(flags)) {
} // unsigned -> unsigned + saturate
static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { assert_always();
assert_always(); } else {
// unsigned -> unsigned
assert_always();
}
} else {
if (IsPackOutSaturate(flags)) {
// unsigned -> signed + saturate
assert_always();
} else {
// unsigned -> signed
assert_always();
}
}
} else {
if (IsPackOutUnsigned(flags)) {
if (IsPackOutSaturate(flags)) {
// signed -> unsigned + saturate
// PACKUSDW
// TMP[15:0] <- (DEST[31:0] < 0) ? 0 : DEST[15:0];
// DEST[15:0] <- (DEST[31:0] > FFFFH) ? FFFFH : TMP[15:0];
e.vpackusdw(i.dest, i.src1, i.src2);
e.vpshuflw(i.dest, i.dest, B10110001);
e.vpshufhw(i.dest, i.dest, B10110001);
} else {
// signed -> unsigned
assert_always();
}
} else {
if (IsPackOutSaturate(flags)) {
// signed -> signed + saturate
// PACKSSDW / SaturateSignedDwordToSignedWord
e.vpackssdw(i.dest, i.src1, i.src2);
e.vpshuflw(i.dest, i.dest, B10110001);
e.vpshufhw(i.dest, i.dest, B10110001);
} else {
// signed -> signed
assert_always();
}
}
}
} }
}; };
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -5170,7 +5249,7 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) { EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
switch (i.instr->flags) { switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR: case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i); EmitD3DCOLOR(e, i);
break; break;
@ -5183,17 +5262,11 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
case PACK_TYPE_SHORT_2: case PACK_TYPE_SHORT_2:
EmitSHORT_2(e, i); EmitSHORT_2(e, i);
break; break;
case PACK_TYPE_S8_IN_16_LO: case PACK_TYPE_8_IN_16:
EmitS8_IN_16_LO(e, i); Emit8_IN_16(e, i, i.instr->flags);
break; break;
case PACK_TYPE_S8_IN_16_HI: case PACK_TYPE_16_IN_32:
EmitS8_IN_16_HI(e, i); Emit16_IN_32(e, i, i.instr->flags);
break;
case PACK_TYPE_S16_IN_32_LO:
EmitS16_IN_32_LO(e, i);
break;
case PACK_TYPE_S16_IN_32_HI:
EmitS16_IN_32_HI(e, i);
break; break;
default: assert_unhandled_case(i.instr->flags); break; default: assert_unhandled_case(i.instr->flags); break;
} }
@ -5271,21 +5344,93 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
// Add 3,3,0,1. // Add 3,3,0,1.
e.vpor(i.dest, e.GetXmmConstPtr(XMM3301)); e.vpor(i.dest, e.GetXmmConstPtr(XMM3301));
} }
static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
e.vpunpckhbw(i.dest, i.src1, i.src1); assert_false(IsPackOutSaturate(flags));
e.vpsrad(i.dest, 8); if (IsPackToLo(flags)) {
// Unpack to LO.
if (IsPackInUnsigned(flags)) {
if (IsPackOutUnsigned(flags)) {
// unsigned -> unsigned
assert_always();
} else {
// unsigned -> signed
assert_always();
}
} else {
if (IsPackOutUnsigned(flags)) {
// signed -> unsigned
assert_always();
} else {
// signed -> signed
e.vpunpckhbw(i.dest, i.src1, i.src1);
e.vpsrad(i.dest, 8);
}
}
} else {
// Unpack to HI.
if (IsPackInUnsigned(flags)) {
if (IsPackOutUnsigned(flags)) {
// unsigned -> unsigned
assert_always();
} else {
// unsigned -> signed
assert_always();
}
} else {
if (IsPackOutUnsigned(flags)) {
// signed -> unsigned
assert_always();
} else {
// signed -> signed
e.vpunpcklbw(i.dest, i.src1, i.src1);
e.vpsrad(i.dest, 8);
}
}
}
} }
static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
e.vpunpcklbw(i.dest, i.src1, i.src1); assert_false(IsPackOutSaturate(flags));
e.vpsrad(i.dest, 8); if (IsPackToLo(flags)) {
} // Unpack to LO.
static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { if (IsPackInUnsigned(flags)) {
e.vpunpckhwd(i.dest, i.src1, i.src1); if (IsPackOutUnsigned(flags)) {
e.vpsrad(i.dest, 16); // unsigned -> unsigned
} assert_always();
static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { } else {
e.vpunpcklwd(i.dest, i.src1, i.src1); // unsigned -> signed
e.vpsrad(i.dest, 16); assert_always();
}
} else {
if (IsPackOutUnsigned(flags)) {
// signed -> unsigned
assert_always();
} else {
// signed -> signed
e.vpunpckhwd(i.dest, i.src1, i.src1);
e.vpsrad(i.dest, 16);
}
}
} else {
// Unpack to HI.
if (IsPackInUnsigned(flags)) {
if (IsPackOutUnsigned(flags)) {
// unsigned -> unsigned
assert_always();
} else {
// unsigned -> signed
assert_always();
}
} else {
if (IsPackOutUnsigned(flags)) {
// signed -> unsigned
assert_always();
} else {
// signed -> signed
e.vpunpcklwd(i.dest, i.src1, i.src1);
e.vpsrad(i.dest, 16);
}
}
}
} }
}; };
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(

View File

@ -1733,76 +1733,162 @@ XEEMITTER(vpkpx, 0x1000030E, VX)(PPCHIRBuilder& f, InstrData& i) {
return 1; return 1;
} }
int InstrEmit_vpkshss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Signed Halfword Signed Saturate
  //
  // Narrows the signed halfwords of VA followed by VB to signed bytes,
  // saturating each one:
  //   for each halfword i in VA ++ VB:
  //     i = int8_t(Clamp(EXTS(int16_t(i)), -128, 127))
  //   dest = VA ++ VB (the resulting 8-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkshss, 0x1000018E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkshss, 0x1000018E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkshss_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkshss128, VX128(5, 512), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkshss128, VX128(5, 512), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkshss_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkswss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Signed Word Signed Saturate
  //
  // Narrows the signed words of VA followed by VB to signed halfwords,
  // saturating each one:
  //   for each word i in VA ++ VB:
  //     i = int16_t(Clamp(EXTS(int32_t(i)), -2^15, 2^15-1))
  //   dest = VA ++ VB (the resulting 16-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkswss, 0x100001CE, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkswss, 0x100001CE, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkswss_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkswss128, VX128(5, 640), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkswss128, VX128(5, 640), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkswss_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkswus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Signed Word Unsigned Saturate
  //
  // Narrows the signed words of VA followed by VB to unsigned halfwords,
  // saturating each one:
  //   for each word i in VA ++ VB:
  //     i = uint16_t(Clamp(EXTS(int32_t(i)), 0, 2^16-1))
  //   dest = VA ++ VB (the resulting 16-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkswus, 0x1000014E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkswus, 0x1000014E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkswus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkswus128, VX128(5, 704), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkswus128, VX128(5, 704), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkswus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkuhum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Unsigned Halfword Unsigned Modulo
  //
  // Truncates the unsigned halfwords of VA followed by VB to unsigned bytes
  // (no saturation):
  //   for each halfword i in VA ++ VB:
  //     i = uint8_t(uint16_t(i))
  //   dest = VA ++ VB (the resulting 8-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED |
                              PACK_TYPE_OUT_UNSATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkuhum, 0x1000000E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuhum, 0x1000000E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuhum_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkuhum128, VX128(5, 768), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuhum128, VX128(5, 768), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuhum_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkuhus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Unsigned Halfword Unsigned Saturate
  //
  // Narrows the unsigned halfwords of VA followed by VB to unsigned bytes,
  // saturating each one:
  //   for each halfword i in VA ++ VB:
  //     i = uint8_t(Clamp(EXTZ(uint16_t(i)), 0, 255))
  //   dest = VA ++ VB (the resulting 8-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkuhus, 0x1000008E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuhus, 0x1000008E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuhus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkuhus128, VX128(5, 832), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuhus128, VX128(5, 832), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuhus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkshus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Signed Halfword Unsigned Saturate
  //
  // Narrows the signed halfwords of VA followed by VB to unsigned bytes,
  // saturating each one:
  //   for each halfword i in VA ++ VB:
  //     i = uint8_t(Clamp(EXTS(int16_t(i)), 0, 255))
  //   dest = VA ++ VB (the resulting 8-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkshus, 0x1000010E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkshus, 0x1000010E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkshus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkshus128, VX128(5, 576), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkshus128, VX128(5, 576), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkshus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkuwum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Unsigned Word Unsigned Modulo
  //
  // Keeps the low halfword of every word of VA followed by VB (no
  // saturation):
  //   for each word i in VA ++ VB:
  //     i = uint16_t(uint32_t(i))
  //   dest = VA ++ VB (the resulting 16-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED |
                              PACK_TYPE_OUT_UNSATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkuwum, 0x1000004E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuwum, 0x1000004E, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuwum_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkuwum128, VX128(5, 896), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuwum128, VX128(5, 896), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuwum_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
int InstrEmit_vpkuwus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // Vector Pack Unsigned Word Unsigned Saturate
  //
  // Narrows the unsigned words of VA followed by VB to unsigned halfwords,
  // saturating each one:
  //   for each word i in VA ++ VB:
  //     i = uint16_t(Clamp(EXTZ(uint32_t(i)), 0, 2^16-1))
  //   dest = VA ++ VB (the resulting 16-bit values)
  //
  // vd/va/vb are vector register indices: both sources are loaded, packed
  // with the flags below, and the result is stored to vd. Always returns 0.
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  f.StoreVR(vd, f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags));
  return 0;
}
XEEMITTER(vpkuwus, 0x100000CE, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuwus, 0x100000CE, VX)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuwus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
return 1;
} }
XEEMITTER(vpkuwus128, VX128(5, 960), VX128)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vpkuwus128, VX128(5, 960), VX128)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); return InstrEmit_vpkuwus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
return 1;
} }
XEEMITTER(vupkhpx, 0x1000034E, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vupkhpx, 0x1000034E, VX)(PPCHIRBuilder& f, InstrData& i) {
@ -1816,8 +1902,11 @@ XEEMITTER(vupklpx, 0x100003CE, VX)(PPCHIRBuilder& f, InstrData& i) {
} }
int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
// Vector Unpack High Signed Halfword
// halfwords 0-3 expanded to words 0-3 and sign extended // halfwords 0-3 expanded to words 0-3 and sign extended
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_HI); Value* v =
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_16_IN_32 |
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
f.StoreVR(vd, v); f.StoreVR(vd, v);
return 0; return 0;
} }
@ -1831,8 +1920,11 @@ XEEMITTER(vupkhsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) {
} }
int InstrEmit_vupklsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { int InstrEmit_vupklsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
// Vector Unpack Low Signed Halfword
// halfwords 4-7 expanded to words 0-3 and sign extended // halfwords 4-7 expanded to words 0-3 and sign extended
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_LO); Value* v =
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_16_IN_32 |
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
f.StoreVR(vd, v); f.StoreVR(vd, v);
return 0; return 0;
} }
@ -1846,8 +1938,11 @@ XEEMITTER(vupklsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) {
} }
int InstrEmit_vupkhsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { int InstrEmit_vupkhsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
// Vector Unpack High Signed Byte
// bytes 0-7 expanded to halfwords 0-7 and sign extended // bytes 0-7 expanded to halfwords 0-7 and sign extended
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_HI); Value* v =
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_8_IN_16 |
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
f.StoreVR(vd, v); f.StoreVR(vd, v);
return 0; return 0;
} }
@ -1864,8 +1959,10 @@ XEEMITTER(vupkhsb128, VX128(6, 896), VX128)(PPCHIRBuilder& f, InstrData& i) {
} }
int InstrEmit_vupklsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { int InstrEmit_vupklsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
// Vector Unpack Low Signed Byte
// bytes 8-15 expanded to halfwords 0-7 and sign extended // bytes 8-15 expanded to halfwords 0-7 and sign extended
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_LO); Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_8_IN_16 |
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
f.StoreVR(vd, v); f.StoreVR(vd, v);
return 0; return 0;
} }
@ -1886,8 +1983,8 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f,
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5); const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5); const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
uint32_t type = i.VX128_4.IMM >> 2; uint32_t type = i.VX128_4.IMM >> 2;
uint32_t shift = i.VX128_4.IMM & 0x3; uint32_t pack = i.VX128_4.IMM & 0x3;
uint32_t pack = i.VX128_4.z; uint32_t shift = i.VX128_4.z;
Value* v = f.LoadVR(vb); Value* v = f.LoadVR(vb);
switch (type) { switch (type) {
case 0: // VPACK_D3DCOLOR case 0: // VPACK_D3DCOLOR
@ -1909,33 +2006,64 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f,
// http://hlssmod.net/he_code/public/pixelwriter.h // http://hlssmod.net/he_code/public/pixelwriter.h
// control = prev:0123 | new:4567 // control = prev:0123 | new:4567
uint32_t control = PERMUTE_IDENTITY; // original uint32_t control = PERMUTE_IDENTITY; // original
uint32_t src = xerotl(0x07060504, shift * 8);
uint32_t mask = 0;
switch (pack) { switch (pack) {
case 1: // VPACK_32 case 1: // VPACK_32
// VPACK_32 & shift = 3 puts lower 32 bits in x (leftmost slot). // VPACK_32 & shift = 3 puts lower 32 bits in x (leftmost slot).
mask = 0x000000FF << (shift * 8); switch (shift) {
control = (control & ~mask) | (src & mask); case 0:
control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 3);
break;
case 1:
control = PERMUTE_MASK(0, 0, 0, 1, 1, 3, 0, 3);
break;
case 2:
control = PERMUTE_MASK(0, 0, 1, 3, 0, 2, 0, 3);
break;
case 3:
control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3);
break;
default:
assert_unhandled_case(shift);
return 1;
}
break; break;
case 2: // 64bit case 2: // 64bit
if (shift < 3) { switch (shift) {
mask = 0x0000FFFF << (shift * 8); case 0:
} else { control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3);
// w break;
src = 0x07000000; case 1:
mask = 0xFF000000; control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3);
break;
case 2:
control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3);
break;
case 3:
control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3);
break;
default:
assert_unhandled_case(shift);
return 1;
} }
control = (control & ~mask) | (src & mask);
break; break;
case 3: // 64bit case 3: // 64bit
if (shift < 3) { switch (shift) {
mask = 0x0000FFFF << (shift * 8); case 0:
} else { control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3);
// z break;
src = 0x00000004; case 1:
mask = 0x000000FF; control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3);
break;
case 2:
control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3);
break;
case 3:
control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 2);
break;
default:
assert_unhandled_case(shift);
return 1;
} }
control = (control & ~mask) | (src & mask);
break; break;
default: default:
assert_unhandled_case(pack); assert_unhandled_case(pack);

View File

@ -59,3 +59,39 @@ Disassembly of section .text:
0000000000100068 <test_vpkd3d128_d3dcolor_3_3>: 0000000000100068 <test_vpkd3d128_d3dcolor_3_3>:
100068: 18 83 1e d0 vpkd3d128 v4,v3,0,2,2 100068: 18 83 1e d0 vpkd3d128 v4,v3,0,2,2
10006c: 4e 80 00 20 blr 10006c: 4e 80 00 20 blr
0000000000100070 <test_vpkd3d128_short2_invalid_0>:
100070: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
100074: 4e 80 00 20 blr
0000000000100078 <test_vpkd3d128_short2_invalid_1>:
100078: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
10007c: 4e 80 00 20 blr
0000000000100080 <test_vpkd3d128_short2_0>:
100080: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
100084: 4e 80 00 20 blr
0000000000100088 <test_vpkd3d128_short2_1>:
100088: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
10008c: 4e 80 00 20 blr
0000000000100090 <test_vpkd3d128_short2_2>:
100090: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
100094: 4e 80 00 20 blr
0000000000100098 <test_vpkd3d128_float16_2_invalid_0>:
100098: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0
10009c: 4e 80 00 20 blr
00000000001000a0 <test_vpkd3d128_float16_2_0>:
1000a0: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0
1000a4: 4e 80 00 20 blr
00000000001000a8 <test_vpkd3d128_float16_4_invalid_0>:
1000a8: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0
1000ac: 4e 80 00 20 blr
00000000001000b0 <test_vpkd3d128_float16_4_0>:
1000b0: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0
1000b4: 4e 80 00 20 blr

View File

@ -12,3 +12,12 @@
0000000000000058 t test_vpkd3d128_d3dcolor_3_1 0000000000000058 t test_vpkd3d128_d3dcolor_3_1
0000000000000060 t test_vpkd3d128_d3dcolor_3_2 0000000000000060 t test_vpkd3d128_d3dcolor_3_2
0000000000000068 t test_vpkd3d128_d3dcolor_3_3 0000000000000068 t test_vpkd3d128_d3dcolor_3_3
0000000000000070 t test_vpkd3d128_short2_invalid_0
0000000000000078 t test_vpkd3d128_short2_invalid_1
0000000000000080 t test_vpkd3d128_short2_0
0000000000000088 t test_vpkd3d128_short2_1
0000000000000090 t test_vpkd3d128_short2_2
0000000000000098 t test_vpkd3d128_float16_2_invalid_0
00000000000000a0 t test_vpkd3d128_float16_2_0
00000000000000a8 t test_vpkd3d128_float16_4_invalid_0
00000000000000b0 t test_vpkd3d128_float16_4_0

Binary file not shown.

View File

@ -0,0 +1,13 @@
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkshss.o: file format elf64-powerpc
Disassembly of section .text:
0000000000100000 <test_vpkshss_0>:
100000: 10 a3 21 8e vpkshss v5,v3,v4
100004: 4e 80 00 20 blr
0000000000100008 <test_vpkshss_1>:
100008: 10 a3 21 8e vpkshss v5,v3,v4
10000c: 4e 80 00 20 blr

View File

@ -0,0 +1,2 @@
0000000000000000 t test_vpkshss_0
0000000000000008 t test_vpkshss_1

Binary file not shown.

View File

@ -0,0 +1,13 @@
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkswss.o: file format elf64-powerpc
Disassembly of section .text:
0000000000100000 <test_vpkswss_0>:
100000: 10 a3 21 ce vpkswss v5,v3,v4
100004: 4e 80 00 20 blr
0000000000100008 <test_vpkswss_1>:
100008: 10 a3 21 ce vpkswss v5,v3,v4
10000c: 4e 80 00 20 blr

View File

@ -0,0 +1,2 @@
0000000000000000 t test_vpkswss_0
0000000000000008 t test_vpkswss_1

View File

@ -133,3 +133,82 @@ test_vpkd3d128_d3dcolor_3_3:
blr blr
#_ REGISTER_OUT v3 [40400001, 40400002, 40400003, 40400004] #_ REGISTER_OUT v3 [40400001, 40400002, 40400003, 40400004]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 00000000] #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 00000000]
test_vpkd3d128_short2_invalid_0:
#_ REGISTER_IN v3 [43817E00, C37CFC00, 42A23EC8, 403DB757]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 1, 1, 0
.long 0x18851E10
blr
#_ REGISTER_OUT v3 [43817E00, C37CFC00, 42A23EC8, 403DB757]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
test_vpkd3d128_short2_invalid_1:
#_ REGISTER_IN v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 1, 1, 0
.long 0x18851E10
blr
#_ REGISTER_OUT v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
test_vpkd3d128_short2_0:
#_ REGISTER_IN v3 [40407FFF, 403F8001, 00000000, 00000000]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 1, 1, 0
.long 0x18851E10
blr
#_ REGISTER_OUT v3 [40407FFF, 403F8001, 00000000, 00000000]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
test_vpkd3d128_short2_1:
#_ REGISTER_IN v3 [40404000, 403FC000, 40400003, 403F8001]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 1, 1, 0
.long 0x18851E10
blr
#_ REGISTER_OUT v3 [40404000, 403FC000, 40400003, 403F8001]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 4000C000]
test_vpkd3d128_short2_2:
# The first word of v3 has low 16 bits FFFE but the expected upper half of the
# packed word is 7FFF, so this case does not simply copy the low bits;
# the second word's low 16 bits (F333) do appear unchanged.
# NOTE(review): presumably the first lane exercises positive saturation --
# confirm against the vpkd3d128 definition.
#_ REGISTER_IN v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 1, 1, 0
.long 0x18851E10
blr
#_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333]
test_vpkd3d128_float16_2_invalid_0:
# The first two words of v3 are +1.5f and -1.5f (3FC00000 / BFC00000); the
# expected last word of v4 is 3E00BE00, i.e. +1.5 and -1.5 as 16-bit halves.
# The other three words of v4 must keep their CDCDCDCD fill.
# NOTE(review): it is not evident from this file what makes this input
# "invalid" -- confirm the intent of the test name.
#_ REGISTER_IN v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 3, 1, 0
.long 0x188D1E10
blr
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3E00BE00]
test_vpkd3d128_float16_2_0:
# The first two words of v3 are +0.5f and -0.5f (3F000000 / BF000000); the
# expected last word of v4 is 3800B800, i.e. +0.5 and -0.5 as 16-bit halves.
# The other three words of v4 must keep their CDCDCDCD fill.
#_ REGISTER_IN v3 [3F000000, BF000000, 00000000, 00000000]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 3, 1, 0
.long 0x188D1E10
blr
#_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
test_vpkd3d128_float16_4_invalid_0:
# All four words of v3 alternate +1.5f / -1.5f; the expected result occupies
# the last two words of v4 (3E00BE00 twice) and the first two words keep
# their CDCDCDCD fill.
# NOTE(review): it is not evident from this file what makes this input
# "invalid" -- confirm the intent of the test name.
#_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 5, 2, 0
.long 0x18961E10
blr
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00]
test_vpkd3d128_float16_4_0:
# All four words of v3 alternate +0.5f / -0.5f; the expected result occupies
# the last two words of v4 (3800B800 twice) and the first two words keep
# their CDCDCDCD fill.
#_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000]
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
# vpkd3d128 v4, v3, 5, 2, 0
.long 0x18961E10
blr
#_ REGISTER_OUT v3 [3F000000, BF000000, 3F000000, BF000000]
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3800B800]

View File

@ -0,0 +1,17 @@
test_vpkshss_0:
# In-range case: v3 and v4 hold the 16-bit values 0x0000..0x000F in order.
# Expected v5 holds the same values narrowed to bytes 00..0F, with the eight
# elements of v3 first and the eight elements of v4 after them.
# Both source registers must be left unmodified.
#_ REGISTER_IN v3 [00000001, 00020003, 00040005, 00060007]
#_ REGISTER_IN v4 [00080009, 000A000B, 000C000D, 000E000F]
vpkshss v5, v3, v4
blr
#_ REGISTER_OUT v3 [00000001, 00020003, 00040005, 00060007]
#_ REGISTER_OUT v4 [00080009, 000A000B, 000C000D, 000E000F]
#_ REGISTER_OUT v5 [00010203, 04050607, 08090A0B, 0C0D0E0F]
test_vpkshss_1:
# Saturation case: the first two 16-bit elements of both v3 and v4 are 7FFF
# and 8000, which do not fit in a signed byte. Expected v5 shows them clamped
# to 7F and 80; the remaining small elements narrow unchanged.
#_ REGISTER_IN v3 [7FFF8000, 00020003, 00040005, 00060007]
#_ REGISTER_IN v4 [7FFF8000, 000A000B, 000C000D, 000E000F]
vpkshss v5, v3, v4
blr
#_ REGISTER_OUT v3 [7FFF8000, 00020003, 00040005, 00060007]
#_ REGISTER_OUT v4 [7FFF8000, 000A000B, 000C000D, 000E000F]
#_ REGISTER_OUT v5 [7F800203, 04050607, 7F800A0B, 0C0D0E0F]

View File

@ -0,0 +1,17 @@
test_vpkswss_0:
# In-range case: v3 holds the 32-bit values 1..4 and v4 holds 5..8.
# Expected v5 holds the same values narrowed to 16 bits (0001..0008), with
# the four elements of v3 first and the four elements of v4 after them.
# Both source registers must be left unmodified.
#_ REGISTER_IN v3 [00000001, 00000002, 00000003, 00000004]
#_ REGISTER_IN v4 [00000005, 00000006, 00000007, 00000008]
vpkswss v5, v3, v4
blr
#_ REGISTER_OUT v3 [00000001, 00000002, 00000003, 00000004]
#_ REGISTER_OUT v4 [00000005, 00000006, 00000007, 00000008]
#_ REGISTER_OUT v5 [00010002, 00030004, 00050006, 00070008]
test_vpkswss_1:
# Saturation case: the first two 32-bit elements of both v3 and v4 are
# 7FFFFFFF and 80000000, which do not fit in a signed 16-bit value. Expected
# v5 shows them clamped to 7FFF and 8000; the remaining small elements
# (0 and 4, 0 and 8) narrow unchanged.
#_ REGISTER_IN v3 [7FFFFFFF, 80000000, 00000000, 00000004]
#_ REGISTER_IN v4 [7FFFFFFF, 80000000, 00000000, 00000008]
vpkswss v5, v3, v4
blr
#_ REGISTER_OUT v3 [7FFFFFFF, 80000000, 00000000, 00000004]
#_ REGISTER_OUT v4 [7FFFFFFF, 80000000, 00000000, 00000008]
#_ REGISTER_OUT v5 [7FFF8000, 00000004, 7FFF8000, 00000008]

View File

@ -1880,10 +1880,24 @@ Value* HIRBuilder::Swizzle(Value* value, TypeName part_type,
} }
Value* HIRBuilder::Pack(Value* value, uint32_t pack_flags) { Value* HIRBuilder::Pack(Value* value, uint32_t pack_flags) {
ASSERT_VECTOR_TYPE(value); return Pack(value, LoadZero(VEC128_TYPE), pack_flags);
}
Value* HIRBuilder::Pack(Value* value1, Value* value2, uint32_t pack_flags) {
ASSERT_VECTOR_TYPE(value1);
ASSERT_VECTOR_TYPE(value2);
switch (pack_flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR:
case PACK_TYPE_FLOAT16_2:
case PACK_TYPE_FLOAT16_4:
case PACK_TYPE_SHORT_2:
assert_true(value2->IsConstantZero());
break;
}
Instr* i = AppendInstr(OPCODE_PACK_info, pack_flags, AllocValue(VEC128_TYPE)); Instr* i = AppendInstr(OPCODE_PACK_info, pack_flags, AllocValue(VEC128_TYPE));
i->set_src1(value); i->set_src1(value1);
i->src2.value = i->src3.value = NULL; i->set_src2(value2);
i->src3.value = NULL;
return i->dest; return i->dest;
} }

View File

@ -218,6 +218,7 @@ class HIRBuilder {
Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask); Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask);
// SelectBits(cond, value1, value2) // SelectBits(cond, value1, value2)
Value* Pack(Value* value, uint32_t pack_flags = 0); Value* Pack(Value* value, uint32_t pack_flags = 0);
Value* Pack(Value* value1, Value* value2, uint32_t pack_flags = 0);
Value* Unpack(Value* value, uint32_t pack_flags = 0); Value* Unpack(Value* value, uint32_t pack_flags = 0);
Value* CompareExchange(Value* address, Value* compare_value, Value* CompareExchange(Value* address, Value* compare_value,

View File

@ -65,16 +65,46 @@ enum Swizzles {
SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1), SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1),
SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2), SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2),
}; };
enum PackType { enum PackType : uint16_t {
// Special types:
PACK_TYPE_D3DCOLOR = 0, PACK_TYPE_D3DCOLOR = 0,
PACK_TYPE_FLOAT16_2 = 1, PACK_TYPE_FLOAT16_2 = 1,
PACK_TYPE_FLOAT16_4 = 2, PACK_TYPE_FLOAT16_4 = 2,
PACK_TYPE_SHORT_2 = 3, PACK_TYPE_SHORT_2 = 3,
PACK_TYPE_S8_IN_16_LO = 4,
PACK_TYPE_S8_IN_16_HI = 5, // Types which use the bitmasks below for configuration:
PACK_TYPE_S16_IN_32_LO = 6, PACK_TYPE_8_IN_16 = 4,
PACK_TYPE_S16_IN_32_HI = 7, PACK_TYPE_16_IN_32 = 5,
PACK_TYPE_MODE = 0x000F, // just to get the mode
// Unpack to low or high parts.
PACK_TYPE_TO_LO = 0 << 12,
PACK_TYPE_TO_HI = 1 << 12,
// Input/output arithmetic flags:
PACK_TYPE_IN_SIGNED = 0 << 13,
PACK_TYPE_IN_UNSIGNED = 1 << 13,
PACK_TYPE_OUT_SIGNED = 0 << 14,
PACK_TYPE_OUT_UNSIGNED = 1 << 14,
PACK_TYPE_OUT_UNSATURATE = 0 << 15,
PACK_TYPE_OUT_SATURATE = 1 << 15,
}; };
// Reports whether the PACK_TYPE_TO_HI bit is set in a pack flag word.
inline bool IsPackToHi(uint32_t flags) {
  const uint32_t hi_bit = PACK_TYPE_TO_HI;
  return (flags & hi_bit) != 0;
}
// Reports whether the PACK_TYPE_TO_HI bit is clear in a pack flag word,
// i.e. the exact complement of IsPackToHi.
inline bool IsPackToLo(uint32_t flags) {
  const bool to_hi = IsPackToHi(flags);
  return to_hi ? false : true;
}
// Reports whether the PACK_TYPE_IN_UNSIGNED bit is set in a pack flag word.
inline bool IsPackInUnsigned(uint32_t flags) {
  const uint32_t in_unsigned_bit = PACK_TYPE_IN_UNSIGNED;
  return (flags & in_unsigned_bit) != 0;
}
// Reports whether the PACK_TYPE_OUT_UNSIGNED bit is set in a pack flag word.
inline bool IsPackOutUnsigned(uint32_t flags) {
  const uint32_t out_unsigned_bit = PACK_TYPE_OUT_UNSIGNED;
  return (flags & out_unsigned_bit) != 0;
}
// Reports whether the PACK_TYPE_OUT_SATURATE bit is set in a pack flag word.
inline bool IsPackOutSaturate(uint32_t flags) {
  const uint32_t saturate_bit = PACK_TYPE_OUT_SATURATE;
  return (flags & saturate_bit) != 0;
}
enum Opcode { enum Opcode {
OPCODE_COMMENT, OPCODE_COMMENT,

View File

@ -596,7 +596,7 @@ DEFINE_OPCODE(
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PACK, OPCODE_PACK,
"pack", "pack",
OPCODE_SIG_V_V, OPCODE_SIG_V_V_V,
0) 0)
DEFINE_OPCODE( DEFINE_OPCODE(

View File

@ -27,7 +27,7 @@ TEST_CASE("PACK_D3DCOLOR", "[instr]") {
}); });
test.Run([](PPCContext* ctx) { test.Run([](PPCContext* ctx) {
ctx->v[4] = ctx->v[4] =
vec128i(0x3F800050, 0x3F800060, 0x3F800070, 0x3F800080); vec128i(0x40400050, 0x40400060, 0x40400070, 0x40400080);
}, },
[](PPCContext* ctx) { [](PPCContext* ctx) {
auto result = ctx->v[3]; auto result = ctx->v[3];