Fixing totally broken vpkd3d128 and adding new pack instructions.
This commit is contained in:
parent
32f42cd5ae
commit
96c203699d
|
@ -801,6 +801,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
|
||||
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
|
||||
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
||||
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
|
||||
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u,
|
||||
0x80000000u, 0x80000000u),
|
||||
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u,
|
||||
|
@ -811,7 +812,10 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
0xFFFFFFFFu, 0x7FFFFFFFu),
|
||||
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u,
|
||||
0x08090A0Bu, 0x0C0D0E0Fu),
|
||||
/* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u,
|
||||
0x09080B0Au, 0x0D0C0F0Eu),
|
||||
/* XMMPermuteControl15 */ vec128b(15),
|
||||
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
|
||||
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
|
||||
0xFFFFFFFFu, 0x0C000408u),
|
||||
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du,
|
||||
|
@ -824,6 +828,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
0x05040706u, 0x01000302u),
|
||||
/* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu,
|
||||
0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
/* XMMPackSHORT_2Min */ vec128i(0x403F8001u),
|
||||
/* XMMPackSHORT_2Max */ vec128i(0x40407FFFu),
|
||||
/* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
|
||||
0xFFFFFFFFu, 0x01000504u),
|
||||
/* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu,
|
||||
|
|
|
@ -49,18 +49,23 @@ enum XmmConst {
|
|||
XMMNormalizeX16Y16,
|
||||
XMM0001,
|
||||
XMM3301,
|
||||
XMM3333,
|
||||
XMMSignMaskPS,
|
||||
XMMSignMaskPD,
|
||||
XMMAbsMaskPS,
|
||||
XMMAbsMaskPD,
|
||||
XMMByteSwapMask,
|
||||
XMMByteOrderMask,
|
||||
XMMPermuteControl15,
|
||||
XMMPackD3DCOLORSat,
|
||||
XMMPackD3DCOLOR,
|
||||
XMMUnpackD3DCOLOR,
|
||||
XMMPackFLOAT16_2,
|
||||
XMMUnpackFLOAT16_2,
|
||||
XMMPackFLOAT16_4,
|
||||
XMMUnpackFLOAT16_4,
|
||||
XMMPackSHORT_2Min,
|
||||
XMMPackSHORT_2Max,
|
||||
XMMPackSHORT_2,
|
||||
XMMUnpackSHORT_2,
|
||||
XMMOneOver255,
|
||||
|
|
|
@ -5080,9 +5080,9 @@ EMITTER_OPCODE_TABLE(
|
|||
// ============================================================================
|
||||
// OPCODE_PACK
|
||||
// ============================================================================
|
||||
EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
|
||||
EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>, V128<>>)) {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
switch (i.instr->flags) {
|
||||
switch (i.instr->flags & PACK_TYPE_MODE) {
|
||||
case PACK_TYPE_D3DCOLOR:
|
||||
EmitD3DCOLOR(e, i);
|
||||
break;
|
||||
|
@ -5095,33 +5095,34 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
|
|||
case PACK_TYPE_SHORT_2:
|
||||
EmitSHORT_2(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S8_IN_16_LO:
|
||||
EmitS8_IN_16_LO(e, i);
|
||||
case PACK_TYPE_8_IN_16:
|
||||
Emit8_IN_16(e, i, i.instr->flags);
|
||||
break;
|
||||
case PACK_TYPE_S8_IN_16_HI:
|
||||
EmitS8_IN_16_HI(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S16_IN_32_LO:
|
||||
EmitS16_IN_32_LO(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S16_IN_32_HI:
|
||||
EmitS16_IN_32_HI(e, i);
|
||||
case PACK_TYPE_16_IN_32:
|
||||
Emit16_IN_32(e, i, i.instr->flags);
|
||||
break;
|
||||
default: assert_unhandled_case(i.instr->flags); break;
|
||||
}
|
||||
}
|
||||
static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->IsConstantZero());
|
||||
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
|
||||
// are valid.
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(i.dest, i.src1.constant());
|
||||
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
|
||||
} else {
|
||||
e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
|
||||
}
|
||||
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333));
|
||||
// Extract bytes.
|
||||
// RGBA (XYZW) -> ARGB (WXYZ)
|
||||
// w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) |
|
||||
// ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(i.dest, i.src1.constant());
|
||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
|
||||
} else {
|
||||
e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLOR));
|
||||
}
|
||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
|
||||
}
|
||||
static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->IsConstantZero());
|
||||
// http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
|
||||
// dest = [(src1.x | src1.y), 0, 0, 0]
|
||||
// 0|0|0|0|W|Z|Y|X
|
||||
|
@ -5130,34 +5131,112 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
|
|||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_2));
|
||||
}
|
||||
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->IsConstantZero());
|
||||
// dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0]
|
||||
// 0|0|0|0|W|Z|Y|X
|
||||
e.vcvtps2ph(e.xmm0, i.src1, B00000011);
|
||||
e.vcvtps2ph(i.dest, i.src1, B00000011);
|
||||
// Shuffle to X|Y|Z|W|0|0|0|0
|
||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4));
|
||||
}
|
||||
static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->IsConstantZero());
|
||||
// Saturate.
|
||||
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMNegativeOne));
|
||||
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMOne));
|
||||
// Multiply by SHRT_MAX.
|
||||
e.vmulps(i.dest, i.dest, e.GetXmmConstPtr(XMMShortMaxPS));
|
||||
// Convert to int32.
|
||||
e.vcvtps2dq(i.dest, i.dest);
|
||||
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_2Min));
|
||||
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2Max));
|
||||
// Pack.
|
||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2));
|
||||
}
|
||||
static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_always();
|
||||
// Emits a PACK of 16-bit lanes from src1 and src2 down to 8-bit lanes in dest.
//
// Implemented flag combinations (both require signed input + saturation):
//   signed -> unsigned + saturate : vpackuswb
//   signed -> signed   + saturate : vpacksswb
// Every other combination (any unsigned input, or any non-saturating output)
// is unimplemented and trips assert_always() without emitting code.
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
  // TODO(benvanik): handle src2 (or src1) being constant zero
  const bool supported = !IsPackInUnsigned(flags) && IsPackOutSaturate(flags);
  if (!supported) {
    assert_always();
    return;
  }
  if (IsPackOutUnsigned(flags)) {
    // PACKUSWB / SaturateSignedWordToUnsignedByte
    e.vpackuswb(i.dest, i.src1, i.src2);
  } else {
    // PACKSSWB / SaturateSignedWordToSignedByte
    e.vpacksswb(i.dest, i.src1, i.src2);
  }
  // Both implemented paths finish by permuting bytes with XMMByteOrderMask.
  e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask));
}
|
||||
static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_always();
|
||||
}
|
||||
static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_always();
|
||||
}
|
||||
static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) {
|
||||
assert_always();
|
||||
// Emits a PACK of 32-bit lanes from src1 and src2 down to 16-bit lanes in
// dest.
//
// Implemented flag combinations (both require signed input + saturation):
//   signed -> unsigned + saturate : vpackusdw
//   signed -> signed   + saturate : vpackssdw
// Every other combination (any unsigned input, or any non-saturating output)
// is unimplemented and trips assert_always() without emitting code.
static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
  // TODO(benvanik): handle src2 (or src1) being constant zero
  const bool supported = !IsPackInUnsigned(flags) && IsPackOutSaturate(flags);
  if (!supported) {
    assert_always();
    return;
  }
  if (IsPackOutUnsigned(flags)) {
    // PACKUSDW
    // TMP[15:0] <- (DEST[31:0] < 0) ? 0 : DEST[15:0];
    // DEST[15:0] <- (DEST[31:0] > FFFFH) ? FFFFH : TMP[15:0];
    e.vpackusdw(i.dest, i.src1, i.src2);
  } else {
    // PACKSSDW / SaturateSignedDwordToSignedWord
    e.vpackssdw(i.dest, i.src1, i.src2);
  }
  // Swap the two 16-bit halves of every 32-bit lane, low then high quadword.
  e.vpshuflw(i.dest, i.dest, B10110001);
  e.vpshufhw(i.dest, i.dest, B10110001);
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(
|
||||
|
@ -5170,7 +5249,7 @@ EMITTER_OPCODE_TABLE(
|
|||
// ============================================================================
|
||||
EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
switch (i.instr->flags) {
|
||||
switch (i.instr->flags & PACK_TYPE_MODE) {
|
||||
case PACK_TYPE_D3DCOLOR:
|
||||
EmitD3DCOLOR(e, i);
|
||||
break;
|
||||
|
@ -5183,17 +5262,11 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
|
|||
case PACK_TYPE_SHORT_2:
|
||||
EmitSHORT_2(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S8_IN_16_LO:
|
||||
EmitS8_IN_16_LO(e, i);
|
||||
case PACK_TYPE_8_IN_16:
|
||||
Emit8_IN_16(e, i, i.instr->flags);
|
||||
break;
|
||||
case PACK_TYPE_S8_IN_16_HI:
|
||||
EmitS8_IN_16_HI(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S16_IN_32_LO:
|
||||
EmitS16_IN_32_LO(e, i);
|
||||
break;
|
||||
case PACK_TYPE_S16_IN_32_HI:
|
||||
EmitS16_IN_32_HI(e, i);
|
||||
case PACK_TYPE_16_IN_32:
|
||||
Emit16_IN_32(e, i, i.instr->flags);
|
||||
break;
|
||||
default: assert_unhandled_case(i.instr->flags); break;
|
||||
}
|
||||
|
@ -5271,21 +5344,93 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
|
|||
// Add 3,3,0,1.
|
||||
e.vpor(i.dest, e.GetXmmConstPtr(XMM3301));
|
||||
}
|
||||
static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vpunpckhbw(i.dest, i.src1, i.src1);
|
||||
e.vpsrad(i.dest, 8);
|
||||
// Emits an UNPACK that widens eight 8-bit lanes of src1 into eight
// sign-extended 16-bit lanes in dest.
//
// Only the signed -> signed conversion is implemented; any combination with
// an unsigned input or unsigned output trips assert_always() without emitting
// code. Saturation is meaningless when widening and is asserted off.
//
// PACK_TYPE_TO_LO selects vpunpckhbw and PACK_TYPE_TO_HI selects vpunpcklbw,
// matching the previous per-half emitters.
// NOTE(review): no byte-order shuffle is applied before the unpack; confirm
// the lane order against the guest's expected layout.
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
  assert_false(IsPackOutSaturate(flags));
  if (IsPackInUnsigned(flags) || IsPackOutUnsigned(flags)) {
    // unsigned -> unsigned, unsigned -> signed, signed -> unsigned
    assert_always();
    return;
  }
  // signed -> signed
  // Interleaving src1 with itself leaves every 16-bit lane holding the same
  // byte twice: (b << 8) | b.
  if (IsPackToLo(flags)) {
    e.vpunpckhbw(i.dest, i.src1, i.src1);
  } else {
    e.vpunpcklbw(i.dest, i.src1, i.src1);
  }
  // Arithmetic shift of each *16-bit* lane by 8 turns (b << 8) | b into the
  // sign-extended byte. A 32-bit shift (vpsrad) would drag bits of the upper
  // halfword into the lower one and corrupt every even result lane.
  e.vpsraw(i.dest, 8);
}
|
||||
static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vpunpcklbw(i.dest, i.src1, i.src1);
|
||||
e.vpsrad(i.dest, 8);
|
||||
}
|
||||
static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vpunpckhwd(i.dest, i.src1, i.src1);
|
||||
e.vpsrad(i.dest, 16);
|
||||
}
|
||||
static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vpunpcklwd(i.dest, i.src1, i.src1);
|
||||
e.vpsrad(i.dest, 16);
|
||||
// Emits an UNPACK that widens four 16-bit lanes of src1 into four
// sign-extended 32-bit lanes in dest.
//
// Only the signed -> signed conversion is implemented; any combination with
// an unsigned input or unsigned output trips assert_always() without emitting
// code. Saturation is asserted off.
//
// PACK_TYPE_TO_LO selects vpunpckhwd and PACK_TYPE_TO_HI selects vpunpcklwd.
static void Emit16_IN_32(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
  assert_false(IsPackOutSaturate(flags));
  const bool to_lo = IsPackToLo(flags);
  if (IsPackInUnsigned(flags) || IsPackOutUnsigned(flags)) {
    // unsigned -> unsigned, unsigned -> signed, signed -> unsigned
    assert_always();
    return;
  }
  // signed -> signed
  // Duplicate each halfword into both halves of a 32-bit lane...
  if (to_lo) {
    e.vpunpckhwd(i.dest, i.src1, i.src1);
  } else {
    e.vpunpcklwd(i.dest, i.src1, i.src1);
  }
  // ...then arithmetic-shift each 32-bit lane by 16 to sign-extend it.
  e.vpsrad(i.dest, 16);
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(
|
||||
|
|
|
@ -1733,76 +1733,162 @@ XEEMITTER(vpkpx, 0x1000030E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Vector Pack Signed Halfword Signed Saturate.
//
// Packs the signed halfwords of VA followed by those of VB into signed
// saturated bytes:
//   for each halfword i in VA + VB:
//     i = int8_t(Clamp(EXTS(int16_t(i)), -128, 127))
//   dest = VA | VB (lower 8bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkshss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkshss, 0x1000018E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkshss_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkshss128, VX128(5, 512), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkshss_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Signed Word Signed Saturate.
//
// Packs the signed words of VA followed by those of VB into signed saturated
// halfwords:
//   for each word i in VA + VB:
//     i = int16_t(Clamp(EXTS(int32_t(i)), -2^15, 2^15-1))
//   dest = VA | VB (lower 16bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkswss_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_SIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkswss, 0x100001CE, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkswss_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkswss128, VX128(5, 640), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkswss_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Signed Word Unsigned Saturate.
//
// Packs the signed words of VA followed by those of VB into unsigned
// saturated halfwords:
//   for each word i in VA + VB:
//     i = uint16_t(Clamp(EXTS(int32_t(i)), 0, 2^16-1))
//   dest = VA | VB (lower 16bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkswus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkswus, 0x1000014E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkswus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkswus128, VX128(5, 704), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkswus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Unsigned Halfword Unsigned Modulo.
//
// Packs the unsigned halfwords of VA followed by those of VB into unsigned
// bytes by truncation (no saturation):
//   for each halfword i in VA + VB:
//     i = uint8_t(uint16_t(i))
//   dest = VA | VB (lower 8bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkuhum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED |
                              PACK_TYPE_OUT_UNSATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkuhum, 0x1000000E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuhum_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkuhum128, VX128(5, 768), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuhum_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Unsigned Halfword Unsigned Saturate.
//
// Packs the unsigned halfwords of VA followed by those of VB into unsigned
// saturated bytes:
//   for each halfword i in VA + VB:
//     i = uint8_t(Clamp(EXTZ(uint16_t(i)), 0, 255))
//   dest = VA | VB (lower 8bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkuhus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkuhus, 0x1000008E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuhus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkuhus128, VX128(5, 832), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuhus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Signed Halfword Unsigned Saturate.
//
// Packs the signed halfwords of VA followed by those of VB into unsigned
// saturated bytes:
//   for each halfword i in VA + VB:
//     i = uint8_t(Clamp(EXTS(int16_t(i)), 0, 255))
//   dest = VA | VB (lower 8bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkshus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_8_IN_16 | PACK_TYPE_IN_SIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkshus, 0x1000010E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkshus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkshus128, VX128(5, 576), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkshus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Unsigned Word Unsigned Modulo.
//
// Concatenates the low halfwords of the words of VA followed by those of VB
// (truncation, no saturation):
//   for each word i in VA + VB:
//     i = uint16_t(uint32_t(i))
//   dest = VA | VB (lower 16bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkuwum_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED |
                              PACK_TYPE_OUT_UNSATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkuwum, 0x1000004E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuwum_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkuwum128, VX128(5, 896), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuwum_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// Vector Pack Unsigned Word Unsigned Saturate.
//
// Packs the unsigned words of VA followed by those of VB into unsigned
// saturated halfwords:
//   for each word i in VA + VB:
//     i = uint16_t(Clamp(EXTZ(uint32_t(i)), 0, 2^16-1))
//   dest = VA | VB (lower 16bit values)
//
// Loads VR[va] and VR[vb], emits a Pack with the flags below, stores the
// result to VR[vd] and returns 0.
int InstrEmit_vpkuwus_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  const uint32_t pack_flags = PACK_TYPE_16_IN_32 | PACK_TYPE_IN_UNSIGNED |
                              PACK_TYPE_OUT_UNSIGNED | PACK_TYPE_OUT_SATURATE;
  Value* packed = f.Pack(f.LoadVR(va), f.LoadVR(vb), pack_flags);
  f.StoreVR(vd, packed);
  return 0;
}
|
||||
XEEMITTER(vpkuwus, 0x100000CE, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuwus_(f, i.VX.VD, i.VX.VA, i.VX.VB);
|
||||
}
|
||||
XEEMITTER(vpkuwus128, VX128(5, 960), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
return InstrEmit_vpkuwus_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
XEEMITTER(vupkhpx, 0x1000034E, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
|
@ -1816,8 +1902,11 @@ XEEMITTER(vupklpx, 0x100003CE, VX)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
int InstrEmit_vupkhsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
|
||||
// Vector Unpack High Signed Halfword
|
||||
// halfwords 0-3 expanded to words 0-3 and sign extended
|
||||
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_HI);
|
||||
Value* v =
|
||||
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_16_IN_32 |
|
||||
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1831,8 +1920,11 @@ XEEMITTER(vupkhsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
int InstrEmit_vupklsh_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
|
||||
// Vector Unpack Low Signed Halfword
|
||||
// halfwords 4-7 expanded to words 0-3 and sign extended
|
||||
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S16_IN_32_LO);
|
||||
Value* v =
|
||||
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_16_IN_32 |
|
||||
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1846,8 +1938,11 @@ XEEMITTER(vupklsh128, 0x100002CE, VX)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
int InstrEmit_vupkhsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
|
||||
// Vector Unpack High Signed Byte
|
||||
// bytes 0-7 expanded to halfwords 0-7 and sign extended
|
||||
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_HI);
|
||||
Value* v =
|
||||
f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_HI | PACK_TYPE_8_IN_16 |
|
||||
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1864,8 +1959,10 @@ XEEMITTER(vupkhsb128, VX128(6, 896), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
int InstrEmit_vupklsb_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
|
||||
// Vector Unpack Low Signed Byte
|
||||
// bytes 8-15 expanded to halfwords 0-7 and sign extended
|
||||
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_S8_IN_16_LO);
|
||||
Value* v = f.Unpack(f.LoadVR(vb), PACK_TYPE_TO_LO | PACK_TYPE_8_IN_16 |
|
||||
PACK_TYPE_IN_SIGNED | PACK_TYPE_OUT_SIGNED);
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1886,8 +1983,8 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f,
|
|||
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
|
||||
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
|
||||
uint32_t type = i.VX128_4.IMM >> 2;
|
||||
uint32_t shift = i.VX128_4.IMM & 0x3;
|
||||
uint32_t pack = i.VX128_4.z;
|
||||
uint32_t pack = i.VX128_4.IMM & 0x3;
|
||||
uint32_t shift = i.VX128_4.z;
|
||||
Value* v = f.LoadVR(vb);
|
||||
switch (type) {
|
||||
case 0: // VPACK_D3DCOLOR
|
||||
|
@ -1909,33 +2006,64 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f,
|
|||
// http://hlssmod.net/he_code/public/pixelwriter.h
|
||||
// control = prev:0123 | new:4567
|
||||
uint32_t control = PERMUTE_IDENTITY; // original
|
||||
uint32_t src = xerotl(0x07060504, shift * 8);
|
||||
uint32_t mask = 0;
|
||||
switch (pack) {
|
||||
case 1: // VPACK_32
|
||||
// VPACK_32 & shift = 3 puts lower 32 bits in x (leftmost slot).
|
||||
mask = 0x000000FF << (shift * 8);
|
||||
control = (control & ~mask) | (src & mask);
|
||||
switch (shift) {
|
||||
case 0:
|
||||
control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 3);
|
||||
break;
|
||||
case 1:
|
||||
control = PERMUTE_MASK(0, 0, 0, 1, 1, 3, 0, 3);
|
||||
break;
|
||||
case 2:
|
||||
control = PERMUTE_MASK(0, 0, 1, 3, 0, 2, 0, 3);
|
||||
break;
|
||||
case 3:
|
||||
control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3);
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(shift);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 2: // 64bit
|
||||
if (shift < 3) {
|
||||
mask = 0x0000FFFF << (shift * 8);
|
||||
} else {
|
||||
// w
|
||||
src = 0x07000000;
|
||||
mask = 0xFF000000;
|
||||
switch (shift) {
|
||||
case 0:
|
||||
control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3);
|
||||
break;
|
||||
case 1:
|
||||
control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3);
|
||||
break;
|
||||
case 2:
|
||||
control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3);
|
||||
break;
|
||||
case 3:
|
||||
control = PERMUTE_MASK(1, 3, 0, 1, 0, 2, 0, 3);
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(shift);
|
||||
return 1;
|
||||
}
|
||||
control = (control & ~mask) | (src & mask);
|
||||
break;
|
||||
case 3: // 64bit
|
||||
if (shift < 3) {
|
||||
mask = 0x0000FFFF << (shift * 8);
|
||||
} else {
|
||||
// z
|
||||
src = 0x00000004;
|
||||
mask = 0x000000FF;
|
||||
switch (shift) {
|
||||
case 0:
|
||||
control = PERMUTE_MASK(0, 0, 0, 1, 1, 2, 1, 3);
|
||||
break;
|
||||
case 1:
|
||||
control = PERMUTE_MASK(0, 0, 1, 2, 1, 3, 0, 3);
|
||||
break;
|
||||
case 2:
|
||||
control = PERMUTE_MASK(1, 2, 1, 3, 0, 2, 0, 3);
|
||||
break;
|
||||
case 3:
|
||||
control = PERMUTE_MASK(0, 0, 0, 1, 0, 2, 1, 2);
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(shift);
|
||||
return 1;
|
||||
}
|
||||
control = (control & ~mask) | (src & mask);
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(pack);
|
||||
|
|
Binary file not shown.
|
@ -59,3 +59,39 @@ Disassembly of section .text:
|
|||
0000000000100068 <test_vpkd3d128_d3dcolor_3_3>:
|
||||
100068: 18 83 1e d0 vpkd3d128 v4,v3,0,2,2
|
||||
10006c: 4e 80 00 20 blr
|
||||
|
||||
0000000000100070 <test_vpkd3d128_short2_invalid_0>:
|
||||
100070: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
|
||||
100074: 4e 80 00 20 blr
|
||||
|
||||
0000000000100078 <test_vpkd3d128_short2_invalid_1>:
|
||||
100078: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
|
||||
10007c: 4e 80 00 20 blr
|
||||
|
||||
0000000000100080 <test_vpkd3d128_short2_0>:
|
||||
100080: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
|
||||
100084: 4e 80 00 20 blr
|
||||
|
||||
0000000000100088 <test_vpkd3d128_short2_1>:
|
||||
100088: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
|
||||
10008c: 4e 80 00 20 blr
|
||||
|
||||
0000000000100090 <test_vpkd3d128_short2_2>:
|
||||
100090: 18 85 1e 10 vpkd3d128 v4,v3,1,0,0
|
||||
100094: 4e 80 00 20 blr
|
||||
|
||||
0000000000100098 <test_vpkd3d128_float16_2_invalid_0>:
|
||||
100098: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0
|
||||
10009c: 4e 80 00 20 blr
|
||||
|
||||
00000000001000a0 <test_vpkd3d128_float16_2_0>:
|
||||
1000a0: 18 8d 1e 10 vpkd3d128 v4,v3,3,0,0
|
||||
1000a4: 4e 80 00 20 blr
|
||||
|
||||
00000000001000a8 <test_vpkd3d128_float16_4_invalid_0>:
|
||||
1000a8: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0
|
||||
1000ac: 4e 80 00 20 blr
|
||||
|
||||
00000000001000b0 <test_vpkd3d128_float16_4_0>:
|
||||
1000b0: 18 96 1e 10 vpkd3d128 v4,v3,1,2,0
|
||||
1000b4: 4e 80 00 20 blr
|
||||
|
|
|
@ -12,3 +12,12 @@
|
|||
0000000000000058 t test_vpkd3d128_d3dcolor_3_1
|
||||
0000000000000060 t test_vpkd3d128_d3dcolor_3_2
|
||||
0000000000000068 t test_vpkd3d128_d3dcolor_3_3
|
||||
0000000000000070 t test_vpkd3d128_short2_invalid_0
|
||||
0000000000000078 t test_vpkd3d128_short2_invalid_1
|
||||
0000000000000080 t test_vpkd3d128_short2_0
|
||||
0000000000000088 t test_vpkd3d128_short2_1
|
||||
0000000000000090 t test_vpkd3d128_short2_2
|
||||
0000000000000098 t test_vpkd3d128_float16_2_invalid_0
|
||||
00000000000000a0 t test_vpkd3d128_float16_2_0
|
||||
00000000000000a8 t test_vpkd3d128_float16_4_invalid_0
|
||||
00000000000000b0 t test_vpkd3d128_float16_4_0
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,13 @@
|
|||
|
||||
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkshss.o: file format elf64-powerpc
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0000000000100000 <test_vpkshss_0>:
|
||||
100000: 10 a3 21 8e vpkshss v5,v3,v4
|
||||
100004: 4e 80 00 20 blr
|
||||
|
||||
0000000000100008 <test_vpkshss_1>:
|
||||
100008: 10 a3 21 8e vpkshss v5,v3,v4
|
||||
10000c: 4e 80 00 20 blr
|
|
@ -0,0 +1,2 @@
|
|||
0000000000000000 t test_vpkshss_0
|
||||
0000000000000008 t test_vpkshss_1
|
Binary file not shown.
|
@ -0,0 +1,13 @@
|
|||
|
||||
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vpkswss.o: file format elf64-powerpc
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0000000000100000 <test_vpkswss_0>:
|
||||
100000: 10 a3 21 ce vpkswss v5,v3,v4
|
||||
100004: 4e 80 00 20 blr
|
||||
|
||||
0000000000100008 <test_vpkswss_1>:
|
||||
100008: 10 a3 21 ce vpkswss v5,v3,v4
|
||||
10000c: 4e 80 00 20 blr
|
|
@ -0,0 +1,2 @@
|
|||
0000000000000000 t test_vpkswss_0
|
||||
0000000000000008 t test_vpkswss_1
|
|
@ -133,3 +133,82 @@ test_vpkd3d128_d3dcolor_3_3:
|
|||
blr
|
||||
#_ REGISTER_OUT v3 [40400001, 40400002, 40400003, 40400004]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 00000000]
|
||||
|
||||
|
||||
test_vpkd3d128_short2_invalid_0:
|
||||
#_ REGISTER_IN v3 [43817E00, C37CFC00, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 1, 1, 0
|
||||
.long 0x18851E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [43817E00, C37CFC00, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
|
||||
test_vpkd3d128_short2_invalid_1:
|
||||
#_ REGISTER_IN v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 1, 1, 0
|
||||
.long 0x18851E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [412FDF00, C09FBE00, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
|
||||
|
||||
test_vpkd3d128_short2_0:
|
||||
#_ REGISTER_IN v3 [40407FFF, 403F8001, 00000000, 00000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 1, 1, 0
|
||||
.long 0x18851E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [40407FFF, 403F8001, 00000000, 00000000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001]
|
||||
test_vpkd3d128_short2_1:
|
||||
#_ REGISTER_IN v3 [40404000, 403FC000, 40400003, 403F8001]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 1, 1, 0
|
||||
.long 0x18851E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [40404000, 403FC000, 40400003, 403F8001]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 4000C000]
|
||||
test_vpkd3d128_short2_2:
|
||||
#_ REGISTER_IN v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 1, 1, 0
|
||||
.long 0x18851E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333]
|
||||
|
||||
test_vpkd3d128_float16_2_invalid_0:
|
||||
#_ REGISTER_IN v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 3, 1, 0
|
||||
.long 0x188D1E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3E00BE00]
|
||||
|
||||
test_vpkd3d128_float16_2_0:
|
||||
#_ REGISTER_IN v3 [3F000000, BF000000, 00000000, 00000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 3, 1, 0
|
||||
.long 0x188D1E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
||||
|
||||
test_vpkd3d128_float16_4_invalid_0:
|
||||
#_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 5, 2, 0
|
||||
.long 0x18961E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00]
|
||||
|
||||
test_vpkd3d128_float16_4_0:
|
||||
#_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 5, 2, 0
|
||||
.long 0x18961E10
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [3F000000, BF000000, 3F000000, BF000000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3800B800]
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
test_vpkshss_0:
|
||||
#_ REGISTER_IN v3 [00000001, 00020003, 00040005, 00060007]
|
||||
#_ REGISTER_IN v4 [00080009, 000A000B, 000C000D, 000E000F]
|
||||
vpkshss v5, v3, v4
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [00000001, 00020003, 00040005, 00060007]
|
||||
#_ REGISTER_OUT v4 [00080009, 000A000B, 000C000D, 000E000F]
|
||||
#_ REGISTER_OUT v5 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||
|
||||
test_vpkshss_1:
|
||||
#_ REGISTER_IN v3 [7FFF8000, 00020003, 00040005, 00060007]
|
||||
#_ REGISTER_IN v4 [7FFF8000, 000A000B, 000C000D, 000E000F]
|
||||
vpkshss v5, v3, v4
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [7FFF8000, 00020003, 00040005, 00060007]
|
||||
#_ REGISTER_OUT v4 [7FFF8000, 000A000B, 000C000D, 000E000F]
|
||||
#_ REGISTER_OUT v5 [7F800203, 04050607, 7F800A0B, 0C0D0E0F]
|
|
@ -0,0 +1,17 @@
|
|||
test_vpkswss_0:
|
||||
#_ REGISTER_IN v3 [00000001, 00000002, 00000003, 00000004]
|
||||
#_ REGISTER_IN v4 [00000005, 00000006, 00000007, 00000008]
|
||||
vpkswss v5, v3, v4
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [00000001, 00000002, 00000003, 00000004]
|
||||
#_ REGISTER_OUT v4 [00000005, 00000006, 00000007, 00000008]
|
||||
#_ REGISTER_OUT v5 [00010002, 00030004, 00050006, 00070008]
|
||||
|
||||
test_vpkswss_1:
|
||||
#_ REGISTER_IN v3 [7FFFFFFF, 80000000, 00000000, 00000004]
|
||||
#_ REGISTER_IN v4 [7FFFFFFF, 80000000, 00000000, 00000008]
|
||||
vpkswss v5, v3, v4
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [7FFFFFFF, 80000000, 00000000, 00000004]
|
||||
#_ REGISTER_OUT v4 [7FFFFFFF, 80000000, 00000000, 00000008]
|
||||
#_ REGISTER_OUT v5 [7FFF8000, 00000004, 7FFF8000, 00000008]
|
|
@ -1880,10 +1880,24 @@ Value* HIRBuilder::Swizzle(Value* value, TypeName part_type,
|
|||
}
|
||||
|
||||
// Single-source overload of Pack.
// Checks `value` with ASSERT_VECTOR_TYPE, then forwards to the two-source
// overload, passing LoadZero(VEC128_TYPE) as the second source and
// `pack_flags` unchanged. Returns whatever that overload returns.
Value* HIRBuilder::Pack(Value* value, uint32_t pack_flags) {
|
||||
ASSERT_VECTOR_TYPE(value);
|
||||
return Pack(value, LoadZero(VEC128_TYPE), pack_flags);
|
||||
}
|
||||
|
||||
// Two-source overload of Pack.
// Appends an instruction via AppendInstr(OPCODE_PACK_info, pack_flags, ...)
// with a freshly allocated VEC128_TYPE value, sets `value1` and `value2` as
// sources 1 and 2, clears source 3, and returns the instruction's `dest`.
// Both sources are checked with ASSERT_VECTOR_TYPE first.
Value* HIRBuilder::Pack(Value* value1, Value* value2, uint32_t pack_flags) {
|
||||
ASSERT_VECTOR_TYPE(value1);
|
||||
ASSERT_VECTOR_TYPE(value2);
|
||||
// Only the bits selected by PACK_TYPE_MODE pick the case. For the four
// types listed here the second source is asserted to be a constant zero;
// any other mode falls through the switch with no check.
switch (pack_flags & PACK_TYPE_MODE) {
|
||||
case PACK_TYPE_D3DCOLOR:
|
||||
case PACK_TYPE_FLOAT16_2:
|
||||
case PACK_TYPE_FLOAT16_4:
|
||||
case PACK_TYPE_SHORT_2:
|
||||
assert_true(value2->IsConstantZero());
|
||||
break;
|
||||
}
|
||||
Instr* i = AppendInstr(OPCODE_PACK_info, pack_flags, AllocValue(VEC128_TYPE));
|
||||
// NOTE(review): this listing reads like a rendered diff without +/- markers.
// The next two statements reference `value`, which is not a parameter of
// this overload, and they duplicate the source setup done by the three
// statements after them. They look like the pre-change lines that the
// value1/value2 statements replace; confirm against the real file.
i->set_src1(value);
|
||||
i->src2.value = i->src3.value = NULL;
|
||||
i->set_src1(value1);
|
||||
i->set_src2(value2);
|
||||
i->src3.value = NULL;
|
||||
return i->dest;
|
||||
}
|
||||
|
||||
|
|
|
@ -218,6 +218,7 @@ class HIRBuilder {
|
|||
Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask);
|
||||
// SelectBits(cond, value1, value2)
|
||||
Value* Pack(Value* value, uint32_t pack_flags = 0);
|
||||
Value* Pack(Value* value1, Value* value2, uint32_t pack_flags = 0);
|
||||
Value* Unpack(Value* value, uint32_t pack_flags = 0);
|
||||
|
||||
Value* CompareExchange(Value* address, Value* compare_value,
|
||||
|
|
|
@ -65,16 +65,46 @@ enum Swizzles {
|
|||
SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1),
|
||||
SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2),
|
||||
};
|
||||
// Flag values describing a pack operation. The low four bits
// (PACK_TYPE_MODE = 0x000F) hold the pack type; bits 12 through 15 are
// single-bit options. Each option comes as a pair where one name is
// `0 << n` (value 0, i.e. the bit is clear) and the other is `1 << n`.
// NOTE(review): this listing reads like a rendered diff without +/- markers.
// Two enum headers appear back to back below; the first (without the
// uint16_t underlying type) is presumably the pre-change line. Confirm
// against the real file.
enum PackType {
|
||||
enum PackType : uint16_t {
|
||||
// Special types:
|
||||
PACK_TYPE_D3DCOLOR = 0,
|
||||
PACK_TYPE_FLOAT16_2 = 1,
|
||||
PACK_TYPE_FLOAT16_4 = 2,
|
||||
PACK_TYPE_SHORT_2 = 3,
|
||||
// NOTE(review): the four *_LO / *_HI entries below use values 4..7, which
// collide with PACK_TYPE_8_IN_16 = 4 and PACK_TYPE_16_IN_32 = 5 further
// down. They are presumably the pre-change enumerators that the bitmask
// based types replace; confirm.
PACK_TYPE_S8_IN_16_LO = 4,
|
||||
PACK_TYPE_S8_IN_16_HI = 5,
|
||||
PACK_TYPE_S16_IN_32_LO = 6,
|
||||
PACK_TYPE_S16_IN_32_HI = 7,
|
||||
|
||||
// Types which use the bitmasks below for configuration:
|
||||
PACK_TYPE_8_IN_16 = 4,
|
||||
PACK_TYPE_16_IN_32 = 5,
|
||||
|
||||
PACK_TYPE_MODE = 0x000F, // just to get the mode
|
||||
|
||||
// Unpack to low or high parts.
|
||||
// Bit 12: TO_LO is the zero value, TO_HI sets the bit.
PACK_TYPE_TO_LO = 0 << 12,
|
||||
PACK_TYPE_TO_HI = 1 << 12,
|
||||
|
||||
// Input/output arithmetic flags:
|
||||
// Bit 13: input signedness. Bit 14: output signedness. Bit 15: output
// saturation. In each pair the first name is the zero value.
PACK_TYPE_IN_SIGNED = 0 << 13,
|
||||
PACK_TYPE_IN_UNSIGNED = 1 << 13,
|
||||
PACK_TYPE_OUT_SIGNED = 0 << 14,
|
||||
PACK_TYPE_OUT_UNSIGNED = 1 << 14,
|
||||
PACK_TYPE_OUT_UNSATURATE = 0 << 15,
|
||||
PACK_TYPE_OUT_SATURATE = 1 << 15,
|
||||
};
|
||||
// Returns true when the PACK_TYPE_TO_HI bit is set in `flags`.
inline bool IsPackToHi(uint32_t flags) {
|
||||
return (flags & PACK_TYPE_TO_HI) == PACK_TYPE_TO_HI;
|
||||
}
|
||||
// Returns true when `flags` does not have PACK_TYPE_TO_HI set; this is the
// exact negation of IsPackToHi, so PACK_TYPE_TO_LO (value 0) is never tested
// directly.
inline bool IsPackToLo(uint32_t flags) {
|
||||
return !IsPackToHi(flags);
|
||||
}
|
||||
// Returns true when the PACK_TYPE_IN_UNSIGNED bit is set in `flags`.
inline bool IsPackInUnsigned(uint32_t flags) {
|
||||
return (flags & PACK_TYPE_IN_UNSIGNED) == PACK_TYPE_IN_UNSIGNED;
|
||||
}
|
||||
// Returns true when the PACK_TYPE_OUT_UNSIGNED bit is set in `flags`.
inline bool IsPackOutUnsigned(uint32_t flags) {
|
||||
return (flags & PACK_TYPE_OUT_UNSIGNED) == PACK_TYPE_OUT_UNSIGNED;
|
||||
}
|
||||
// Returns true when the PACK_TYPE_OUT_SATURATE bit is set in `flags`.
inline bool IsPackOutSaturate(uint32_t flags) {
|
||||
return (flags & PACK_TYPE_OUT_SATURATE) == PACK_TYPE_OUT_SATURATE;
|
||||
}
|
||||
|
||||
enum Opcode {
|
||||
OPCODE_COMMENT,
|
||||
|
|
|
@ -596,7 +596,7 @@ DEFINE_OPCODE(
|
|||
// DEFINE_OPCODE invocation for OPCODE_PACK, with the string "pack" and a
// trailing 0 argument.
// NOTE(review): two signature arguments are listed (OPCODE_SIG_V_V and
// OPCODE_SIG_V_V_V). This listing reads like a rendered diff without +/-
// markers, so the first is presumably the pre-change line and the second
// its replacement, matching the two-source Pack overload; confirm.
DEFINE_OPCODE(
|
||||
OPCODE_PACK,
|
||||
"pack",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
|
|
|
@ -27,7 +27,7 @@ TEST_CASE("PACK_D3DCOLOR", "[instr]") {
|
|||
});
|
||||
test.Run([](PPCContext* ctx) {
|
||||
ctx->v[4] =
|
||||
vec128i(0x3F800050, 0x3F800060, 0x3F800070, 0x3F800080);
|
||||
vec128i(0x40400050, 0x40400060, 0x40400070, 0x40400080);
|
||||
},
|
||||
[](PPCContext* ctx) {
|
||||
auto result = ctx->v[3];
|
||||
|
|
Loading…
Reference in New Issue