PPC: Support v[u]pkd3d128 VPACK_NORMSHORT4
This commit is contained in:
parent
82efbd7bc5
commit
d3ed53c43e
|
@ -639,12 +639,16 @@ static const vec128_t xmm_consts[] = {
|
||||||
0x01000302u),
|
0x01000302u),
|
||||||
/* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu,
|
/* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu,
|
||||||
0xFFFFFFFFu),
|
0xFFFFFFFFu),
|
||||||
/* XMMPackSHORT_2Min */ vec128i(0x403F8001u),
|
/* XMMPackSHORT_Min */ vec128i(0x403F8001u),
|
||||||
/* XMMPackSHORT_2Max */ vec128i(0x40407FFFu),
|
/* XMMPackSHORT_Max */ vec128i(0x40407FFFu),
|
||||||
/* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu,
|
/* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu,
|
||||||
0x01000504u),
|
0x01000504u),
|
||||||
|
/* XMMPackSHORT_4 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u,
|
||||||
|
0x09080D0Cu),
|
||||||
/* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu,
|
/* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu,
|
||||||
0xFFFFFFFFu),
|
0xFFFFFFFFu),
|
||||||
|
/* XMMUnpackSHORT_4 */ vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu,
|
||||||
|
0xFFFF0D0Cu),
|
||||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f),
|
/* XMMOneOver255 */ vec128f(1.0f / 255.0f),
|
||||||
/* XMMMaskEvenPI16 */ vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu,
|
/* XMMMaskEvenPI16 */ vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu,
|
||||||
0x0000FFFFu),
|
0x0000FFFFu),
|
||||||
|
|
|
@ -70,10 +70,12 @@ enum XmmConst {
|
||||||
XMMUnpackFLOAT16_2,
|
XMMUnpackFLOAT16_2,
|
||||||
XMMPackFLOAT16_4,
|
XMMPackFLOAT16_4,
|
||||||
XMMUnpackFLOAT16_4,
|
XMMUnpackFLOAT16_4,
|
||||||
XMMPackSHORT_2Min,
|
XMMPackSHORT_Min,
|
||||||
XMMPackSHORT_2Max,
|
XMMPackSHORT_Max,
|
||||||
XMMPackSHORT_2,
|
XMMPackSHORT_2,
|
||||||
|
XMMPackSHORT_4,
|
||||||
XMMUnpackSHORT_2,
|
XMMUnpackSHORT_2,
|
||||||
|
XMMUnpackSHORT_4,
|
||||||
XMMOneOver255,
|
XMMOneOver255,
|
||||||
XMMMaskEvenPI16,
|
XMMMaskEvenPI16,
|
||||||
XMMShiftMaskEvenPI16,
|
XMMShiftMaskEvenPI16,
|
||||||
|
|
|
@ -6881,6 +6881,9 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||||
case PACK_TYPE_SHORT_2:
|
case PACK_TYPE_SHORT_2:
|
||||||
EmitSHORT_2(e, i);
|
EmitSHORT_2(e, i);
|
||||||
break;
|
break;
|
||||||
|
case PACK_TYPE_SHORT_4:
|
||||||
|
EmitSHORT_4(e, i);
|
||||||
|
break;
|
||||||
case PACK_TYPE_UINT_2101010:
|
case PACK_TYPE_UINT_2101010:
|
||||||
EmitUINT_2101010(e, i);
|
EmitUINT_2101010(e, i);
|
||||||
break;
|
break;
|
||||||
|
@ -6970,11 +6973,19 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||||
static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
|
static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
|
||||||
assert_true(i.src2.value->IsConstantZero());
|
assert_true(i.src2.value->IsConstantZero());
|
||||||
// Saturate.
|
// Saturate.
|
||||||
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_2Min));
|
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_Min));
|
||||||
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2Max));
|
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Max));
|
||||||
// Pack.
|
// Pack.
|
||||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2));
|
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2));
|
||||||
}
|
}
|
||||||
|
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
||||||
|
assert_true(i.src2.value->IsConstantZero());
|
||||||
|
// Saturate.
|
||||||
|
e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackSHORT_Min));
|
||||||
|
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Max));
|
||||||
|
// Pack.
|
||||||
|
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_4));
|
||||||
|
}
|
||||||
static __m128i EmulatePackUINT_2101010(void*, __m128i src1) {
|
static __m128i EmulatePackUINT_2101010(void*, __m128i src1) {
|
||||||
// https://www.opengl.org/registry/specs/ARB/vertex_type_2_10_10_10_rev.txt
|
// https://www.opengl.org/registry/specs/ARB/vertex_type_2_10_10_10_rev.txt
|
||||||
union {
|
union {
|
||||||
|
@ -7229,15 +7240,15 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
case PACK_TYPE_FLOAT16_2:
|
case PACK_TYPE_FLOAT16_2:
|
||||||
EmitFLOAT16_2(e, i);
|
EmitFLOAT16_2(e, i);
|
||||||
break;
|
break;
|
||||||
case PACK_TYPE_FLOAT16_3:
|
|
||||||
EmitFLOAT16_3(e, i);
|
|
||||||
break;
|
|
||||||
case PACK_TYPE_FLOAT16_4:
|
case PACK_TYPE_FLOAT16_4:
|
||||||
EmitFLOAT16_4(e, i);
|
EmitFLOAT16_4(e, i);
|
||||||
break;
|
break;
|
||||||
case PACK_TYPE_SHORT_2:
|
case PACK_TYPE_SHORT_2:
|
||||||
EmitSHORT_2(e, i);
|
EmitSHORT_2(e, i);
|
||||||
break;
|
break;
|
||||||
|
case PACK_TYPE_SHORT_4:
|
||||||
|
EmitSHORT_4(e, i);
|
||||||
|
break;
|
||||||
case PACK_TYPE_UINT_2101010:
|
case PACK_TYPE_UINT_2101010:
|
||||||
EmitUINT_2101010(e, i);
|
EmitUINT_2101010(e, i);
|
||||||
break;
|
break;
|
||||||
|
@ -7323,27 +7334,6 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// FIXME: This has not been verified on a real 360, but from context the
|
|
||||||
// return values are used in floating point math.
|
|
||||||
static __m128 EmulateFLOAT16_3(void*, __m128i src1) {
|
|
||||||
alignas(16) uint16_t a[8];
|
|
||||||
alignas(16) float b[4];
|
|
||||||
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
|
|
||||||
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
b[i] = half_float::detail::half2float(a[VEC128_W(5 + i)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: Correct?
|
|
||||||
b[3] = 1.0f;
|
|
||||||
|
|
||||||
return _mm_load_ps(b);
|
|
||||||
}
|
|
||||||
static void EmitFLOAT16_3(X64Emitter& e, const EmitArgType& i) {
|
|
||||||
e.lea(e.r8, e.StashXmm(0, i.src1));
|
|
||||||
e.CallNativeSafe(reinterpret_cast<void*>(EmulateFLOAT16_3));
|
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
|
||||||
}
|
|
||||||
static __m128 EmulateFLOAT16_4(void*, __m128i src1) {
|
static __m128 EmulateFLOAT16_4(void*, __m128i src1) {
|
||||||
alignas(16) uint16_t a[8];
|
alignas(16) uint16_t a[8];
|
||||||
alignas(16) float b[4];
|
alignas(16) float b[4];
|
||||||
|
@ -7398,6 +7388,36 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
// Add 3,3,0,1.
|
// Add 3,3,0,1.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
||||||
}
|
}
|
||||||
|
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
||||||
|
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
|
||||||
|
// (VD.y) = 3.0 + (VB.x)*2^-22
|
||||||
|
// (VD.z) = 3.0 + (VB.y>>16)*2^-22
|
||||||
|
// (VD.w) = 3.0 + (VB.y)*2^-22
|
||||||
|
|
||||||
|
// XMLoadShortN4 plus 3,3,3,3 (for some reason)
|
||||||
|
// src is (xx,xx,VALUE,VALUE)
|
||||||
|
// dest is (VALUE,VALUE,VALUE,VALUE)
|
||||||
|
Xmm src;
|
||||||
|
if (i.src1.is_constant) {
|
||||||
|
if (i.src1.value->IsConstantZero()) {
|
||||||
|
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3333));
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
// TODO(benvanik): check other common constants/perform shuffle/or here.
|
||||||
|
src = e.xmm0;
|
||||||
|
e.LoadConstantXmm(src, i.src1.constant());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
src = i.src1;
|
||||||
|
}
|
||||||
|
// Shuffle bytes.
|
||||||
|
e.vpshufb(i.dest, src, e.GetXmmConstPtr(XMMUnpackSHORT_4));
|
||||||
|
// Sign extend words.
|
||||||
|
e.vpslld(i.dest, 16);
|
||||||
|
e.vpsrad(i.dest, 16);
|
||||||
|
// Add 3,3,3,3.
|
||||||
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
||||||
|
}
|
||||||
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
||||||
assert_always("not implemented");
|
assert_always("not implemented");
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,7 @@ enum PackType : uint16_t {
|
||||||
// Special types:
|
// Special types:
|
||||||
PACK_TYPE_D3DCOLOR = 0,
|
PACK_TYPE_D3DCOLOR = 0,
|
||||||
PACK_TYPE_FLOAT16_2 = 1,
|
PACK_TYPE_FLOAT16_2 = 1,
|
||||||
PACK_TYPE_FLOAT16_3 = 2, // FIXME: Not verified, but looks correct.
|
PACK_TYPE_SHORT_4 = 2,
|
||||||
PACK_TYPE_FLOAT16_4 = 3,
|
PACK_TYPE_FLOAT16_4 = 3,
|
||||||
PACK_TYPE_SHORT_2 = 4,
|
PACK_TYPE_SHORT_2 = 4,
|
||||||
PACK_TYPE_UINT_2101010 = 5,
|
PACK_TYPE_UINT_2101010 = 5,
|
||||||
|
|
|
@ -2058,6 +2058,9 @@ int InstrEmit_vpkd3d128(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
||||||
v = f.Pack(v, PACK_TYPE_FLOAT16_2);
|
v = f.Pack(v, PACK_TYPE_FLOAT16_2);
|
||||||
break;
|
break;
|
||||||
|
case 4: // VPACK_NORMSHORT4
|
||||||
|
v = f.Pack(v, PACK_TYPE_SHORT_4);
|
||||||
|
break;
|
||||||
case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
|
case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
|
||||||
v = f.Pack(v, PACK_TYPE_FLOAT16_4);
|
v = f.Pack(v, PACK_TYPE_FLOAT16_4);
|
||||||
break;
|
break;
|
||||||
|
@ -2158,8 +2161,8 @@ int InstrEmit_vupkd3d128(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
||||||
v = f.Unpack(v, PACK_TYPE_FLOAT16_2);
|
v = f.Unpack(v, PACK_TYPE_FLOAT16_2);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4: // VPACK_NORMSHORT4
|
||||||
v = f.Unpack(v, PACK_TYPE_FLOAT16_3);
|
v = f.Unpack(v, PACK_TYPE_SHORT_4);
|
||||||
break;
|
break;
|
||||||
case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
|
case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
|
||||||
v = f.Unpack(v, PACK_TYPE_FLOAT16_4);
|
v = f.Unpack(v, PACK_TYPE_FLOAT16_4);
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
# type:
|
# type:
|
||||||
# 0 = PACK_TYPE_D3DCOLOR
|
# 0 = PACK_TYPE_D3DCOLOR
|
||||||
# 1 = PACK_TYPE_SHORT_2
|
# 1 = PACK_TYPE_SHORT_2
|
||||||
|
# 2 = PACK_TYPE_2_10_10_10
|
||||||
# 3 = PACK_TYPE_FLOAT16_2
|
# 3 = PACK_TYPE_FLOAT16_2
|
||||||
|
# 4 = PACK_TYPE_SHORT_4 (VPACK_NORMSHORT4)
|
||||||
# 5 = PACK_TYPE_FLOAT16_4
|
# 5 = PACK_TYPE_FLOAT16_4
|
||||||
# mask:
|
# mask:
|
||||||
# must not be zero
|
# must not be zero
|
||||||
|
@ -177,6 +179,15 @@ test_vpkd3d128_short2_2:
|
||||||
#_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
|
#_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
|
||||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333]
|
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333]
|
||||||
|
|
||||||
|
test_vpkd3d128_short4_0:
|
||||||
|
# v3 = [3.0 - 32767*2^-22, 3.0 - 8*2^-22, 3.0 + 127*2^-22, 3.0]
|
||||||
|
#_ REGISTER_IN v3 [403F8001, 403FFFF8, 4040007F, 40400000]
|
||||||
|
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||||
|
# vpkd3d128 v4, v3, 4, 2, 0
|
||||||
|
.long 0x18921E10
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [403F8001, 403FFFF8, 4040007F, 40400000]
|
||||||
|
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 8001FFF8, 007F0000]
|
||||||
|
|
||||||
test_vpkd3d128_uint_2101010_0:
|
test_vpkd3d128_uint_2101010_0:
|
||||||
#_ REGISTER_IN v3 [B8FF8000, B8FF8000, C04001FF, 4E9A5A5A]
|
#_ REGISTER_IN v3 [B8FF8000, B8FF8000, C04001FF, 4E9A5A5A]
|
||||||
|
@ -221,7 +232,6 @@ test_vpkd3d128_float16_2_0:
|
||||||
#_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000]
|
#_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000]
|
||||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
||||||
|
|
||||||
|
|
||||||
test_vpkd3d128_float16_4_invalid_0:
|
test_vpkd3d128_float16_4_invalid_0:
|
||||||
#_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
#_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||||
|
|
|
@ -33,6 +33,13 @@ test_vupkd3d128_short2_2:
|
||||||
blr
|
blr
|
||||||
#_ REGISTER_OUT v3 [40407FFF, 403FF333, 00000000, 3f800000]
|
#_ REGISTER_OUT v3 [40407FFF, 403FF333, 00000000, 3f800000]
|
||||||
|
|
||||||
|
test_vupkd3d128_short4_0:
|
||||||
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, 7FFFFFFF, 007FFFF8]
|
||||||
|
# vupkd3d128 v3, v3, 4
|
||||||
|
.long 0x18701FF0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [40407FFF, 403FFFFF, 4040007F, 403FFFF8]
|
||||||
|
|
||||||
test_vupkd3d128_float16_2_0:
|
test_vupkd3d128_float16_2_0:
|
||||||
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
||||||
# vupkd3d128 v3, v3, 3
|
# vupkd3d128 v3, v3, 3
|
||||||
|
|
Loading…
Reference in New Issue