PACK to 2101010.
This commit is contained in:
parent
98e14fcb1b
commit
cd62d4e461
|
@ -5884,6 +5884,9 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>, V128<>>)) {
|
|||
case PACK_TYPE_SHORT_2:
|
||||
EmitSHORT_2(e, i);
|
||||
break;
|
||||
case PACK_TYPE_UINT_2101010:
|
||||
EmitUINT_2101010(e, i);
|
||||
break;
|
||||
case PACK_TYPE_8_IN_16:
|
||||
Emit8_IN_16(e, i, i.instr->flags);
|
||||
break;
|
||||
|
@ -5973,6 +5976,60 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>, V128<>>)) {
|
|||
// Pack.
|
||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_2));
|
||||
}
|
||||
static __m128i EmulatePackUINT_2101010(void*, __m128i src1) {
  // Software fallback that packs the four 32-bit lanes of src1 into a single
  // 2:10:10:10 word (W in bits 30-31, Z in 20-29, Y in 10-19, X in 0-9).
  // The word is returned in lane 3; lanes 0-2 of the result are zero.
  // The first (void*) argument is unused.
  // https://www.opengl.org/registry/specs/ARB/vertex_type_2_10_10_10_rev.txt
  //
  // Range checks are done on the raw lane bits reinterpreted as signed
  // integers, not on float values, so every lane with the sign bit set falls
  // below the lower bound and saturates to the minimum.
  static const int32_t kLowerXYZ = 0x403FFE01;  // 3 - 1FF / (1 << 22)
  static const int32_t kUpperXYZ = 0x404001FF;  // 3 + 1FF / (1 << 22)
  static const int32_t kLowerW = 0x40400000;    // 3
  static const int32_t kUpperW = 0x40400003;    // 3 + 3 / (1 << 22)
  // With the sign bit masked off, anything above the infinity pattern has an
  // all-ones exponent and a non-zero fraction, i.e. is a NaN.
  static const uint32_t kAbsMask = 0x7FFFFFFF;
  static const uint32_t kInfBits = 0x7F800000;

  alignas(16) uint32_t lanes[4];
  _mm_store_si128(reinterpret_cast<__m128i*>(lanes), src1);

  uint32_t packed = 0;

  // XYZ are 10 bits, signed and saturated.
  for (int lane = 0; lane < 3; ++lane) {
    const uint32_t bits = lanes[lane];
    const int32_t ordered = static_cast<int32_t>(bits);
    uint32_t field;
    if ((bits & kAbsMask) > kInfBits) {
      field = 0x200;  // NaN
    } else if (ordered > kUpperXYZ) {
      field = 0x1FF;  // INT_MAX
    } else if (ordered < kLowerXYZ) {
      field = 0x201;  // -INT_MAX
    } else {
      // In range: the low 10 bits of the encoding are the packed value.
      field = bits & 0x3FF;
    }
    packed |= field << (10 * lane);
  }

  // W is 2 bits, unsigned and saturated.
  const uint32_t w_bits = lanes[3];
  const int32_t w_ordered = static_cast<int32_t>(w_bits);
  uint32_t w_field;
  if ((w_bits & kAbsMask) > kInfBits) {
    w_field = 0x0;  // NaN
  } else if (w_ordered > kUpperW) {
    w_field = 0x3;
  } else if (w_ordered < kLowerW) {
    w_field = 0x0;
  } else {
    w_field = w_bits & 0x3;
  }
  packed |= w_field << 30;

  // Result layout: [0, 0, 0, WZYX].
  alignas(16) uint32_t result[4] = {0, 0, 0, packed};
  return _mm_load_si128(reinterpret_cast<__m128i*>(result));
}
|
||||
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
  // Emits PACK for the 2:10:10:10 format by calling the C++ fallback
  // EmulatePackUINT_2101010 rather than generating inline vector code.
  // dest = [(b2(src1.w), b10(src1.z), b10(src1.y), b10(src1.x)), 0, 0, 0]
  // TODO(benvanik): optimized version.

  // Only src1 is consumed; the second operand is required to be constant zero.
  assert_true(i.src2.value->IsConstantZero());

  void* const fallback = reinterpret_cast<void*>(EmulatePackUINT_2101010);

  // Stash src1 in slot 0 and load the stash address into r8.
  // NOTE(review): presumably CallNativeSafe forwards r8 to the fallback as its
  // src1 argument - confirm against the thunk's calling convention.
  e.lea(e.r8, e.StashXmm(0, i.src1));
  e.CallNativeSafe(fallback);

  // Move xmm0 into dest once the call has returned.
  e.vmovaps(i.dest, e.xmm0);
}
|
||||
static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i src1,
|
||||
__m128i src2) {
|
||||
alignas(16) uint16_t a[8];
|
||||
|
@ -6111,6 +6168,9 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
|
|||
case PACK_TYPE_SHORT_2:
|
||||
EmitSHORT_2(e, i);
|
||||
break;
|
||||
case PACK_TYPE_UINT_2101010:
|
||||
EmitUINT_2101010(e, i);
|
||||
break;
|
||||
case PACK_TYPE_8_IN_16:
|
||||
Emit8_IN_16(e, i, i.instr->flags);
|
||||
break;
|
||||
|
@ -6245,6 +6305,9 @@ EMITTER(UNPACK, MATCH(I<OPCODE_UNPACK, V128<>, V128<>>)) {
|
|||
// Add 3,3,0,1.
|
||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
||||
}
|
||||
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
  // UNPACK of the 2:10:10:10 format has no implementation yet: this emitter
  // generates no code and unconditionally trips an assertion when reached.
  // Neither the emitter nor the instruction arguments are used.
  assert_always("not implemented");
}
|
||||
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
|
||||
assert_false(IsPackOutSaturate(flags));
|
||||
if (IsPackToLo(flags)) {
|
||||
|
|
|
@ -2027,6 +2027,9 @@ XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f,
|
|||
case 1: // VPACK_NORMSHORT2
|
||||
v = f.Pack(v, PACK_TYPE_SHORT_2);
|
||||
break;
|
||||
case 2: // VPACK_... 2_10_10_10 w_z_y_x
|
||||
v = f.Pack(v, PACK_TYPE_UINT_2101010);
|
||||
break;
|
||||
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
||||
v = f.Pack(v, PACK_TYPE_FLOAT16_2);
|
||||
break;
|
||||
|
@ -2125,6 +2128,9 @@ XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(PPCHIRBuilder& f,
|
|||
case 1: // VPACK_NORMSHORT2
|
||||
v = f.Unpack(v, PACK_TYPE_SHORT_2);
|
||||
break;
|
||||
case 2: // VPACK_... 2_10_10_10 w_z_y_x
|
||||
v = f.Unpack(v, PACK_TYPE_UINT_2101010);
|
||||
break;
|
||||
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
|
||||
v = f.Unpack(v, PACK_TYPE_FLOAT16_2);
|
||||
break;
|
||||
|
|
|
@ -177,6 +177,33 @@ test_vpkd3d128_short2_2:
|
|||
#_ REGISTER_OUT v3 [4040FFFE, 403FF333, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333]
|
||||
|
||||
|
||||
# Saturation case for the 2:10:10:10 pack (pack type 2).
# The first three source words all have the sign bit set, so each 10-bit field
# is expected to clamp to 0x201; the last word (4E9A5A5A) is above the 2-bit
# field's upper bound and is expected to clamp to 3.
# Expected packed word: 3<<30 | 0x201<<20 | 0x201<<10 | 0x201 = E0180601.
# Only the last listed word of v4 is expected to change; v3 is left untouched.
test_vpkd3d128_uint_2101010_0:
|
||||
#_ REGISTER_IN v3 [B8FF8000, B8FF8000, C04001FF, 4E9A5A5A]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 2, 1, 0
|
||||
.long 0x18891E10
|
||||
blr
|
||||
|
||||
#_ REGISTER_OUT v3 [B8FF8000, B8FF8000, C04001FF, 4E9A5A5A]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, E0180601]
|
||||
# Mixed case for the 2:10:10:10 pack (pack type 2).
# 42C80000 is above the 10-bit upper bound and is expected to clamp to 0x1FF;
# C2C80000 has the sign bit set and is expected to clamp to 0x201;
# 40400000 is inside the pass-through window, so its low 10 bits (0) are kept;
# 3F800000 is below the 2-bit field's lower bound and is expected to pack to 0.
# Expected packed word: 0<<30 | 0<<20 | 0x201<<10 | 0x1FF = 000805FF.
# Only the last listed word of v4 is expected to change; v3 is left untouched.
test_vpkd3d128_uint_2101010_1:
|
||||
#_ REGISTER_IN v3 [42C80000, C2C80000, 40400000, 3F800000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 2, 1, 0
|
||||
.long 0x18891E10
|
||||
blr
|
||||
|
||||
#_ REGISTER_OUT v3 [42C80000, C2C80000, 40400000, 3F800000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 000805FF]
|
||||
# Below-range case for the 2:10:10:10 pack (pack type 2).
# 3F000000 and 3F800000 are below the 10-bit lower bound and BF000000 has the
# sign bit set, so all three 10-bit fields are expected to clamp to 0x201;
# 00000000 is below the 2-bit field's lower bound and is expected to pack to 0.
# Expected packed word: 0<<30 | 0x201<<20 | 0x201<<10 | 0x201 = 20180601.
# Only the last listed word of v4 is expected to change; v3 is left untouched.
test_vpkd3d128_uint_2101010_2:
|
||||
#_ REGISTER_IN v3 [3F000000, BF000000, 3F800000, 00000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
# vpkd3d128 v4, v3, 2, 1, 0
|
||||
.long 0x18891E10
|
||||
blr
|
||||
|
||||
#_ REGISTER_OUT v3 [3F000000, BF000000, 3F800000, 00000000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 20180601]
|
||||
|
||||
|
||||
test_vpkd3d128_float16_2_invalid_0:
|
||||
#_ REGISTER_IN v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
|
@ -185,7 +212,6 @@ test_vpkd3d128_float16_2_invalid_0:
|
|||
blr
|
||||
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 42A23EC8, 403DB757]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3E00BE00]
|
||||
|
||||
test_vpkd3d128_float16_2_0:
|
||||
#_ REGISTER_IN v3 [3F000000, BF000000, 00000000, 00000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
|
@ -195,6 +221,7 @@ test_vpkd3d128_float16_2_0:
|
|||
#_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 00000000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800]
|
||||
|
||||
|
||||
test_vpkd3d128_float16_4_invalid_0:
|
||||
#_ REGISTER_IN v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
|
@ -203,7 +230,6 @@ test_vpkd3d128_float16_4_invalid_0:
|
|||
blr
|
||||
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00]
|
||||
|
||||
test_vpkd3d128_float16_4_0:
|
||||
#_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000]
|
||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||
|
|
|
@ -71,10 +71,11 @@ enum PackType : uint16_t {
|
|||
PACK_TYPE_FLOAT16_2 = 1,
|
||||
PACK_TYPE_FLOAT16_4 = 2,
|
||||
PACK_TYPE_SHORT_2 = 3,
|
||||
PACK_TYPE_UINT_2101010 = 4,
|
||||
|
||||
// Types which use the bitmasks below for configuration:
|
||||
PACK_TYPE_8_IN_16 = 4,
|
||||
PACK_TYPE_16_IN_32 = 5,
|
||||
PACK_TYPE_8_IN_16 = 5,
|
||||
PACK_TYPE_16_IN_32 = 6,
|
||||
|
||||
PACK_TYPE_MODE = 0x000F, // just to get the mode
|
||||
|
||||
|
|
Loading…
Reference in New Issue