[CPU] Fix vpkd3d half4 component order
This commit is contained in:
parent
c3fcb47efe
commit
4ae9266f13
|
@ -642,7 +642,7 @@ static const vec128_t xmm_consts[] = {
|
||||||
/* XMMUnpackFLOAT16_2 */
|
/* XMMUnpackFLOAT16_2 */
|
||||||
vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||||
/* XMMPackFLOAT16_4 */
|
/* XMMPackFLOAT16_4 */
|
||||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x05040706u, 0x01000302u),
|
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u, 0x05040706u),
|
||||||
/* XMMUnpackFLOAT16_4 */
|
/* XMMUnpackFLOAT16_4 */
|
||||||
vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||||
/* XMMPackSHORT_Min */ vec128i(0x403F8001u),
|
/* XMMPackSHORT_Min */ vec128i(0x403F8001u),
|
||||||
|
|
|
@ -1902,14 +1902,15 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||||
std::memset(b, 0, sizeof(b));
|
std::memset(b, 0, sizeof(b));
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
b[7 - (i ^ 2)] =
|
||||||
|
half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
|
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
|
||||||
}
|
}
|
||||||
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
|
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
|
||||||
assert_true(i.src2.value->IsConstantZero());
|
assert_true(i.src2.value->IsConstantZero());
|
||||||
// dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0]
|
// dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0]
|
||||||
|
|
||||||
Xmm src;
|
Xmm src;
|
||||||
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
||||||
|
@ -1921,7 +1922,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||||
}
|
}
|
||||||
// 0|0|0|0|W|Z|Y|X
|
// 0|0|0|0|W|Z|Y|X
|
||||||
e.vcvtps2ph(i.dest, src, 0b00000011);
|
e.vcvtps2ph(i.dest, src, 0b00000011);
|
||||||
// Shuffle to X|Y|Z|W|0|0|0|0
|
// Shuffle to Z|W|X|Y|0|0|0|0
|
||||||
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4));
|
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4));
|
||||||
} else {
|
} else {
|
||||||
if (i.src1.is_constant) {
|
if (i.src1.is_constant) {
|
||||||
|
|
|
@ -249,10 +249,10 @@ test_vpkd3d128_float16_4_invalid_0:
|
||||||
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
#_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000]
|
||||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00]
|
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00]
|
||||||
test_vpkd3d128_float16_4_0:
|
test_vpkd3d128_float16_4_0:
|
||||||
#_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000]
|
#_ REGISTER_IN v3 [3F000000, BF000000, 3F800000, BF800000]
|
||||||
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
#_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD]
|
||||||
# vpkd3d128 v4, v3, 5, 2, 0
|
# vpkd3d128 v4, v3, 5, 2, 0
|
||||||
.long 0x18961E10
|
.long 0x18961E10
|
||||||
blr
|
blr
|
||||||
#_ REGISTER_OUT v3 [3F000000, BF000000, 3F000000, BF000000]
|
#_ REGISTER_OUT v3 [3F000000, BF000000, 3F800000, BF800000]
|
||||||
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3800B800]
|
#_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3C00BC00]
|
||||||
|
|
Loading…
Reference in New Issue