diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 28e5f86e4..9bbea6df7 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -642,7 +642,7 @@ static const vec128_t xmm_consts[] = { /* XMMUnpackFLOAT16_2 */ vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu), /* XMMPackFLOAT16_4 */ - vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x05040706u, 0x01000302u), + vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u, 0x05040706u), /* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu), /* XMMPackSHORT_Min */ vec128i(0x403F8001u), diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 8c157d7e2..0d1a0de33 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -1902,14 +1902,15 @@ struct PACK : Sequence> { std::memset(b, 0, sizeof(b)); for (int i = 0; i < 4; i++) { - b[7 - i] = half_float::detail::float2half(a[i]); + b[7 - (i ^ 2)] = + half_float::detail::float2half(a[i]); } return _mm_load_si128(reinterpret_cast<__m128i*>(b)); } static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { assert_true(i.src2.value->IsConstantZero()); - // dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0] + // dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0] Xmm src; if (e.IsFeatureEnabled(kX64EmitF16C)) { @@ -1921,7 +1922,7 @@ struct PACK : Sequence> { } // 0|0|0|0|W|Z|Y|X e.vcvtps2ph(i.dest, src, 0b00000011); - // Shuffle to X|Y|Z|W|0|0|0|0 + // Shuffle to Z|W|X|Y|0|0|0|0 e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackFLOAT16_4)); } else { if (i.src1.is_constant) { diff --git a/src/xenia/cpu/ppc/testing/instr_vpkd3d128.s b/src/xenia/cpu/ppc/testing/instr_vpkd3d128.s index 397cfce7c..e4acd59f2 100644 --- a/src/xenia/cpu/ppc/testing/instr_vpkd3d128.s +++ b/src/xenia/cpu/ppc/testing/instr_vpkd3d128.s @@ -249,10 +249,10 @@ test_vpkd3d128_float16_4_invalid_0: #_ REGISTER_OUT v3 [3FC00000, BFC00000, 3FC00000, BFC00000] #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3E00BE00, 3E00BE00] test_vpkd3d128_float16_4_0: - #_ REGISTER_IN v3 [3F000000, BF000000, 3F000000, BF000000] + #_ REGISTER_IN v3 [3F000000, BF000000, 3F800000, BF800000] #_ REGISTER_IN v4 [CDCDCDCD, CDCDCDCD, CDCDCDCD, CDCDCDCD] # vpkd3d128 v4, v3, 5, 2, 0 .long 0x18961E10 blr - #_ REGISTER_OUT v3 [3F000000, BF000000, 3F000000, BF000000] - #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3800B800] + #_ REGISTER_OUT v3 [3F000000, BF000000, 3F800000, BF800000] + #_ REGISTER_OUT v4 [CDCDCDCD, CDCDCDCD, 3800B800, 3C00BC00]