From 24205ee860e2066bd1e6ccd53b499a07802d80cf Mon Sep 17 00:00:00 2001
From: Wunkolo
Date: Mon, 24 Jan 2022 08:06:05 -0800
Subject: [PATCH] [x64] Fix `VECTOR_SH{L,R,A}_V128(Int8)` masking

[AltiVec](https://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf)
doc says that it just uses the lower `log2(n)` bits of the shift-amount
rather than the whole element-sized value. So there is no need to handle
an overflow.

Also adjusts 64-bit literals to utilize the explicit `UINT64_C` macro.
---
 src/xenia/cpu/backend/x64/x64_seq_vector.cc | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
index 72761aa6f..4daea260b 100644
--- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
@@ -742,9 +742,9 @@ struct VECTOR_SHL_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            0x0102040810204080 >> (shift_amount * 8);
+            UINT64_C(0x0102040810204080) >> (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;
@@ -950,8 +950,8 @@ struct VECTOR_SHR_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
-        const uint64_t shift_matrix = 0x0102040810204080
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
+        const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
                                       << (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
@@ -1133,12 +1133,10 @@ struct VECTOR_SHA_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            shift_amount < 8
-                ? (0x0102040810204080ULL << (shift_amount * 8)) |
-                      (0x8080808080808080ULL >> (64 - shift_amount * 8))
-                : 0x8080808080808080ULL;
+            (UINT64_C(0x0102040810204080) << (shift_amount * 8)) |
+            (UINT64_C(0x8080808080808080) >> (64 - shift_amount * 8));
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;