From 24205ee860e2066bd1e6ccd53b499a07802d80cf Mon Sep 17 00:00:00 2001
From: Wunkolo
Date: Mon, 24 Jan 2022 08:06:05 -0800
Subject: [PATCH] [x64] Fix `VECTOR_SH{L,R,A}_V128(Int8)` masking

[AltiVec](https://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf)
doc says that it just uses the lower `log2(n)` bits of the shift-amount
rather than the whole element-sized value. So there is no need to handle
an overflow.

Also adjusts 64-bit literals to utilize the explicit `UINT64_C` macro.
---
 src/xenia/cpu/backend/x64/x64_seq_vector.cc | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
index 72761aa6f..4daea260b 100644
--- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
@@ -742,9 +742,9 @@ struct VECTOR_SHL_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            0x0102040810204080 >> (shift_amount * 8);
+            UINT64_C(0x0102040810204080) >> (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;
@@ -950,8 +950,8 @@ struct VECTOR_SHR_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
-        const uint64_t shift_matrix = 0x0102040810204080
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
+        const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
                                       << (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
@@ -1133,12 +1133,10 @@ struct VECTOR_SHA_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            shift_amount < 8
-                ? (0x0102040810204080ULL << (shift_amount * 8)) |
-                      (0x8080808080808080ULL >> (64 - shift_amount * 8))
-                : 0x8080808080808080ULL;
+            (UINT64_C(0x0102040810204080) << (shift_amount * 8)) |
+            (UINT64_C(0x8080808080808080) >> (64 - shift_amount * 8));
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;