[x64] Fix `VECTOR_SH{L,R,A}_V128(Int8)` masking

The [AltiVec](https://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf)
documentation says the vector shift instructions use only the lower `log2(n)`
bits of each shift amount rather than the whole element-sized value, so there
is no shift-count overflow to handle. Also switches the 64-bit literals to the
explicit `UINT64_C` macro.
Authored by Wunkolo, 2022-01-24 08:06:05 -08:00; committed by Rick Gibbed
parent f8350b5536
commit 24205ee860
1 changed file with 8 additions and 9 deletions
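
For context, a minimal standalone sketch (not part of the commit) of the element-level behavior being matched, assuming byte elements so `log2(n)` is 3 and the mask is `0b111`; the helper name here is made up for illustration:

```cpp
#include <cstdint>
#include <cstdio>

// Scalar model of AltiVec-style per-byte shifting: only the low three bits
// of the shift count are used, so a count of 9 behaves like a count of 1.
uint8_t shl_byte_altivec_style(uint8_t value, uint8_t count) {
  return static_cast<uint8_t>(value << (count & 0b111));
}

int main() {
  // Without the mask a count of 9 would shift every bit out of the byte;
  // with it, 0x01 << 9 is treated as 0x01 << 1 and gives 0x02.
  std::printf("0x%02X\n", shl_byte_altivec_style(0x01, 9));  // prints 0x02
  return 0;
}
```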

```diff
@@ -742,9 +742,9 @@ struct VECTOR_SHL_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            0x0102040810204080 >> (shift_amount * 8);
+            UINT64_C(0x0102040810204080) >> (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;
```
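
For reference, a standalone sketch (not part of the diff) of why `UINT64_C(0x0102040810204080) >> (shift_amount * 8)` acts as a per-byte left shift: gf2p8affineqb (with imm8 = 0) computes result bit `i` of each byte as the parity of (matrix byte `7 - i` AND the source byte), and `0x0102040810204080` is the identity matrix under that convention. The scalar model below assumes GCC/Clang for `__builtin_parity`:

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the per-byte transform done by gf2p8affineqb (imm8 = 0):
// result bit i = parity(matrix byte [7 - i] AND source byte).
uint8_t gf2p8affine_byte(uint8_t x, uint64_t matrix) {
  uint8_t result = 0;
  for (int i = 0; i < 8; ++i) {
    const uint8_t row = static_cast<uint8_t>(matrix >> ((7 - i) * 8));
    result |= static_cast<uint8_t>(__builtin_parity(row & x) << i);
  }
  return result;
}

int main() {
  // Sliding the identity matrix down one byte per count step turns the
  // transform into "x << shift_amount" for every byte lane.
  for (uint8_t shift_amount = 0; shift_amount < 8; ++shift_amount) {
    const uint64_t shift_matrix =
        UINT64_C(0x0102040810204080) >> (shift_amount * 8);
    for (int x = 0; x < 256; ++x) {
      assert(gf2p8affine_byte(static_cast<uint8_t>(x), shift_matrix) ==
             static_cast<uint8_t>(x << shift_amount));
    }
  }
  return 0;
}
```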

```diff
@@ -950,8 +950,8 @@ struct VECTOR_SHR_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
-        const uint64_t shift_matrix = 0x0102040810204080
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
+        const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
                                       << (shift_amount * 8);
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
```
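
The logical-right-shift case is the mirror image: shifting the identity matrix left by one byte per count step selects source bit `i + shift_amount` for result bit `i`. A self-contained check under the same scalar model as the previous sketch (repeated so this snippet compiles on its own):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the per-byte transform done by gf2p8affineqb (imm8 = 0).
uint8_t gf2p8affine_byte(uint8_t x, uint64_t matrix) {
  uint8_t result = 0;
  for (int i = 0; i < 8; ++i) {
    const uint8_t row = static_cast<uint8_t>(matrix >> ((7 - i) * 8));
    result |= static_cast<uint8_t>(__builtin_parity(row & x) << i);
  }
  return result;
}

int main() {
  // The left-shifted identity matrix reproduces a logical right shift of
  // every byte lane.
  for (uint8_t shift_amount = 0; shift_amount < 8; ++shift_amount) {
    const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
                                  << (shift_amount * 8);
    for (int x = 0; x < 256; ++x) {
      assert(gf2p8affine_byte(static_cast<uint8_t>(x), shift_matrix) ==
             static_cast<uint8_t>(x >> shift_amount));
    }
  }
  return 0;
}
```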

```diff
@@ -1133,12 +1133,11 @@ struct VECTOR_SHA_V128
       }
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
-        const uint8_t shift_amount = shamt.u8[0];
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
         const uint64_t shift_matrix =
-            shift_amount < 8
-                ? (0x0102040810204080ULL << (shift_amount * 8)) |
-                      (0x8080808080808080ULL >> (64 - shift_amount * 8))
-                : 0x8080808080808080ULL;
+            (UINT64_C(0x0102040810204080) << (shift_amount * 8)) |
+            (UINT64_C(0x8080808080808080) >> (64 - shift_amount * 8));
+        ;
         e.vgf2p8affineqb(i.dest, i.src1,
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;
```
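
For the arithmetic shift, the OR-ed `0x80` rows make the top `shift_amount` result bits copy the sign bit, which is why the old `shift_amount < 8` ternary is no longer needed once the count is masked to `0b111`. A self-contained check under the same scalar model, covering counts 1 through 7 (a count of 0 would turn the `64 - shift_amount * 8` term into a full-width shift in this sketch):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the per-byte transform done by gf2p8affineqb (imm8 = 0).
uint8_t gf2p8affine_byte(uint8_t x, uint64_t matrix) {
  uint8_t result = 0;
  for (int i = 0; i < 8; ++i) {
    const uint8_t row = static_cast<uint8_t>(matrix >> ((7 - i) * 8));
    result |= static_cast<uint8_t>(__builtin_parity(row & x) << i);
  }
  return result;
}

int main() {
  for (uint8_t shift_amount = 1; shift_amount < 8; ++shift_amount) {
    // The shifted identity rows select source bit i + shift_amount; the
    // 0x80 rows fill the top shift_amount result bits with the sign bit.
    const uint64_t shift_matrix =
        (UINT64_C(0x0102040810204080) << (shift_amount * 8)) |
        (UINT64_C(0x8080808080808080) >> (64 - shift_amount * 8));
    for (int x = 0; x < 256; ++x) {
      // Expected value: two's-complement arithmetic shift of the byte.
      const int8_t expected =
          static_cast<int8_t>(static_cast<int8_t>(x) >> shift_amount);
      assert(static_cast<int8_t>(gf2p8affine_byte(static_cast<uint8_t>(x),
                                                  shift_matrix)) == expected);
    }
  }
  return 0;
}
```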