[x64] Fix `VECTOR_SH{L,R,A}_V128(Int8)` masking
The [AltiVec](https://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf) documentation says that the vector shift instructions use only the low `log2(n)` bits of each shift amount (where `n` is the element width in bits) rather than the whole element-sized value. For `Int8` elements the count is therefore masked to its low 3 bits, so there is no need to handle overflow. Also changes the 64-bit literals to use the explicit `UINT64_C` macro.
This commit is contained in:
parent
f8350b5536
commit
24205ee860
|
@ -742,9 +742,9 @@ struct VECTOR_SHL_V128
|
||||||
}
|
}
|
||||||
if (all_same) {
|
if (all_same) {
|
||||||
// Every count is the same, so we can use gf2p8affineqb.
|
// Every count is the same, so we can use gf2p8affineqb.
|
||||||
const uint8_t shift_amount = shamt.u8[0];
|
const uint8_t shift_amount = shamt.u8[0] & 0b111;
|
||||||
const uint64_t shift_matrix =
|
const uint64_t shift_matrix =
|
||||||
0x0102040810204080 >> (shift_amount * 8);
|
UINT64_C(0x0102040810204080) >> (shift_amount * 8);
|
||||||
e.vgf2p8affineqb(i.dest, i.src1,
|
e.vgf2p8affineqb(i.dest, i.src1,
|
||||||
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
||||||
return;
|
return;
|
||||||
|
@ -950,8 +950,8 @@ struct VECTOR_SHR_V128
|
||||||
}
|
}
|
||||||
if (all_same) {
|
if (all_same) {
|
||||||
// Every count is the same, so we can use gf2p8affineqb.
|
// Every count is the same, so we can use gf2p8affineqb.
|
||||||
const uint8_t shift_amount = shamt.u8[0];
|
const uint8_t shift_amount = shamt.u8[0] & 0b111;
|
||||||
const uint64_t shift_matrix = 0x0102040810204080
|
const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
|
||||||
<< (shift_amount * 8);
|
<< (shift_amount * 8);
|
||||||
e.vgf2p8affineqb(i.dest, i.src1,
|
e.vgf2p8affineqb(i.dest, i.src1,
|
||||||
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
||||||
|
@ -1133,12 +1133,11 @@ struct VECTOR_SHA_V128
|
||||||
}
|
}
|
||||||
if (all_same) {
|
if (all_same) {
|
||||||
// Every count is the same, so we can use gf2p8affineqb.
|
// Every count is the same, so we can use gf2p8affineqb.
|
||||||
const uint8_t shift_amount = shamt.u8[0];
|
const uint8_t shift_amount = shamt.u8[0] & 0b111;
|
||||||
const uint64_t shift_matrix =
|
const uint64_t shift_matrix =
|
||||||
shift_amount < 8
|
(UINT64_C(0x0102040810204080) << (shift_amount * 8)) |
|
||||||
? (0x0102040810204080ULL << (shift_amount * 8)) |
|
(UINT64_C(0x8080808080808080) >> (64 - shift_amount * 8));
|
||||||
(0x8080808080808080ULL >> (64 - shift_amount * 8))
|
;
|
||||||
: 0x8080808080808080ULL;
|
|
||||||
e.vgf2p8affineqb(i.dest, i.src1,
|
e.vgf2p8affineqb(i.dest, i.src1,
|
||||||
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in New Issue