mirror of https://github.com/xemu-project/xemu.git
target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c
Instead of copying a constant into a temporary with dupi, use a vector constant directly.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent 143e179c84
commit 04e824eac9
|
@ -6081,9 +6081,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
|
|||
|
||||
if (top) {
|
||||
if (shl == halfbits) {
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
|
||||
tcg_gen_and_vec(vece, d, n, t);
|
||||
tcg_gen_and_vec(vece, d, n,
|
||||
tcg_constant_vec_matching(d, vece,
|
||||
MAKE_64BIT_MASK(halfbits, halfbits)));
|
||||
} else {
|
||||
tcg_gen_sari_vec(vece, d, n, halfbits);
|
||||
tcg_gen_shli_vec(vece, d, d, shl);
|
||||
|
@ -6138,18 +6138,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
|
|||
|
||||
if (top) {
|
||||
if (shl == halfbits) {
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
|
||||
tcg_gen_and_vec(vece, d, n, t);
|
||||
tcg_gen_and_vec(vece, d, n,
|
||||
tcg_constant_vec_matching(d, vece,
|
||||
MAKE_64BIT_MASK(halfbits, halfbits)));
|
||||
} else {
|
||||
tcg_gen_shri_vec(vece, d, n, halfbits);
|
||||
tcg_gen_shli_vec(vece, d, d, shl);
|
||||
}
|
||||
} else {
|
||||
if (shl == 0) {
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_and_vec(vece, d, n, t);
|
||||
tcg_gen_and_vec(vece, d, n,
|
||||
tcg_constant_vec_matching(d, vece,
|
||||
MAKE_64BIT_MASK(0, halfbits)));
|
||||
} else {
|
||||
tcg_gen_shli_vec(vece, d, n, halfbits);
|
||||
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
|
||||
|
@ -6317,18 +6317,14 @@ static const TCGOpcode sqxtn_list[] = {
|
|||
|
||||
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t mask = (1ull << halfbits) - 1;
|
||||
int64_t min = -1ull << (halfbits - 1);
|
||||
int64_t max = -min - 1;
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, min);
|
||||
tcg_gen_smax_vec(vece, d, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_smin_vec(vece, d, d, t);
|
||||
tcg_gen_dupi_vec(vece, t, mask);
|
||||
tcg_gen_and_vec(vece, d, d, t);
|
||||
tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
|
||||
tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
|
||||
tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
|
||||
}
|
||||
|
||||
static const GVecGen2 sqxtnb_ops[3] = {
|
||||
|
@ -6349,19 +6345,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
|
|||
|
||||
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t mask = (1ull << halfbits) - 1;
|
||||
int64_t min = -1ull << (halfbits - 1);
|
||||
int64_t max = -min - 1;
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, min);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_smin_vec(vece, n, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
|
||||
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_dupi_vec(vece, t, mask);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
|
||||
}
|
||||
|
||||
static const GVecGen2 sqxtnt_ops[3] = {
|
||||
|
@ -6389,12 +6381,10 @@ static const TCGOpcode uqxtn_list[] = {
|
|||
|
||||
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = (1ull << halfbits) - 1;
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_umin_vec(vece, d, n, t);
|
||||
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
|
||||
}
|
||||
|
||||
static const GVecGen2 uqxtnb_ops[3] = {
|
||||
|
@ -6415,14 +6405,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
|
|||
|
||||
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = (1ull << halfbits) - 1;
|
||||
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_umin_vec(vece, n, n, t);
|
||||
tcg_gen_umin_vec(vece, n, n, maxv);
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
|
||||
}
|
||||
|
||||
static const GVecGen2 uqxtnt_ops[3] = {
|
||||
|
@ -6450,14 +6439,11 @@ static const TCGOpcode sqxtun_list[] = {
|
|||
|
||||
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = (1ull << halfbits) - 1;
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, 0);
|
||||
tcg_gen_smax_vec(vece, d, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_umin_vec(vece, d, d, t);
|
||||
tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
|
||||
tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
|
||||
}
|
||||
|
||||
static const GVecGen2 sqxtunb_ops[3] = {
|
||||
|
@ -6478,16 +6464,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
|
|||
|
||||
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = (1ull << halfbits) - 1;
|
||||
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
|
||||
|
||||
tcg_gen_dupi_vec(vece, t, 0);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_umin_vec(vece, n, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
|
||||
tcg_gen_umin_vec(vece, n, n, maxv);
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
|
||||
}
|
||||
|
||||
static const GVecGen2 sqxtunt_ops[3] = {
|
||||
|
@ -6551,13 +6535,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
|
|||
|
||||
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_shri_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, mask);
|
||||
tcg_gen_and_vec(vece, d, n, t);
|
||||
tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
|
||||
}
|
||||
|
||||
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
|
||||
|
@ -6609,13 +6591,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
|
|||
|
||||
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
|
||||
tcg_gen_dupi_vec(vece, t, mask);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
|
||||
}
|
||||
|
||||
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
|
||||
|
@ -6658,14 +6638,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
|
|||
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
uint64_t max = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_sari_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, 0);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_umin_vec(vece, d, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
|
||||
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
|
||||
}
|
||||
|
||||
static const TCGOpcode sqshrunb_vec_list[] = {
|
||||
|
@ -6690,16 +6668,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
|
|||
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
uint64_t max = MAKE_64BIT_MASK(0, halfbits);
|
||||
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
|
||||
|
||||
tcg_gen_sari_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, 0);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_umin_vec(vece, n, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
|
||||
tcg_gen_umin_vec(vece, n, n, maxv);
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
|
||||
}
|
||||
|
||||
static const TCGOpcode sqshrunt_vec_list[] = {
|
||||
|
@ -6742,18 +6719,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
|
|||
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
|
||||
int64_t min = -max - 1;
|
||||
int64_t mask = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_sari_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, min);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_smin_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_and_vec(vece, d, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
|
||||
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
|
||||
tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
|
||||
}
|
||||
|
||||
static const TCGOpcode sqshrnb_vec_list[] = {
|
||||
|
@ -6778,19 +6752,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
|
|||
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
|
||||
int64_t min = -max - 1;
|
||||
int64_t mask = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_sari_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, min);
|
||||
tcg_gen_smax_vec(vece, n, n, t);
|
||||
tcg_gen_dupi_vec(vece, t, max);
|
||||
tcg_gen_smin_vec(vece, n, n, t);
|
||||
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
|
||||
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
|
||||
}
|
||||
|
||||
static const TCGOpcode sqshrnt_vec_list[] = {
|
||||
|
@ -6833,12 +6804,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
|
|||
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = MAKE_64BIT_MASK(0, halfbits);
|
||||
|
||||
tcg_gen_shri_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_umin_vec(vece, d, n, t);
|
||||
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
|
||||
}
|
||||
|
||||
static const TCGOpcode uqshrnb_vec_list[] = {
|
||||
|
@ -6863,14 +6833,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
|
|||
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
|
||||
TCGv_vec n, int64_t shr)
|
||||
{
|
||||
TCGv_vec t = tcg_temp_new_vec_matching(d);
|
||||
int halfbits = 4 << vece;
|
||||
int64_t max = MAKE_64BIT_MASK(0, halfbits);
|
||||
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
|
||||
|
||||
tcg_gen_shri_vec(vece, n, n, shr);
|
||||
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
|
||||
tcg_gen_umin_vec(vece, n, n, t);
|
||||
tcg_gen_umin_vec(vece, n, n, maxv);
|
||||
tcg_gen_shli_vec(vece, n, n, halfbits);
|
||||
tcg_gen_bitsel_vec(vece, d, t, d, n);
|
||||
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
|
||||
}
|
||||
|
||||
static const TCGOpcode uqshrnt_vec_list[] = {
|
||||
|
|
Loading…
Reference in New Issue