mirror of https://github.com/xemu-project/xemu.git
tcg: Add gvec compare with immediate and scalar operand
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230831030904.1194667-2-richard.henderson@linaro.org>
This commit is contained in:
parent 58f8961285
commit 9622c697d1
@@ -1042,6 +1042,32 @@ DO_CMP2(64)
#undef DO_CMP1
#undef DO_CMP2

#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc)           \
{                                                                          \
    intptr_t oprsz = simd_oprsz(desc);                                     \
    TYPE inv = simd_data(desc), b = b64;                                   \
    for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) {                   \
        *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv);               \
    }                                                                      \
    clear_high(d, oprsz, desc);                                            \
}

#define DO_CMP2(SZ)                                \
    DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==)        \
    DO_CMP1(gvec_lts##SZ, int##SZ##_t, <)          \
    DO_CMP1(gvec_les##SZ, int##SZ##_t, <=)         \
    DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <)        \
    DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP1
#undef DO_CMP2

void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
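The invert flag passed through simd_data(desc) lets each out-of-line helper serve a comparison and its negation: the lane result is computed branch-free as -((a OP b) ^ inv), producing an all-ones or all-zeros mask. A minimal standalone sketch of that idiom (demo code only, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* One 32-bit lane of the DO_CMP1 pattern for the unsigned "<" case:
 * inv == 0 gives the plain comparison, inv == 1 the inverted one. */
static uint32_t cmp_lane_ltu(uint32_t a, uint32_t b, uint32_t inv)
{
    return -((a < b) ^ inv);
}

int main(void)
{
    printf("%08x\n", cmp_lane_ltu(1, 2, 0));   /* ffffffff: 1 < 2 holds */
    printf("%08x\n", cmp_lane_ltu(3, 2, 0));   /* 00000000 */
    printf("%08x\n", cmp_lane_ltu(1, 2, 1));   /* 00000000: inverted -> GEU */
    printf("%08x\n", cmp_lane_ltu(3, 2, 1));   /* ffffffff */
    return 0;
}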
@@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
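Each new (ptr, ptr, i64, i32) entry above declares the out-of-line helper that the slow path of tcg_gen_gvec_cmps() reaches through tcg_gen_gvec_2i_ool(). As a rough sketch of the prototype such a line produces (the exact expansion comes from QEMU's DEF_HELPER machinery and is stated here as an assumption, not shown in this commit):

/* Approximate result of DEF_HELPER_FLAGS_4(gvec_eqs8, ..., void, ptr, ptr, i64, i32) */
void helper_gvec_eqs8(void *d, void *a, uint64_t b, uint32_t desc);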
@@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                      uint32_t aofs, uint32_t bofs,
                      uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, int64_t c,
                       uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, TCGv_i64 c,
                       uint32_t oprsz, uint32_t maxsz);

/*
 * Perform vector bit select: d = (b & a) | (c & ~a).
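tcg_gen_gvec_cmps() takes the scalar operand in a TCGv_i64, while tcg_gen_gvec_cmpi() takes a plain immediate. A hypothetical frontend expander using the immediate form could look like the following sketch (the register offsets dofs/aofs and the 128-bit vector length are illustrative assumptions, not part of this commit):

/* d[i] = ((int32_t)a[i] < imm) ? -1 : 0, for each 32-bit lane. */
static void gen_vec_lt_imm_i32(uint32_t dofs, uint32_t aofs, int64_t imm)
{
    uint32_t vl = 16;   /* assume 128-bit guest vectors */

    tcg_gen_gvec_cmpi(TCG_COND_LT, MO_32, dofs, aofs, imm, vl, vl);
}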
@@ -3846,6 +3846,155 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
    }
}

static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                            uint32_t oprsz, uint32_t tysz, TCGType type,
                            TCGCond cond, TCGv_vec c)
{
    TCGv_vec t0 = tcg_temp_new_vec(type);
    TCGv_vec t1 = tcg_temp_new_vec(type);
    uint32_t i;

    for (i = 0; i < oprsz; i += tysz) {
        tcg_gen_ld_vec(t1, cpu_env, aofs + i);
        tcg_gen_cmp_vec(cond, vece, t0, t1, c);
        tcg_gen_st_vec(t0, cpu_env, dofs + i);
    }
}

void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, TCGv_i64 c,
                       uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
    static gen_helper_gvec_2i * const eq_fn[4] = {
        gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
        gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
    };
    static gen_helper_gvec_2i * const lt_fn[4] = {
        gen_helper_gvec_lts8, gen_helper_gvec_lts16,
        gen_helper_gvec_lts32, gen_helper_gvec_lts64
    };
    static gen_helper_gvec_2i * const le_fn[4] = {
        gen_helper_gvec_les8, gen_helper_gvec_les16,
        gen_helper_gvec_les32, gen_helper_gvec_les64
    };
    static gen_helper_gvec_2i * const ltu_fn[4] = {
        gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
        gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
    };
    static gen_helper_gvec_2i * const leu_fn[4] = {
        gen_helper_gvec_leus8, gen_helper_gvec_leus16,
        gen_helper_gvec_leus32, gen_helper_gvec_leus64
    };
    static gen_helper_gvec_2i * const * const fns[16] = {
        [TCG_COND_EQ] = eq_fn,
        [TCG_COND_LT] = lt_fn,
        [TCG_COND_LE] = le_fn,
        [TCG_COND_LTU] = ltu_fn,
        [TCG_COND_LEU] = leu_fn,
    };

    TCGType type;

    check_size_align(oprsz, maxsz, dofs | aofs);
    check_overlap_2(dofs, aofs, maxsz);

    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
        do_dup(MO_8, dofs, oprsz, maxsz,
               NULL, NULL, -(cond == TCG_COND_ALWAYS));
        return;
    }

    /*
     * Implement inline with a vector type, if possible.
     * Prefer integer when 64-bit host and 64-bit comparison.
     */
    type = choose_vector_type(cmp_list, vece, oprsz,
                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
    if (type != 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
        TCGv_vec t_vec = tcg_temp_new_vec(type);
        uint32_t some;

        tcg_gen_dup_i64_vec(vece, t_vec, c);
        switch (type) {
        case TCG_TYPE_V256:
            some = QEMU_ALIGN_DOWN(oprsz, 32);
            expand_cmps_vec(vece, dofs, aofs, some, 32,
                            TCG_TYPE_V256, cond, t_vec);
            aofs += some;
            dofs += some;
            oprsz -= some;
            maxsz -= some;
            /* fallthru */

        case TCG_TYPE_V128:
            some = QEMU_ALIGN_DOWN(oprsz, 16);
            expand_cmps_vec(vece, dofs, aofs, some, 16,
                            TCG_TYPE_V128, cond, t_vec);
            break;

        case TCG_TYPE_V64:
            some = QEMU_ALIGN_DOWN(oprsz, 8);
            expand_cmps_vec(vece, dofs, aofs, some, 8,
                            TCG_TYPE_V64, cond, t_vec);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_vec(t_vec);
        tcg_swap_vecop_list(hold_list);
    } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        uint32_t i;

        for (i = 0; i < oprsz; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, aofs + i);
            tcg_gen_negsetcond_i64(cond, t0, t0, c);
            tcg_gen_st_i64(t0, cpu_env, dofs + i);
        }
        tcg_temp_free_i64(t0);
    } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        uint32_t i;

        tcg_gen_extrl_i64_i32(t1, c);
        for (i = 0; i < oprsz; i += 4) {
            tcg_gen_ld_i32(t0, cpu_env, aofs + i);
            tcg_gen_negsetcond_i32(cond, t0, t0, t1);
            tcg_gen_st_i32(t0, cpu_env, dofs + i);
        }
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    } else {
        gen_helper_gvec_2i * const *fn = fns[cond];
        bool inv = false;

        if (fn == NULL) {
            cond = tcg_invert_cond(cond);
            fn = fns[cond];
            assert(fn != NULL);
            inv = true;
        }
        tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
        return;
    }

    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);
    }
}

void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, int64_t c,
                       uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(c);
    tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
}

static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();
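To tie the pieces together, here is an illustrative trace (commentary, not code from the commit) of how an inverted condition reaches the helpers added in the first hunk when none of the inline expansions applies:

/*
 * tcg_gen_gvec_cmpi(TCG_COND_NE, MO_8, dofs, aofs, 0, 16, 16)
 *   -> fns[TCG_COND_NE] is NULL, so cond becomes TCG_COND_EQ and inv = true
 *   -> tcg_gen_gvec_2i_ool(..., inv, gen_helper_gvec_eqs8)
 *   -> the helper sees simd_data(desc) == 1
 *   -> each lane computes -((a[i] == 0) ^ 1), i.e. a[i] != 0 ? -1 : 0
 */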