mirror of https://github.com/xemu-project/xemu.git
Hexagon (target/hexagon) Short-circuit more HVX single instruction packets
The generated helpers for HVX use pass-by-reference, so they can't short-circuit when the reads/writes overlap. The instructions with overrides are OK because they use tcg_gen_gvec_*. We add a flag has_hvx_helper to DisasContext and extend gen_analyze_funcs to set the flag when the instruction is an HVX instruction with a generated helper. We add an override for V6_vcombine so that it can be short-circuited, along with a test case in tests/tcg/hexagon/hvx_misc.c. Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20230427230012.3800327-15-tsimpson@quicinc.com>
This commit is contained in:
parent
b85529854e
commit
d05d5eebc7
|
@ -212,6 +212,11 @@ def gen_analyze_func(f, tag, regs, imms):
|
|||
if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]:
|
||||
f.write(" ctx->need_pkt_has_store_s1 = true;\n")
|
||||
|
||||
## Mark HVX instructions with generated helpers
|
||||
if (has_generated_helper and
|
||||
"A_CVI" in hex_common.attribdict[tag]):
|
||||
f.write(" ctx->has_hvx_helper = true;\n")
|
||||
|
||||
f.write("}\n\n")
|
||||
|
||||
|
||||
|
|
|
@ -140,6 +140,29 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
|
|||
sizeof(MMVector), sizeof(MMVector)); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Vector combine
 *
 * Emits vector moves so that the vector at VddV_off receives Vv and the
 * vector at VddV_off + sizeof(MMVector) receives Vu.
 *
 * If VddV_off is the same offset as VuV_off, the move of Vv into VddV_off
 * would overwrite Vu before it has been copied.  In that case Vu is first
 * saved to the vtmp field of CPUHexagonState and the second destination
 * vector is filled from that saved copy.
 */
#define fGEN_TCG_V6_vcombine(SHORTCODE) \
    do { \
        if (VddV_off == VuV_off) { \
            /* Destination aliases Vu: stage Vu through vtmp */ \
            intptr_t scratch = offsetof(CPUHexagonState, vtmp); \
            tcg_gen_gvec_mov(MO_64, scratch, VuV_off, \
                             sizeof(MMVector), sizeof(MMVector)); \
            tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
                             sizeof(MMVector), sizeof(MMVector)); \
            tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), scratch, \
                             sizeof(MMVector), sizeof(MMVector)); \
        } else { \
            /* No aliasing with Vu: copy straight into the destination */ \
            tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
                             sizeof(MMVector), sizeof(MMVector)); \
            tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \
                             sizeof(MMVector), sizeof(MMVector)); \
        } \
    } while (0)
|
||||
|
||||
/* Vector conditional move */
|
||||
#define fGEN_TCG_VEC_CMOV(PRED) \
|
||||
do { \
|
||||
|
|
|
@ -378,8 +378,20 @@ static bool need_commit(DisasContext *ctx)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (pkt->num_insns == 1 && !pkt->pkt_has_hvx) {
|
||||
return false;
|
||||
if (pkt->num_insns == 1) {
|
||||
if (pkt->pkt_has_hvx) {
|
||||
/*
|
||||
* The HVX instructions with generated helpers use
|
||||
* pass-by-reference, so they need the read/write overlap
|
||||
* check below.
|
||||
* The HVX instructions with overrides are OK.
|
||||
*/
|
||||
if (!ctx->has_hvx_helper) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for overlap between register reads and writes */
|
||||
|
@ -454,6 +466,7 @@ static void analyze_packet(DisasContext *ctx)
|
|||
{
|
||||
Packet *pkt = ctx->pkt;
|
||||
ctx->need_pkt_has_store_s1 = false;
|
||||
ctx->has_hvx_helper = false;
|
||||
for (int i = 0; i < pkt->num_insns; i++) {
|
||||
Insn *insn = &pkt->insn[i];
|
||||
ctx->insn = insn;
|
||||
|
|
|
@ -68,6 +68,7 @@ typedef struct DisasContext {
|
|||
bool is_tight_loop;
|
||||
bool need_pkt_has_store_s1;
|
||||
bool short_circuit;
|
||||
bool has_hvx_helper;
|
||||
} DisasContext;
|
||||
|
||||
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
|
||||
|
|
|
@ -454,6 +454,25 @@ static void test_load_cur_predicated(void)
|
|||
check_output_w(__LINE__, BUFSIZE);
|
||||
}
|
||||
|
||||
static void test_vcombine(void)
|
||||
{
|
||||
for (int i = 0; i < BUFSIZE / 2; i++) {
|
||||
asm volatile("v2 = vsplat(%0)\n\t"
|
||||
"v3 = vsplat(%1)\n\t"
|
||||
"v3:2 = vcombine(v2, v3)\n\t"
|
||||
"vmem(%2+#0) = v2\n\t"
|
||||
"vmem(%2+#1) = v3\n\t"
|
||||
:
|
||||
: "r"(2 * i), "r"(2 * i + 1), "r"(&output[2 * i])
|
||||
: "v2", "v3", "memory");
|
||||
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
|
||||
expect[2 * i].w[j] = 2 * i + 1;
|
||||
expect[2 * i + 1].w[j] = 2 * i;
|
||||
}
|
||||
}
|
||||
check_output_w(__LINE__, BUFSIZE);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
init_buffers();
|
||||
|
@ -494,6 +513,8 @@ int main()
|
|||
test_load_tmp_predicated();
|
||||
test_load_cur_predicated();
|
||||
|
||||
test_vcombine();
|
||||
|
||||
puts(err ? "FAIL" : "PASS");
|
||||
return err ? 1 : 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue